1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #include <sys/kmem.h>
34 #define	_SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/stropts.h>
37 #include <sys/socket.h>
38 #include <sys/random.h>
39 #include <sys/policy.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 
44 #include <inet/common.h>
45 #include <inet/ip.h>
46 #include <inet/ip6.h>
47 #include <inet/ipclassifier.h>
48 #include "sctp_impl.h"
49 #include "sctp_asconf.h"
50 #include "sctp_addr.h"
51 
/*
 * Hint for where the next anonymous port search should start.
 *
 * sctp_select_port() reads it to seed a bind that did not name a port, and
 * sctp_bindi() reads it when it has to fall back from a busy user-supplied
 * port.  sctp_bindi() is the only writer: after a successful bind to a port
 * that was not user-specified it stores that port plus one.  The stored
 * value may be outside the valid anonymous range, so readers pass it
 * through sctp_update_next_port() before using it.
 */
uint_t	sctp_next_port_to_try;
53 
54 /*
55  * Returns 0 on success, EACCES on permission failure.
56  */
57 static int
58 sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified)
59 {
60 	/*
61 	 * Get a valid port (within the anonymous range and should not
62 	 * be a privileged one) to use if the user has not given a port.
63 	 * If multiple threads are here, they may all start with
64 	 * with the same initial port. But, it should be fine as long as
65 	 * sctp_bindi will ensure that no two threads will be assigned
66 	 * the same port.
67 	 */
68 	if (*requested_port == 0) {
69 		*requested_port = sctp_update_next_port(sctp_next_port_to_try);
70 		*user_specified = 0;
71 	} else {
72 		int i;
73 		boolean_t priv = B_FALSE;
74 
75 		/*
76 		 * If the requested_port is in the well-known privileged range,
77 		 * verify that the stream was opened by a privileged user.
78 		 * Note: No locks are held when inspecting sctp_g_*epriv_ports
79 		 * but instead the code relies on:
80 		 * - the fact that the address of the array and its size never
81 		 *   changes
82 		 * - the atomic assignment of the elements of the array
83 		 */
84 		if (*requested_port < sctp_smallest_nonpriv_port) {
85 			priv = B_TRUE;
86 		} else {
87 			for (i = 0; i < sctp_g_num_epriv_ports; i++) {
88 				if (*requested_port == sctp_g_epriv_ports[i]) {
89 					priv = B_TRUE;
90 					break;
91 				}
92 			}
93 		}
94 		if (priv) {
95 			/*
96 			 * sctp_bind() should take a cred_t argument so that
97 			 * we can use it here.
98 			 */
99 			if (secpolicy_net_privaddr(sctp->sctp_credp,
100 			    *requested_port) != 0) {
101 				dprint(1,
102 				    ("sctp_bind(x): no prive for port %d",
103 				    *requested_port));
104 				return (TACCES);
105 			}
106 		}
107 		*user_specified = 1;
108 	}
109 
110 	return (0);
111 }
112 
113 int
114 sctp_listen(sctp_t *sctp)
115 {
116 	sctp_tf_t	*tf;
117 
118 	RUN_SCTP(sctp);
119 	/*
120 	 * TCP handles listen() increasing the backlog, need to check
121 	 * if it should be handled here too
122 	 */
123 	if (sctp->sctp_state > SCTPS_BOUND) {
124 		WAKE_SCTP(sctp);
125 		return (EINVAL);
126 	}
127 
128 	/* Do an anonymous bind for unbound socket doing listen(). */
129 	if (sctp->sctp_nsaddrs == 0) {
130 		struct sockaddr_storage ss;
131 		int ret;
132 
133 		bzero(&ss, sizeof (ss));
134 		ss.ss_family = sctp->sctp_family;
135 
136 		WAKE_SCTP(sctp);
137 		if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
138 			sizeof (ss))) != 0)
139 			return (ret);
140 		RUN_SCTP(sctp)
141 	}
142 
143 	sctp->sctp_state = SCTPS_LISTEN;
144 	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
145 	sctp->sctp_last_secret_update = lbolt64;
146 	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);
147 	tf = &sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(sctp->sctp_lport))];
148 	sctp_listen_hash_insert(tf, sctp);
149 	WAKE_SCTP(sctp);
150 	return (0);
151 }
152 
153 /*
154  * Bind the sctp_t to a sockaddr, which includes an address and other
155  * information, such as port or flowinfo.
156  */
157 int
158 sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len)
159 {
160 	int		user_specified;
161 	boolean_t	bind_to_req_port_only;
162 	in_port_t	requested_port;
163 	in_port_t	allocated_port;
164 	int		err = 0;
165 
166 	ASSERT(sctp != NULL);
167 	ASSERT(sa);
168 
169 	RUN_SCTP(sctp);
170 
171 	if (sctp->sctp_state > SCTPS_BOUND) {
172 		err = EINVAL;
173 		goto done;
174 	}
175 
176 	switch (sa->sa_family) {
177 	case AF_INET:
178 		if (len < sizeof (struct sockaddr_in) ||
179 		    sctp->sctp_family == AF_INET6) {
180 			err = EINVAL;
181 			goto done;
182 		}
183 		requested_port = ntohs(((struct sockaddr_in *)sa)->sin_port);
184 		break;
185 	case AF_INET6:
186 		if (len < sizeof (struct sockaddr_in6) ||
187 		    sctp->sctp_family == AF_INET) {
188 			err = EINVAL;
189 			goto done;
190 		}
191 		requested_port = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
192 		/* Set the flowinfo. */
193 		sctp->sctp_ip6h->ip6_vcf =
194 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
195 		    (((struct sockaddr_in6 *)sa)->sin6_flowinfo &
196 		    ~IPV6_VERS_AND_FLOW_MASK);
197 		break;
198 	default:
199 		err = EAFNOSUPPORT;
200 		goto done;
201 	}
202 	bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE;
203 
204 	if (sctp_select_port(sctp, &requested_port, &user_specified) != 0) {
205 		err = EPERM;
206 		goto done;
207 	}
208 
209 	if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE,
210 	    user_specified == 1 ? htons(requested_port) : 0)) != 0) {
211 		goto done;
212 	}
213 	allocated_port = sctp_bindi(sctp, requested_port,
214 	    bind_to_req_port_only, user_specified);
215 	if (allocated_port == 0) {
216 		sctp_free_saddrs(sctp);
217 		if (bind_to_req_port_only) {
218 			err = EADDRINUSE;
219 			goto done;
220 		} else {
221 			err = EADDRNOTAVAIL;
222 			goto done;
223 		}
224 	}
225 	ASSERT(sctp->sctp_state == SCTPS_BOUND);
226 done:
227 	WAKE_SCTP(sctp);
228 	return (err);
229 }
230 
231 /*
232  * Perform bind/unbind operation of a list of addresses on a sctp_t
233  */
234 int
235 sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop)
236 {
237 	ASSERT(sctp != NULL);
238 	ASSERT(addrs != NULL);
239 	ASSERT(addrcnt > 0);
240 
241 	switch (bindop) {
242 	case SCTP_BINDX_ADD_ADDR:
243 		return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE,
244 		    sctp->sctp_lport));
245 	case SCTP_BINDX_REM_ADDR:
246 		return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE));
247 	default:
248 		return (EINVAL);
249 	}
250 }
251 
252 /*
253  * Add a list of addresses to a sctp_t.
254  */
255 int
256 sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
257     boolean_t caller_hold_lock, in_port_t port)
258 {
259 	int		err = 0;
260 	boolean_t	do_asconf = B_FALSE;
261 
262 	if (!caller_hold_lock)
263 		RUN_SCTP(sctp);
264 
265 	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
266 		if (!caller_hold_lock)
267 			WAKE_SCTP(sctp);
268 		return (EINVAL);
269 	}
270 
271 	if (sctp->sctp_state > SCTPS_LISTEN) {
272 		/*
273 		 * Let's do some checking here rather than undoing the
274 		 * add later (for these reasons).
275 		 */
276 		if (!sctp_addip_enabled || !sctp->sctp_understands_asconf ||
277 		    !sctp->sctp_understands_addip) {
278 			if (!caller_hold_lock)
279 				WAKE_SCTP(sctp);
280 			return (EINVAL);
281 		}
282 		do_asconf = B_TRUE;
283 	}
284 	/*
285 	 * On a clustered node, for an inaddr_any bind, we will pass the list
286 	 * of all the addresses in the global list, minus any address on the
287 	 * loopback interface, and expect the clustering susbsystem to give us
288 	 * the correct list for the 'port'. For explicit binds we give the
289 	 * list of addresses  and the clustering module validates it for the
290 	 * 'port'.
291 	 *
292 	 * On a non-clustered node, cl_sctp_check_addrs will be NULL and
293 	 * we proceed as usual.
294 	 */
295 	if (cl_sctp_check_addrs != NULL) {
296 		uchar_t		*addrlist = NULL;
297 		size_t		size = 0;
298 		int		unspec = 0;
299 		boolean_t	do_listen;
300 		uchar_t		*llist = NULL;
301 		size_t		lsize = 0;
302 
303 		/*
304 		 * If we are adding addresses after listening, but before
305 		 * an association is established, we need to update the
306 		 * clustering module with this info.
307 		 */
308 		do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND &&
309 		    cl_sctp_listen != NULL;
310 
311 		err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist,
312 		    &unspec, &size);
313 		if (err != 0) {
314 			ASSERT(addrlist == NULL);
315 			ASSERT(addrcnt == 0);
316 			ASSERT(size == 0);
317 			if (!caller_hold_lock)
318 				WAKE_SCTP(sctp);
319 			return (err);
320 		}
321 		ASSERT(addrlist != NULL);
322 		(*cl_sctp_check_addrs)(sctp->sctp_family, port, &addrlist,
323 		    size, &addrcnt, unspec == 1);
324 		if (addrcnt == 0) {
325 			/* We free the list */
326 			kmem_free(addrlist, size);
327 			if (!caller_hold_lock)
328 				WAKE_SCTP(sctp);
329 			return (EINVAL);
330 		}
331 		if (do_listen) {
332 			lsize = sizeof (in6_addr_t) * addrcnt;
333 			llist = kmem_alloc(lsize, KM_SLEEP);
334 		}
335 		err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist,
336 		    lsize);
337 		if (err == 0 && do_listen) {
338 			(*cl_sctp_listen)(sctp->sctp_family, llist,
339 			    addrcnt, sctp->sctp_lport);
340 			/* list will be freed by the clustering module */
341 		} else if (err != 0 && llist != NULL) {
342 			kmem_free(llist, lsize);
343 		}
344 		/* free the list we allocated */
345 		kmem_free(addrlist, size);
346 	} else {
347 		err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0);
348 	}
349 	if (err != 0) {
350 		if (!caller_hold_lock)
351 			WAKE_SCTP(sctp);
352 		return (err);
353 	}
354 	/* Need to send  ASCONF messages */
355 	if (do_asconf) {
356 		err = sctp_add_ip(sctp, addrs, addrcnt);
357 		if (err != 0) {
358 			sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE);
359 			if (!caller_hold_lock)
360 				WAKE_SCTP(sctp);
361 			return (err);
362 		}
363 	}
364 	if (!caller_hold_lock)
365 		WAKE_SCTP(sctp);
366 	if (do_asconf)
367 		sctp_process_sendq(sctp);
368 	return (0);
369 }
370 
371 /*
372  * Remove one or more addresses bound to the sctp_t.
373  */
374 int
375 sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
376     boolean_t caller_hold_lock)
377 {
378 	int		error = 0;
379 	boolean_t	do_asconf = B_FALSE;
380 	uchar_t		*ulist = NULL;
381 	size_t		usize = 0;
382 
383 	if (!caller_hold_lock)
384 		RUN_SCTP(sctp);
385 
386 	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
387 		if (!caller_hold_lock)
388 			WAKE_SCTP(sctp);
389 		return (EINVAL);
390 	}
391 	/*
392 	 * Fail the remove if we are beyond listen, but can't send this
393 	 * to the peer.
394 	 */
395 	if (sctp->sctp_state > SCTPS_LISTEN) {
396 		if (!sctp_addip_enabled || !sctp->sctp_understands_asconf ||
397 		    !sctp->sctp_understands_addip) {
398 			if (!caller_hold_lock)
399 				WAKE_SCTP(sctp);
400 			return (EINVAL);
401 		}
402 		do_asconf = B_TRUE;
403 	}
404 
405 	/* Can't delete the last address nor all of the addresses */
406 	if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) {
407 		if (!caller_hold_lock)
408 			WAKE_SCTP(sctp);
409 		return (EINVAL);
410 	}
411 
412 	if (cl_sctp_unlisten != NULL && !do_asconf &&
413 	    sctp->sctp_state > SCTPS_BOUND) {
414 		usize = sizeof (in6_addr_t) * addrcnt;
415 		ulist = kmem_alloc(usize, KM_SLEEP);
416 	}
417 
418 	error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize);
419 	if (error != 0) {
420 		if (ulist != NULL)
421 			kmem_free(ulist, usize);
422 		if (!caller_hold_lock)
423 			WAKE_SCTP(sctp);
424 		return (error);
425 	}
426 	/* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */
427 	if (ulist != NULL) {
428 		ASSERT(cl_sctp_unlisten != NULL);
429 		(*cl_sctp_unlisten)(sctp->sctp_family, ulist, addrcnt,
430 		    sctp->sctp_lport);
431 		/* ulist will be freed by the clustering module */
432 	}
433 	if (!caller_hold_lock)
434 		WAKE_SCTP(sctp);
435 	if (do_asconf)
436 		sctp_process_sendq(sctp);
437 	return (error);
438 }
439 
440 /*
441  * If the "bind_to_req_port_only" parameter is set, if the requested port
442  * number is available, return it, If not return 0
443  *
444  * If "bind_to_req_port_only" parameter is not set and
445  * If the requested port number is available, return it.  If not, return
446  * the first anonymous port we happen across.  If no anonymous ports are
447  * available, return 0. addr is the requested local address, if any.
448  *
449  * In either case, when succeeding update the sctp_t to record the port number
450  * and insert it in the bind hash table.
451  */
452 in_port_t
453 sctp_bindi(sctp_t *sctp, in_port_t port, int bind_to_req_port_only,
454     int user_specified)
455 {
456 	/* number of times we have run around the loop */
457 	int count = 0;
458 	/* maximum number of times to run around the loop */
459 	int loopmax;
460 	zoneid_t zoneid = sctp->sctp_zoneid;
461 
462 	/*
463 	 * Lookup for free addresses is done in a loop and "loopmax"
464 	 * influences how long we spin in the loop
465 	 */
466 	if (bind_to_req_port_only) {
467 		/*
468 		 * If the requested port is busy, don't bother to look
469 		 * for a new one. Setting loop maximum count to 1 has
470 		 * that effect.
471 		 */
472 		loopmax = 1;
473 	} else {
474 		/*
475 		 * If the requested port is busy, look for a free one
476 		 * in the anonymous port range.
477 		 * Set loopmax appropriately so that one does not look
478 		 * forever in the case all of the anonymous ports are in use.
479 		 */
480 		loopmax = (sctp_largest_anon_port -
481 		    sctp_smallest_anon_port + 1);
482 	}
483 	do {
484 		uint16_t	lport;
485 		sctp_tf_t	*tbf;
486 		sctp_t		*lsctp;
487 		int		addrcmp;
488 
489 		lport = htons(port);
490 
491 		/*
492 		 * Ensure that the sctp_t is not currently in the bind hash.
493 		 * Hold the lock on the hash bucket to ensure that
494 		 * the duplicate check plus the insertion is an atomic
495 		 * operation.
496 		 *
497 		 * This function does an inline lookup on the bind hash list
498 		 * Make sure that we access only members of sctp_t
499 		 * and that we don't look at sctp_sctp, since we are not
500 		 * doing a SCTPB_REFHOLD. For more details please see the notes
501 		 * in sctp_compress()
502 		 */
503 		sctp_bind_hash_remove(sctp);
504 		tbf = &sctp_bind_fanout[SCTP_BIND_HASH(port)];
505 		mutex_enter(&tbf->tf_lock);
506 		for (lsctp = tbf->tf_sctp; lsctp != NULL;
507 		    lsctp = lsctp->sctp_bind_hash) {
508 
509 			if (lport != lsctp->sctp_lport ||
510 			    lsctp->sctp_zoneid != zoneid ||
511 			    lsctp->sctp_state < SCTPS_BOUND)
512 				continue;
513 
514 			addrcmp = sctp_compare_saddrs(sctp, lsctp);
515 			if (addrcmp != SCTP_ADDR_DISJOINT) {
516 				if (!sctp->sctp_reuseaddr) {
517 					/* in use */
518 					break;
519 				} else if (lsctp->sctp_state == SCTPS_BOUND ||
520 				    lsctp->sctp_state == SCTPS_LISTEN) {
521 					/*
522 					 * socket option SO_REUSEADDR is set
523 					 * on the binding sctp_t.
524 					 *
525 					 * We have found a match of IP source
526 					 * address and source port, which is
527 					 * refused regardless of the
528 					 * SO_REUSEADDR setting, so we break.
529 					 */
530 					break;
531 				}
532 			}
533 		}
534 		if (lsctp != NULL) {
535 			/* The port number is busy */
536 			mutex_exit(&tbf->tf_lock);
537 		} else {
538 			/*
539 			 * This port is ours. Insert in fanout and mark as
540 			 * bound to prevent others from getting the port
541 			 * number.
542 			 */
543 			sctp->sctp_state = SCTPS_BOUND;
544 			sctp->sctp_lport = lport;
545 			sctp->sctp_sctph->sh_sport = sctp->sctp_lport;
546 
547 			ASSERT(&sctp_bind_fanout[SCTP_BIND_HASH(port)] == tbf);
548 			sctp_bind_hash_insert(tbf, sctp, 1);
549 
550 			mutex_exit(&tbf->tf_lock);
551 
552 			/*
553 			 * We don't want sctp_next_port_to_try to "inherit"
554 			 * a port number supplied by the user in a bind.
555 			 */
556 			if (user_specified != 0)
557 				return (port);
558 
559 			/*
560 			 * This is the only place where sctp_next_port_to_try
561 			 * is updated. After the update, it may or may not
562 			 * be in the valid range.
563 			 */
564 			sctp_next_port_to_try = port + 1;
565 			return (port);
566 		}
567 
568 		if ((count == 0) && (user_specified)) {
569 			/*
570 			 * We may have to return an anonymous port. So
571 			 * get one to start with.
572 			 */
573 			port = sctp_update_next_port(sctp_next_port_to_try);
574 			user_specified = 0;
575 		} else {
576 			port = sctp_update_next_port(port + 1);
577 		}
578 
579 		/*
580 		 * Don't let this loop run forever in the case where
581 		 * all of the anonymous ports are in use.
582 		 */
583 	} while (++count < loopmax);
584 	return (0);
585 }
586 
587 /*
588  * Don't let port fall into the privileged range.
589  * Since the extra privileged ports can be arbitrary we also
590  * ensure that we exclude those from consideration.
591  * sctp_g_epriv_ports is not sorted thus we loop over it until
592  * there are no changes.
593  *
594  * Note: No locks are held when inspecting sctp_g_*epriv_ports
595  * but instead the code relies on:
596  * - the fact that the address of the array and its size never changes
597  * - the atomic assignment of the elements of the array
598  */
599 in_port_t
600 sctp_update_next_port(in_port_t port)
601 {
602 	int i;
603 
604 retry:
605 	if (port < sctp_smallest_anon_port || port > sctp_largest_anon_port)
606 		port = sctp_smallest_anon_port;
607 
608 	if (port < sctp_smallest_nonpriv_port)
609 		port = sctp_smallest_nonpriv_port;
610 
611 	for (i = 0; i < sctp_g_num_epriv_ports; i++) {
612 		if (port == sctp_g_epriv_ports[i]) {
613 			port++;
614 			/*
615 			 * Make sure whether the port is in the
616 			 * valid range.
617 			 *
618 			 * XXX Note that if sctp_g_epriv_ports contains
619 			 * all the anonymous ports this will be an
620 			 * infinite loop.
621 			 */
622 			goto retry;
623 		}
624 	}
625 	return (port);
626 }
627