1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/socket.h>
30 #include <sys/ddi.h>
31 #include <sys/sunddi.h>
32 #include <sys/tsol/tndb.h>
33 #include <sys/tsol/tnet.h>
34 
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 
38 #include <inet/common.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/ipclassifier.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/ipp_common.h>
44 #include <inet/sctp_ip.h>
45 
46 #include "sctp_impl.h"
47 #include "sctp_addr.h"
48 
/*
 * Default association hash size.  The size must be a power of 2.
 *
 * This is also the minimum: sctp_hash_init() raises any smaller
 * configured value back up to SCTP_CONN_HASH_SIZE.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/*
 * Tunable number of buckets in the association (conn) fanout table.
 * sctp_hash_init() copies it into the per-stack sctps_conn_hash_size and
 * rounds a value that is not a power of two up to the next power of two.
 */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */
53 
54 /*
55  * Cluster networking hook for traversing current assoc list.
56  * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
58  */
59 int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
60     boolean_t);
61 static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
62     void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
63 
64 void
65 sctp_hash_init(sctp_stack_t *sctps)
66 {
67 	int i;
68 
69 	/* Start with /etc/system value */
70 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
71 
72 	if (sctps->sctps_conn_hash_size & (sctps->sctps_conn_hash_size - 1)) {
73 		/* Not a power of two. Round up to nearest power of two */
74 		for (i = 0; i < 31; i++) {
75 			if (sctps->sctps_conn_hash_size < (1 << i))
76 				break;
77 		}
78 		sctps->sctps_conn_hash_size = 1 << i;
79 	}
80 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
81 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
82 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
83 		    sctps->sctps_conn_hash_size);
84 	}
85 	sctps->sctps_conn_fanout =
86 		(sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
87 		    sizeof (sctp_tf_t),	KM_SLEEP);
88 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
89 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
90 			    MUTEX_DEFAULT, NULL);
91 	}
92 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
93 	    sizeof (sctp_tf_t),	KM_SLEEP);
94 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
95 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
96 		    MUTEX_DEFAULT, NULL);
97 	}
98 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
99 	    sizeof (sctp_tf_t),	KM_SLEEP);
100 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
101 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
102 		    MUTEX_DEFAULT, NULL);
103 	}
104 }
105 
106 void
107 sctp_hash_destroy(sctp_stack_t *sctps)
108 {
109 	int i;
110 
111 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
112 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
113 	}
114 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
115 	    sizeof (sctp_tf_t));
116 	sctps->sctps_conn_fanout = NULL;
117 
118 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
119 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
120 	}
121 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
122 	    sizeof (sctp_tf_t));
123 	sctps->sctps_listen_fanout = NULL;
124 
125 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
126 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
127 	}
128 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
129 	    sizeof (sctp_tf_t));
130 	sctps->sctps_bind_fanout = NULL;
131 }
132 
133 /*
134  * Walk the SCTP global list and refrele the ire for this ipif
135  * This is called when an address goes down, so that we release any reference
136  * to the ire associated with this address. Additionally, for any SCTP if
137  * this was the only/last address in its source list, we don't kill the
138  * assoc., if there is no address added subsequently, or if this does not
139  * come up, then the assoc. will die a natural death (i.e. timeout).
140  */
141 void
142 sctp_ire_cache_flush(ipif_t *ipif)
143 {
144 	sctp_t			*sctp;
145 	sctp_t			*sctp_prev = NULL;
146 	sctp_faddr_t		*fp;
147 	conn_t			*connp;
148 	ire_t			*ire;
149 	sctp_stack_t		*sctps = ipif->ipif_ill->ill_ipst->
150 	    ips_netstack->netstack_sctp;
151 
152 	sctp = sctps->sctps_gsctp;
153 	mutex_enter(&sctps->sctps_g_lock);
154 	while (sctp != NULL) {
155 		mutex_enter(&sctp->sctp_reflock);
156 		if (sctp->sctp_condemned) {
157 			mutex_exit(&sctp->sctp_reflock);
158 			sctp = list_next(&sctps->sctps_g_list, sctp);
159 			continue;
160 		}
161 		sctp->sctp_refcnt++;
162 		mutex_exit(&sctp->sctp_reflock);
163 		mutex_exit(&sctps->sctps_g_lock);
164 		if (sctp_prev != NULL)
165 			SCTP_REFRELE(sctp_prev);
166 
167 		RUN_SCTP(sctp);
168 		connp = sctp->sctp_connp;
169 		mutex_enter(&connp->conn_lock);
170 		ire = connp->conn_ire_cache;
171 		if (ire != NULL && ire->ire_ipif == ipif) {
172 			connp->conn_ire_cache = NULL;
173 			mutex_exit(&connp->conn_lock);
174 			IRE_REFRELE_NOTR(ire);
175 		} else {
176 			mutex_exit(&connp->conn_lock);
177 		}
178 		/* check for ires cached in faddr */
179 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) {
180 			/*
181 			 * If this ipif is being used as the source address
182 			 * we need to update it as well, else we will end
183 			 * up using the dead source address.
184 			 */
185 			ire = fp->ire;
186 			if (ire != NULL && ire->ire_ipif == ipif) {
187 				fp->ire = NULL;
188 				IRE_REFRELE_NOTR(ire);
189 			}
190 			/*
191 			 * This may result in setting the fp as unreachable,
192 			 * i.e. if all the source addresses are down. In
193 			 * that case the assoc. would timeout.
194 			 */
195 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
196 			    &fp->saddr)) {
197 				sctp_set_saddr(sctp, fp);
198 				if (fp == sctp->sctp_current &&
199 				    fp->state != SCTP_FADDRS_UNREACH) {
200 					sctp_set_faddr_current(sctp, fp);
201 				}
202 			}
203 		}
204 		WAKE_SCTP(sctp);
205 		sctp_prev = sctp;
206 		mutex_enter(&sctps->sctps_g_lock);
207 		sctp = list_next(&sctps->sctps_g_list, sctp);
208 	}
209 	mutex_exit(&sctps->sctps_g_lock);
210 	if (sctp_prev != NULL)
211 		SCTP_REFRELE(sctp_prev);
212 }
213 
214 /*
215  * Exported routine for extracting active SCTP associations.
216  * Like TCP, we terminate the walk if the callback returns non-zero.
217  *
218  * Need to walk all sctp_stack_t instances since this clustering
219  * interface is assumed global for all instances
220  */
221 int
222 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
223     void *arg, boolean_t cansleep)
224 {
225 	netstack_handle_t nh;
226 	netstack_t *ns;
227 	int ret = 0;
228 
229 	netstack_next_init(&nh);
230 	while ((ns = netstack_next(&nh)) != NULL) {
231 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
232 		    ns->netstack_sctp);
233 		netstack_rele(ns);
234 	}
235 	netstack_next_fini(&nh);
236 	return (ret);
237 }
238 
239 static int
240 cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
241     void *arg, boolean_t cansleep, sctp_stack_t *sctps)
242 {
243 	sctp_t		*sctp;
244 	sctp_t		*sctp_prev;
245 	cl_sctp_info_t	cl_sctpi;
246 	uchar_t		*slist;
247 	uchar_t		*flist;
248 
249 	sctp = sctps->sctps_gsctp;
250 	sctp_prev = NULL;
251 	mutex_enter(&sctps->sctps_g_lock);
252 	while (sctp != NULL) {
253 		size_t	ssize;
254 		size_t	fsize;
255 
256 		mutex_enter(&sctp->sctp_reflock);
257 		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
258 			mutex_exit(&sctp->sctp_reflock);
259 			sctp = list_next(&sctps->sctps_g_list, sctp);
260 			continue;
261 		}
262 		sctp->sctp_refcnt++;
263 		mutex_exit(&sctp->sctp_reflock);
264 		mutex_exit(&sctps->sctps_g_lock);
265 		if (sctp_prev != NULL)
266 			SCTP_REFRELE(sctp_prev);
267 		RUN_SCTP(sctp);
268 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
269 		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
270 
271 		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
272 		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
273 		if (slist == NULL || flist == NULL) {
274 			WAKE_SCTP(sctp);
275 			if (slist != NULL)
276 				kmem_free(slist, ssize);
277 			if (flist != NULL)
278 				kmem_free(flist, fsize);
279 			SCTP_REFRELE(sctp);
280 			return (1);
281 		}
282 		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
283 		sctp_get_saddr_list(sctp, slist, ssize);
284 		sctp_get_faddr_list(sctp, flist, fsize);
285 		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
286 		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
287 		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
288 		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
289 		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
290 		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
291 		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
292 		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
293 		WAKE_SCTP(sctp);
294 		cl_sctpi.cl_sctpi_laddrp = slist;
295 		cl_sctpi.cl_sctpi_faddrp = flist;
296 		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
297 			kmem_free(slist, ssize);
298 			kmem_free(flist, fsize);
299 			SCTP_REFRELE(sctp);
300 			return (1);
301 		}
302 		/* list will be freed by cl_callback */
303 		sctp_prev = sctp;
304 		mutex_enter(&sctps->sctps_g_lock);
305 		sctp = list_next(&sctps->sctps_g_list, sctp);
306 	}
307 	mutex_exit(&sctps->sctps_g_lock);
308 	if (sctp_prev != NULL)
309 		SCTP_REFRELE(sctp_prev);
310 	return (0);
311 }
312 
313 sctp_t *
314 sctp_conn_match(in6_addr_t *faddr, in6_addr_t *laddr, uint32_t ports,
315     zoneid_t zoneid, sctp_stack_t *sctps)
316 {
317 	sctp_tf_t		*tf;
318 	sctp_t			*sctp;
319 	sctp_faddr_t		*fp;
320 
321 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
322 	mutex_enter(&tf->tf_lock);
323 
324 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
325 		if (ports != sctp->sctp_ports ||
326 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
327 			continue;
328 		}
329 
330 		/* check for faddr match */
331 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
332 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
333 				break;
334 			}
335 		}
336 
337 		/* no faddr match; keep looking */
338 		if (fp == NULL)
339 			continue;
340 
341 		/* check for laddr match */
342 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
343 			SCTP_REFHOLD(sctp);
344 			goto done;
345 		}
346 		/* no match; continue to the next in the chain */
347 	}
348 
349 done:
350 	mutex_exit(&tf->tf_lock);
351 	return (sctp);
352 }
353 
354 static sctp_t *
355 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
356     sctp_stack_t *sctps)
357 {
358 	sctp_t			*sctp;
359 	sctp_tf_t		*tf;
360 	uint16_t		lport;
361 
362 	lport = ((uint16_t *)&ports)[1];
363 
364 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
365 	mutex_enter(&tf->tf_lock);
366 
367 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
368 		if (lport != sctp->sctp_lport ||
369 		    !IPCL_ZONE_MATCH(sctp->sctp_connp, zoneid)) {
370 			continue;
371 		}
372 
373 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
374 			SCTP_REFHOLD(sctp);
375 			goto done;
376 		}
377 		/* no match; continue to the next in the chain */
378 	}
379 
380 done:
381 	mutex_exit(&tf->tf_lock);
382 	return (sctp);
383 }
384 
385 /* called by ipsec_sctp_pol */
386 conn_t *
387 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
388     zoneid_t zoneid, sctp_stack_t *sctps)
389 {
390 	sctp_t *sctp;
391 
392 	if ((sctp = sctp_conn_match(src, dst, ports, zoneid, sctps)) == NULL) {
393 		/* Not in conn fanout; check listen fanout */
394 		if ((sctp = listen_match(dst, ports, zoneid, sctps)) == NULL)
395 			return (NULL);
396 	}
397 	return (sctp->sctp_connp);
398 }
399 
400 conn_t *
401 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
402     zoneid_t zoneid, mblk_t *mp, sctp_stack_t *sctps)
403 
404 {
405 	sctp_t *sctp;
406 	boolean_t shared_addr;
407 
408 	if ((sctp = sctp_conn_match(src, dst, ports, zoneid, sctps)) == NULL) {
409 		shared_addr = (zoneid == ALL_ZONES);
410 		if (shared_addr) {
411 			/*
412 			 * No need to handle exclusive-stack zones since
413 			 * ALL_ZONES only applies to the shared stack.
414 			 */
415 			zoneid = tsol_mlp_findzone(IPPROTO_SCTP,
416 			    htons(ntohl(ports) & 0xFFFF));
417 			/*
418 			 * If no shared MLP is found, tsol_mlp_findzone returns
419 			 * ALL_ZONES.  In that case, we assume it's SLP, and
420 			 * search for the zone based on the packet label.
421 			 * That will also return ALL_ZONES on failure.
422 			 */
423 			if (zoneid == ALL_ZONES)
424 				zoneid = tsol_packet_to_zoneid(mp);
425 			if (zoneid == ALL_ZONES)
426 				return (NULL);
427 		}
428 		/* Not in conn fanout; check listen fanout */
429 		if ((sctp = listen_match(dst, ports, zoneid, sctps)) == NULL)
430 			return (NULL);
431 		/*
432 		 * On systems running trusted extensions, check if dst
433 		 * should accept the packet. "IPV6_VERSION" indicates
434 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
435 		 * IPv6 addresses are supported.
436 		 */
437 		if (is_system_labeled() &&
438 		    !tsol_receive_local(mp, dst, IPV6_VERSION,
439 		    shared_addr, sctp->sctp_connp)) {
440 			DTRACE_PROBE3(
441 			    tx__ip__log__info__classify__sctp,
442 			    char *,
443 			    "connp(1) could not receive mp(2)",
444 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
445 			SCTP_REFRELE(sctp);
446 			return (NULL);
447 		}
448 	}
449 	return (sctp->sctp_connp);
450 }
451 
452 /*
453  * Fanout for SCTP packets
454  * The caller puts <fport, lport> in the ports parameter.
455  */
456 /* ARGSUSED */
457 void
458 ip_fanout_sctp(mblk_t *mp, ill_t *recv_ill, ipha_t *ipha,
459     uint32_t ports, uint_t flags, boolean_t mctl_present, boolean_t ip_policy,
460     zoneid_t zoneid)
461 {
462 	sctp_t *sctp;
463 	boolean_t isv4;
464 	conn_t *connp;
465 	mblk_t *first_mp;
466 	ip6_t *ip6h;
467 	in6_addr_t map_src, map_dst;
468 	in6_addr_t *src, *dst;
469 	ip_stack_t	*ipst;
470 	ipsec_stack_t	*ipss;
471 	sctp_stack_t	*sctps;
472 
473 	ASSERT(recv_ill != NULL);
474 	ipst = recv_ill->ill_ipst;
475 	sctps = ipst->ips_netstack->netstack_sctp;
476 	ipss = ipst->ips_netstack->netstack_ipsec;
477 
478 	first_mp = mp;
479 	if (mctl_present) {
480 		mp = first_mp->b_cont;
481 		ASSERT(mp != NULL);
482 	}
483 
484 	/* Assume IP provides aligned packets - otherwise toss */
485 	if (!OK_32PTR(mp->b_rptr)) {
486 		BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
487 		freemsg(first_mp);
488 		return;
489 	}
490 
491 	if (IPH_HDR_VERSION(ipha) == IPV6_VERSION) {
492 		ip6h = (ip6_t *)ipha;
493 		src = &ip6h->ip6_src;
494 		dst = &ip6h->ip6_dst;
495 		isv4 = B_FALSE;
496 	} else {
497 		ip6h = NULL;
498 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
499 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
500 		src = &map_src;
501 		dst = &map_dst;
502 		isv4 = B_TRUE;
503 	}
504 	connp = sctp_fanout(src, dst, ports, zoneid, mp, sctps);
505 	if (connp == NULL) {
506 		ip_fanout_sctp_raw(first_mp, recv_ill, ipha, isv4,
507 		    ports, mctl_present, flags, ip_policy, zoneid);
508 		return;
509 	}
510 	sctp = CONN2SCTP(connp);
511 
512 	/* Found a client; up it goes */
513 	BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsHCInDelivers);
514 
515 	/*
516 	 * We check some fields in conn_t without holding a lock.
517 	 * This should be fine.
518 	 */
519 	if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || mctl_present) {
520 		first_mp = ipsec_check_inbound_policy(first_mp, connp,
521 		    ipha, NULL, mctl_present);
522 		if (first_mp == NULL) {
523 			SCTP_REFRELE(sctp);
524 			return;
525 		}
526 	}
527 
528 	/* Initiate IPPF processing for fastpath */
529 	if (IPP_ENABLED(IPP_LOCAL_IN, ipst)) {
530 		ip_process(IPP_LOCAL_IN, &mp,
531 		    recv_ill->ill_phyint->phyint_ifindex);
532 		if (mp == NULL) {
533 			SCTP_REFRELE(sctp);
534 			if (mctl_present)
535 				freeb(first_mp);
536 			return;
537 		} else if (mctl_present) {
538 			/*
539 			 * ip_process might return a new mp.
540 			 */
541 			ASSERT(first_mp != mp);
542 			first_mp->b_cont = mp;
543 		} else {
544 			first_mp = mp;
545 		}
546 	}
547 
548 	if (connp->conn_recvif || connp->conn_recvslla ||
549 	    connp->conn_ip_recvpktinfo) {
550 		int in_flags = 0;
551 
552 		if (connp->conn_recvif || connp->conn_ip_recvpktinfo) {
553 			in_flags = IPF_RECVIF;
554 		}
555 		if (connp->conn_recvslla) {
556 			in_flags |= IPF_RECVSLLA;
557 		}
558 		if (isv4) {
559 			mp = ip_add_info(mp, recv_ill, in_flags,
560 			    IPCL_ZONEID(connp), ipst);
561 		} else {
562 			mp = ip_add_info_v6(mp, recv_ill, &ip6h->ip6_dst);
563 		}
564 		if (mp == NULL) {
565 			SCTP_REFRELE(sctp);
566 			if (mctl_present)
567 				freeb(first_mp);
568 			return;
569 		} else if (mctl_present) {
570 			/*
571 			 * ip_add_info might return a new mp.
572 			 */
573 			ASSERT(first_mp != mp);
574 			first_mp->b_cont = mp;
575 		} else {
576 			first_mp = mp;
577 		}
578 	}
579 
580 	mutex_enter(&sctp->sctp_lock);
581 	if (sctp->sctp_running) {
582 		if (mctl_present)
583 			mp->b_prev = first_mp;
584 		if (!sctp_add_recvq(sctp, mp, B_FALSE)) {
585 			BUMP_MIB(recv_ill->ill_ip_mib, ipIfStatsInDiscards);
586 			freemsg(first_mp);
587 		}
588 		mutex_exit(&sctp->sctp_lock);
589 	} else {
590 		sctp->sctp_running = B_TRUE;
591 		mutex_exit(&sctp->sctp_lock);
592 
593 		mutex_enter(&sctp->sctp_recvq_lock);
594 		if (sctp->sctp_recvq != NULL) {
595 			if (mctl_present)
596 				mp->b_prev = first_mp;
597 			if (!sctp_add_recvq(sctp, mp, B_TRUE)) {
598 				BUMP_MIB(recv_ill->ill_ip_mib,
599 				    ipIfStatsInDiscards);
600 				freemsg(first_mp);
601 			}
602 			mutex_exit(&sctp->sctp_recvq_lock);
603 			WAKE_SCTP(sctp);
604 		} else {
605 			mutex_exit(&sctp->sctp_recvq_lock);
606 			sctp_input_data(sctp, mp, (mctl_present ? first_mp :
607 			    NULL));
608 			WAKE_SCTP(sctp);
609 			sctp_process_sendq(sctp);
610 		}
611 	}
612 	SCTP_REFRELE(sctp);
613 }
614 
615 void
616 sctp_conn_hash_remove(sctp_t *sctp)
617 {
618 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
619 
620 	if (!tf) {
621 		return;
622 	}
623 	/*
624 	 * On a clustered note send this notification to the clustering
625 	 * subsystem.
626 	 */
627 	if (cl_sctp_disconnect != NULL) {
628 		(*cl_sctp_disconnect)(sctp->sctp_family,
629 		    (cl_sctp_handle_t)sctp);
630 	}
631 
632 	mutex_enter(&tf->tf_lock);
633 	ASSERT(tf->tf_sctp);
634 	if (tf->tf_sctp == sctp) {
635 		tf->tf_sctp = sctp->sctp_conn_hash_next;
636 		if (sctp->sctp_conn_hash_next) {
637 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
638 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
639 		}
640 	} else {
641 		ASSERT(sctp->sctp_conn_hash_prev);
642 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
643 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
644 		    sctp->sctp_conn_hash_next;
645 
646 		if (sctp->sctp_conn_hash_next) {
647 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
648 			    == sctp);
649 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
650 			    sctp->sctp_conn_hash_prev;
651 		}
652 	}
653 	sctp->sctp_conn_hash_next = NULL;
654 	sctp->sctp_conn_hash_prev = NULL;
655 	sctp->sctp_conn_tfp = NULL;
656 	mutex_exit(&tf->tf_lock);
657 }
658 
659 void
660 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
661 {
662 	if (sctp->sctp_conn_tfp) {
663 		sctp_conn_hash_remove(sctp);
664 	}
665 
666 	if (!caller_holds_lock) {
667 		mutex_enter(&tf->tf_lock);
668 	} else {
669 		ASSERT(MUTEX_HELD(&tf->tf_lock));
670 	}
671 
672 	sctp->sctp_conn_hash_next = tf->tf_sctp;
673 	if (tf->tf_sctp) {
674 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
675 	}
676 	sctp->sctp_conn_hash_prev = NULL;
677 	tf->tf_sctp = sctp;
678 	sctp->sctp_conn_tfp = tf;
679 	if (!caller_holds_lock) {
680 		mutex_exit(&tf->tf_lock);
681 	}
682 }
683 
684 void
685 sctp_listen_hash_remove(sctp_t *sctp)
686 {
687 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
688 
689 	if (!tf) {
690 		return;
691 	}
692 	/*
693 	 * On a clustered note send this notification to the clustering
694 	 * subsystem.
695 	 */
696 	if (cl_sctp_unlisten != NULL) {
697 		uchar_t	*slist;
698 		ssize_t	ssize;
699 
700 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
701 		slist = kmem_alloc(ssize, KM_SLEEP);
702 		sctp_get_saddr_list(sctp, slist, ssize);
703 		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
704 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
705 		/* list will be freed by the clustering module */
706 	}
707 
708 	mutex_enter(&tf->tf_lock);
709 	ASSERT(tf->tf_sctp);
710 	if (tf->tf_sctp == sctp) {
711 		tf->tf_sctp = sctp->sctp_listen_hash_next;
712 		if (sctp->sctp_listen_hash_next) {
713 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
714 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
715 		}
716 	} else {
717 		ASSERT(sctp->sctp_listen_hash_prev);
718 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
719 		    sctp);
720 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
721 		    sctp->sctp_listen_hash_next;
722 
723 		if (sctp->sctp_listen_hash_next) {
724 			ASSERT(
725 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev ==
726 			    sctp);
727 			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
728 			    sctp->sctp_listen_hash_prev;
729 		}
730 	}
731 	sctp->sctp_listen_hash_next = NULL;
732 	sctp->sctp_listen_hash_prev = NULL;
733 	sctp->sctp_listen_tfp = NULL;
734 	mutex_exit(&tf->tf_lock);
735 }
736 
737 void
738 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
739 {
740 	if (sctp->sctp_listen_tfp) {
741 		sctp_listen_hash_remove(sctp);
742 	}
743 
744 	mutex_enter(&tf->tf_lock);
745 	sctp->sctp_listen_hash_next = tf->tf_sctp;
746 	if (tf->tf_sctp) {
747 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
748 	}
749 	sctp->sctp_listen_hash_prev = NULL;
750 	tf->tf_sctp = sctp;
751 	sctp->sctp_listen_tfp = tf;
752 	mutex_exit(&tf->tf_lock);
753 	/*
754 	 * On a clustered note send this notification to the clustering
755 	 * subsystem.
756 	 */
757 	if (cl_sctp_listen != NULL) {
758 		uchar_t	*slist;
759 		ssize_t	ssize;
760 
761 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
762 		slist = kmem_alloc(ssize, KM_SLEEP);
763 		sctp_get_saddr_list(sctp, slist, ssize);
764 		(*cl_sctp_listen)(sctp->sctp_family, slist,
765 		    sctp->sctp_nsaddrs, sctp->sctp_lport);
766 		/* list will be freed by the clustering module */
767 	}
768 }
769 
770 /*
771  * Hash list insertion routine for sctp_t structures.
772  * Inserts entries with the ones bound to a specific IP address first
773  * followed by those bound to INADDR_ANY.
774  */
775 void
776 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
777 {
778 	sctp_t	**sctpp;
779 	sctp_t	*sctpnext;
780 
781 	if (sctp->sctp_ptpbhn != NULL) {
782 		ASSERT(!caller_holds_lock);
783 		sctp_bind_hash_remove(sctp);
784 	}
785 	sctpp = &tbf->tf_sctp;
786 	if (!caller_holds_lock) {
787 		mutex_enter(&tbf->tf_lock);
788 	} else {
789 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
790 	}
791 	sctpnext = sctpp[0];
792 	if (sctpnext) {
793 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
794 	}
795 	sctp->sctp_bind_hash = sctpnext;
796 	sctp->sctp_ptpbhn = sctpp;
797 	sctpp[0] = sctp;
798 	/* For sctp_*_hash_remove */
799 	sctp->sctp_bind_lockp = &tbf->tf_lock;
800 	if (!caller_holds_lock)
801 		mutex_exit(&tbf->tf_lock);
802 }
803 
804 /*
805  * Hash list removal routine for sctp_t structures.
806  */
807 void
808 sctp_bind_hash_remove(sctp_t *sctp)
809 {
810 	sctp_t	*sctpnext;
811 	kmutex_t *lockp;
812 
813 	lockp = sctp->sctp_bind_lockp;
814 
815 	if (sctp->sctp_ptpbhn == NULL)
816 		return;
817 
818 	ASSERT(lockp != NULL);
819 	mutex_enter(lockp);
820 	if (sctp->sctp_ptpbhn) {
821 		sctpnext = sctp->sctp_bind_hash;
822 		if (sctpnext) {
823 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
824 			sctp->sctp_bind_hash = NULL;
825 		}
826 		*sctp->sctp_ptpbhn = sctpnext;
827 		sctp->sctp_ptpbhn = NULL;
828 	}
829 	mutex_exit(lockp);
830 	sctp->sctp_bind_lockp = NULL;
831 }
832 
833 /*
834  * Similar to but more general than ip_sctp's conn_match().
835  *
836  * Matches sets of addresses as follows: if the argument addr set is
837  * a complete subset of the corresponding addr set in the sctp_t, it
838  * is a match.
839  *
840  * Caller must hold tf->tf_lock.
841  *
842  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
843  */
844 sctp_t *
845 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
846     int min_state)
847 {
848 
849 	sctp_t *sctp;
850 	sctp_faddr_t *fp;
851 
852 	ASSERT(MUTEX_HELD(&tf->tf_lock));
853 
854 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
855 		if (*ports != sctp->sctp_ports || sctp->sctp_state <
856 		    min_state) {
857 			continue;
858 		}
859 
860 		/* check for faddr match */
861 		for (fp = sctp->sctp_faddrs; fp; fp = fp->next) {
862 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->faddr)) {
863 				break;
864 			}
865 		}
866 
867 		if (!fp) {
868 			/* no faddr match; keep looking */
869 			continue;
870 		}
871 
872 		/* check for laddr subset match */
873 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_SUBSET) {
874 			goto done;
875 		}
876 
877 		/* no match; continue searching */
878 	}
879 
880 done:
881 	if (sctp) {
882 		SCTP_REFHOLD(sctp);
883 	}
884 	return (sctp);
885 }
886 
887 boolean_t
888 ip_fanout_sctp_raw_match(conn_t *connp, uint32_t ports, ipha_t *ipha)
889 {
890 	uint16_t lport;
891 
892 	if (connp->conn_fully_bound) {
893 		return (IPCL_CONN_MATCH(connp, IPPROTO_SCTP, ipha->ipha_src,
894 		    ipha->ipha_dst, ports));
895 	} else {
896 		lport = htons(ntohl(ports) & 0xFFFF);
897 		return (IPCL_BIND_MATCH(connp, IPPROTO_SCTP, ipha->ipha_dst,
898 		    lport));
899 	}
900 }
901 
902 boolean_t
903 ip_fanout_sctp_raw_match_v6(conn_t *connp, uint32_t ports, ip6_t *ip6h,
904     boolean_t for_v4)
905 {
906 	uint16_t lport;
907 	in6_addr_t	v6dst;
908 
909 	if (!for_v4 && connp->conn_fully_bound) {
910 		return (IPCL_CONN_MATCH_V6(connp, IPPROTO_SCTP, ip6h->ip6_src,
911 		    ip6h->ip6_dst, ports));
912 	} else {
913 		lport = htons(ntohl(ports) & 0xFFFF);
914 		if (for_v4)
915 			v6dst = ipv6_all_zeros;
916 		else
917 			v6dst = ip6h->ip6_dst;
918 		return (IPCL_BIND_MATCH_V6(connp, IPPROTO_SCTP, v6dst, lport));
919 	}
920 }
921