xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision bb25c06c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stream.h>
30 #include <sys/stropts.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <inet/common.h>
49 #include <netinet/ip6.h>
50 #include <inet/ip.h>
51 #include <inet/ip_ire.h>
52 #include <inet/ip6.h>
53 #include <inet/ipsec_info.h>
54 #include <inet/ipsec_impl.h>
55 #include <inet/tcp.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsecah.h>
58 #include <inet/ipsecesp.h>
59 #include <sys/random.h>
60 #include <sys/dlpi.h>
61 #include <sys/iphada.h>
62 #include <inet/ip_if.h>
63 #include <inet/ipdrop.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/sctp_ip.h>
66 #include <inet/tun.h>
67 
68 /*
69  * This source file contains Security Association Database (SADB) common
70  * routines.  They are linked in with the AH module.  Since AH has no chance
71  * of falling under export control, it was safe to link it in there.
72  */
73 
/*
 * Packet dropper for generic SADB drops.  The routines in this file hand
 * it to ip_drop_packet(), and register/unregister it through
 * ip_drop_register()/ip_drop_unregister().
 */
static ipdropper_t sadb_dropper;

/*
 * Forward declarations for file-local routines that are referenced before
 * their definitions appear.
 */
static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
    ipsec_action_t *, boolean_t, uint32_t, uint32_t);
static void sadb_ill_df(ill_t *, mblk_t *, isaf_t *, int, boolean_t);
static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *, boolean_t, mblk_t **);
static void sadb_drain_torchq(queue_t *, mblk_t *);
static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t);
static void sadb_destroy(sadb_t *);
static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);

/* Saturating "base + delta" helper used by the expiration macros below. */
static time_t sadb_add_time(time_t, uint64_t);

/*
 * ipsacq_maxpackets is defined here to make it tunable
 * from /etc/system.
 *
 * NOTE(review): the line below is only an extern declaration; the storage
 * definition is not visible in this part of the file -- confirm where it
 * actually lives.
 */
extern uint64_t ipsacq_maxpackets;
93 
/*
 * SET_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is non-zero,
 * set ipsa_<exp> to ipsa_addtime plus that lifetime (saturating, see
 * sadb_add_time()).  A zero lifetime leaves ipsa_<exp> untouched.
 *
 * Note that "sa" is evaluated more than once, and that the expansion is a
 * bare brace block rather than a do { } while (0) statement.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0)					\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
100 
/*
 * UPDATE_EXPIRE(sa, delta, exp): if the SA's ipsa_<delta> lifetime is
 * non-zero, compute ipsa_usetime plus that lifetime (saturating) and make
 * ipsa_<exp> the earlier of its current value and the computed one.  An
 * ipsa_<exp> of zero is treated as "not set yet" and is simply replaced.
 *
 * "sa" is evaluated more than once; the expansion is a bare brace block.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0) {				\
		time_t upd_expire_candidate =				\
		    sadb_add_time((sa)->ipsa_usetime,			\
		    (sa)->ipsa_ ## delta);				\
		if ((sa)->ipsa_ ## exp == 0 ||				\
		    upd_expire_candidate < (sa)->ipsa_ ## exp)		\
			(sa)->ipsa_ ## exp = upd_expire_candidate;	\
	}								\
}
112 
113 
114 /* wrap the macro so we can pass it as a function pointer */
115 void
116 sadb_sa_refrele(void *target)
117 {
118 	IPSA_REFRELE(((ipsa_t *)target));
119 }
120 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * Parameters:
 *	base	starting time.
 *	delta	number of seconds to add; values above TIME_MAX are
 *		clipped to TIME_MAX first.
 *
 * Returns base + delta, or TIME_MAX if that sum would not fit in a time_t.
 *
 * The overflow test is done *before* the addition, using only in-range
 * signed arithmetic, so the result does not depend on how the compiler
 * converts an out-of-range unsigned value back to a signed time_t.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	time_t clipped;

	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this the cast below is always in range.
	 */
	if (delta > TIME_MAX)
		delta = TIME_MAX;
	clipped = (time_t)delta;

	/*
	 * Only a positive base can push the sum past TIME_MAX; for
	 * base <= 0 the sum lies between base and TIME_MAX.
	 */
	if (base > 0 && clipped > TIME_MAX - base)
		return (TIME_MAX);

	return (base + clipped);
}
171 
172 /*
173  * Callers of this function have already created a working security
174  * association, and have found the appropriate table & hash chain.  All this
175  * function does is check duplicates, and insert the SA.  The caller needs to
176  * hold the hash bucket lock and increment the refcnt before insertion.
177  *
178  * Return 0 if success, EEXIST if collision.
179  */
180 int
181 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
182 {
183 	ipsa_t **ptpn = NULL;
184 	ipsa_t *walker;
185 	boolean_t unspecsrc;
186 
187 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
188 
189 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
190 
191 	walker = bucket->isaf_ipsa;
192 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
193 
194 	/*
195 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
196 	 * of the list unless there's an unspecified source address, then
197 	 * insert it after the last SA with a specified source address.
198 	 *
199 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
200 	 * checking for collisions.
201 	 */
202 
203 	while (walker != NULL) {
204 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
205 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
206 			if (walker->ipsa_spi == ipsa->ipsa_spi)
207 				return (EEXIST);
208 
209 			mutex_enter(&walker->ipsa_lock);
210 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
211 			    (walker->ipsa_flags & IPSA_F_USED) &&
212 			    ((walker->ipsa_unique_id &
213 				walker->ipsa_unique_mask) ==
214 				(ipsa->ipsa_unique_id &
215 				    ipsa->ipsa_unique_mask))) {
216 				walker->ipsa_flags |= IPSA_F_CINVALID;
217 			}
218 			mutex_exit(&walker->ipsa_lock);
219 		}
220 
221 		if (ptpn == NULL && unspecsrc) {
222 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
223 			    walker->ipsa_addrfam))
224 				ptpn = walker->ipsa_ptpn;
225 			else if (walker->ipsa_next == NULL)
226 				ptpn = &walker->ipsa_next;
227 		}
228 
229 		walker = walker->ipsa_next;
230 	}
231 
232 	if (ptpn == NULL)
233 		ptpn = &bucket->isaf_ipsa;
234 	ipsa->ipsa_next = *ptpn;
235 	ipsa->ipsa_ptpn = ptpn;
236 	if (ipsa->ipsa_next != NULL)
237 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
238 	*ptpn = ipsa;
239 	ipsa->ipsa_linklock = &bucket->isaf_lock;
240 
241 	return (0);
242 }
243 
244 /*
245  * Free a security association.  Its reference count is 0, which means
246  * I must free it.  The SA must be unlocked and must not be linked into
247  * any fanout list.
248  */
249 static void
250 sadb_freeassoc(ipsa_t *ipsa)
251 {
252 	ASSERT(!MUTEX_HELD(&ipsa->ipsa_lock));
253 	ASSERT(ipsa->ipsa_refcnt == 0);
254 	ASSERT(ipsa->ipsa_next == NULL);
255 	ASSERT(ipsa->ipsa_ptpn == NULL);
256 
257 	ip_drop_packet(sadb_clear_lpkt(ipsa), B_TRUE, NULL, NULL,
258 	    &ipdrops_sadb_inlarval_timeout, &sadb_dropper);
259 
260 	mutex_enter(&ipsa->ipsa_lock);
261 
262 	if (ipsa->ipsa_natt_ka_timer != 0)
263 		(void) quntimeout(ipsa->ipsa_natt_q, ipsa->ipsa_natt_ka_timer);
264 
265 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
266 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
267 	mutex_exit(&ipsa->ipsa_lock);
268 
269 	/* bzero() these fields for paranoia's sake. */
270 	if (ipsa->ipsa_authkey != NULL) {
271 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
272 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
273 	}
274 	if (ipsa->ipsa_encrkey != NULL) {
275 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
276 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
277 	}
278 	if (ipsa->ipsa_src_cid != NULL) {
279 		IPSID_REFRELE(ipsa->ipsa_src_cid);
280 	}
281 	if (ipsa->ipsa_dst_cid != NULL) {
282 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
283 	}
284 	if (ipsa->ipsa_integ != NULL)
285 		kmem_free(ipsa->ipsa_integ, ipsa->ipsa_integlen);
286 	if (ipsa->ipsa_sens != NULL)
287 		kmem_free(ipsa->ipsa_sens, ipsa->ipsa_senslen);
288 
289 	mutex_destroy(&ipsa->ipsa_lock);
290 	kmem_free(ipsa, sizeof (*ipsa));
291 }
292 
293 /*
294  * Unlink a security association from a hash bucket.  Assume the hash bucket
295  * lock is held, but the association's lock is not.
296  *
297  * Note that we do not bump the bucket's generation number here because
298  * we might not be making a visible change to the set of visible SA's.
299  * All callers MUST bump the bucket's generation number before they unlock
300  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
301  * was present in the bucket at the time it was locked.
302  */
303 void
304 sadb_unlinkassoc(ipsa_t *ipsa)
305 {
306 	ASSERT(ipsa->ipsa_linklock != NULL);
307 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
308 
309 	/* These fields are protected by the link lock. */
310 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
311 	if (ipsa->ipsa_next != NULL) {
312 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
313 		ipsa->ipsa_next = NULL;
314 	}
315 
316 	ipsa->ipsa_ptpn = NULL;
317 
318 	/* This may destroy the SA. */
319 	IPSA_REFRELE(ipsa);
320 }
321 
322 /*
323  * Create a larval security association with the specified SPI.	 All other
324  * fields are zeroed.
325  */
326 static ipsa_t *
327 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam)
328 {
329 	ipsa_t *newbie;
330 
331 	/*
332 	 * Allocate...
333 	 */
334 
335 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
336 	if (newbie == NULL) {
337 		/* Can't make new larval SA. */
338 		return (NULL);
339 	}
340 
341 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
342 	newbie->ipsa_spi = spi;
343 
344 	/*
345 	 * Copy addresses...
346 	 */
347 
348 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
349 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
350 
351 	newbie->ipsa_addrfam = addrfam;
352 
353 	/*
354 	 * Set common initialization values, including refcnt.
355 	 */
356 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
357 	newbie->ipsa_state = IPSA_STATE_LARVAL;
358 	newbie->ipsa_refcnt = 1;
359 	newbie->ipsa_freefunc = sadb_freeassoc;
360 
361 	/*
362 	 * There aren't a lot of other common initialization values, as
363 	 * they are copied in from the PF_KEY message.
364 	 */
365 
366 	return (newbie);
367 }
368 
369 /*
370  * Call me to initialize a security association fanout.
371  */
372 static int
373 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
374 {
375 	isaf_t *table;
376 	int i;
377 
378 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
379 	*tablep = table;
380 
381 	if (table == NULL)
382 		return (ENOMEM);
383 
384 	for (i = 0; i < size; i++) {
385 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
386 		table[i].isaf_ipsa = NULL;
387 		table[i].isaf_gen = 0;
388 	}
389 
390 	return (0);
391 }
392 
393 /*
394  * Call me to initialize an acquire fanout
395  */
396 static int
397 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
398 {
399 	iacqf_t *table;
400 	int i;
401 
402 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
403 	*tablep = table;
404 
405 	if (table == NULL)
406 		return (ENOMEM);
407 
408 	for (i = 0; i < size; i++) {
409 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
410 		table[i].iacqf_ipsacq = NULL;
411 	}
412 
413 	return (0);
414 }
415 
416 /*
417  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
418  * caller must clean up partial allocations.
419  */
420 static int
421 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
422 {
423 	ASSERT(sp->sdb_of == NULL);
424 	ASSERT(sp->sdb_if == NULL);
425 	ASSERT(sp->sdb_acq == NULL);
426 
427 	sp->sdb_hashsize = size;
428 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
429 		return (ENOMEM);
430 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
431 		return (ENOMEM);
432 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
433 		return (ENOMEM);
434 
435 	return (0);
436 }
437 
438 /*
439  * Call me to initialize an SADB instance; fall back to default size on failure.
440  */
441 static void
442 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver)
443 {
444 	ASSERT(sp->sdb_of == NULL);
445 	ASSERT(sp->sdb_if == NULL);
446 	ASSERT(sp->sdb_acq == NULL);
447 
448 	if (size < IPSEC_DEFAULT_HASH_SIZE)
449 		size = IPSEC_DEFAULT_HASH_SIZE;
450 
451 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
452 
453 		cmn_err(CE_WARN,
454 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
455 		    size, ver, name);
456 
457 		sadb_destroy(sp);
458 		size = IPSEC_DEFAULT_HASH_SIZE;
459 		cmn_err(CE_WARN, "Falling back to %d entries", size);
460 		(void) sadb_init_trial(sp, size, KM_SLEEP);
461 	}
462 }
463 
464 
465 /*
466  * Initialize an SADB-pair.
467  */
468 void
469 sadbp_init(const char *name, sadbp_t *sp, int type, int size)
470 {
471 	sadb_init(name, &sp->s_v4, size, 4);
472 	sadb_init(name, &sp->s_v6, size, 6);
473 
474 	sp->s_satype = type;
475 
476 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
477 	if (type == SADB_SATYPE_AH)
478 		ip_drop_register(&sadb_dropper, "IPsec SADB");
479 }
480 
481 /*
482  * Deliver a single SADB_DUMP message representing a single SA.  This is
483  * called many times by sadb_dump().
484  *
485  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
486  * the caller should take that as a hint that dupb() on the "original answer"
487  * failed, and that perhaps the caller should try again with a copyb()ed
488  * "original answer".
489  */
490 static int
491 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
492     sadb_msg_t *samsg)
493 {
494 	mblk_t *answer;
495 
496 	answer = dupb(original_answer);
497 	if (answer == NULL)
498 		return (ENOBUFS);
499 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
500 	if (answer->b_cont == NULL) {
501 		freeb(answer);
502 		return (ENOMEM);
503 	}
504 
505 	/* Just do a putnext, and let keysock deal with flow control. */
506 	putnext(pfkey_q, answer);
507 	return (0);
508 }
509 
510 /*
511  * Common function to allocate and prepare a keysock_out_t M_CTL message.
512  */
513 mblk_t *
514 sadb_keysock_out(minor_t serial)
515 {
516 	mblk_t *mp;
517 	keysock_out_t *kso;
518 
519 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
520 	if (mp != NULL) {
521 		mp->b_datap->db_type = M_CTL;
522 		mp->b_wptr += sizeof (ipsec_info_t);
523 		kso = (keysock_out_t *)mp->b_rptr;
524 		kso->ks_out_type = KEYSOCK_OUT;
525 		kso->ks_out_len = sizeof (*kso);
526 		kso->ks_out_serial = serial;
527 	}
528 
529 	return (mp);
530 }
531 
532 /*
533  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
534  * to keysock.
535  */
536 static int
537 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
538     int num_entries, boolean_t do_peers)
539 {
540 	int i, error = 0;
541 	mblk_t *original_answer;
542 	ipsa_t *walker;
543 	sadb_msg_t *samsg;
544 
545 	/*
546 	 * For each IPSA hash bucket do:
547 	 *	- Hold the mutex
548 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
549 	 */
550 	ASSERT(mp->b_cont != NULL);
551 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
552 
553 	original_answer = sadb_keysock_out(serial);
554 	if (original_answer == NULL)
555 		return (ENOMEM);
556 
557 	for (i = 0; i < num_entries; i++) {
558 		mutex_enter(&fanout[i].isaf_lock);
559 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
560 		    walker = walker->ipsa_next) {
561 			if (!do_peers && walker->ipsa_haspeer)
562 				continue;
563 			error = sadb_dump_deliver(pfkey_q, original_answer,
564 			    walker, samsg);
565 			if (error == ENOBUFS) {
566 				mblk_t *new_original_answer;
567 
568 				/* Ran out of dupb's.  Try a copyb. */
569 				new_original_answer = copyb(original_answer);
570 				if (new_original_answer == NULL) {
571 					error = ENOMEM;
572 				} else {
573 					freeb(original_answer);
574 					original_answer = new_original_answer;
575 					error = sadb_dump_deliver(pfkey_q,
576 					    original_answer, walker, samsg);
577 				}
578 			}
579 			if (error != 0)
580 				break;	/* out of for loop. */
581 		}
582 		mutex_exit(&fanout[i].isaf_lock);
583 		if (error != 0)
584 			break;	/* out of for loop. */
585 	}
586 
587 	freeb(original_answer);
588 	return (error);
589 }
590 
591 /*
592  * Dump an entire SADB; outbound first, then inbound.
593  */
594 
595 int
596 sadb_dump(queue_t *pfkey_q, mblk_t *mp, minor_t serial, sadb_t *sp)
597 {
598 	int error;
599 
600 	/* Dump outbound */
601 	error = sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_of,
602 	    sp->sdb_hashsize, B_TRUE);
603 	if (error)
604 		return (error);
605 
606 	/* Dump inbound */
607 	return sadb_dump_fanout(pfkey_q, mp, serial, sp->sdb_if,
608 	    sp->sdb_hashsize, B_FALSE);
609 }
610 
611 /*
612  * Generic sadb table walker.
613  *
614  * Call "walkfn" for each SA in each bucket in "table"; pass the
615  * bucket, the entry and "cookie" to the callback function.
616  * Take care to ensure that walkfn can delete the SA without screwing
617  * up our traverse.
618  *
619  * The bucket is locked for the duration of the callback, both so that the
620  * callback can just call sadb_unlinkassoc() when it wants to delete something,
621  * and so that no new entries are added while we're walking the list.
622  */
623 static void
624 sadb_walker(isaf_t *table, uint_t numentries,
625     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
626     void *cookie)
627 {
628 	int i;
629 	for (i = 0; i < numentries; i++) {
630 		ipsa_t *entry, *next;
631 
632 		mutex_enter(&table[i].isaf_lock);
633 
634 		for (entry = table[i].isaf_ipsa; entry != NULL;
635 		    entry = next) {
636 			next = entry->ipsa_next;
637 			(*walkfn)(&table[i], entry, cookie);
638 		}
639 		mutex_exit(&table[i].isaf_lock);
640 	}
641 }
642 
643 /*
644  * From the given SA, construct a dl_ct_ipsec_key and
645  * a dl_ct_ipsec structures to be sent to the adapter as part
646  * of a DL_CONTROL_REQ.
647  *
648  * ct_sa must point to the storage allocated for the key
649  * structure and must be followed by storage allocated
650  * for the SA information that must be sent to the driver
651  * as part of the DL_CONTROL_REQ request.
652  *
653  * The is_inbound boolean indicates whether the specified
654  * SA is part of an inbound SA table.
655  *
656  * Returns B_TRUE if the corresponding SA must be passed to
657  * a provider, B_FALSE otherwise; frees *mp if it returns B_FALSE.
658  */
659 static boolean_t
660 sadb_req_from_sa(ipsa_t *sa, mblk_t *mp, boolean_t is_inbound)
661 {
662 	dl_ct_ipsec_key_t *keyp;
663 	dl_ct_ipsec_t *sap;
664 	void *ct_sa = mp->b_wptr;
665 
666 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
667 
668 	keyp = (dl_ct_ipsec_key_t *)(ct_sa);
669 	sap = (dl_ct_ipsec_t *)(keyp + 1);
670 
671 	IPSECHW_DEBUG(IPSECHW_CAPAB, ("sadb_req_from_sa: "
672 	    "is_inbound = %d\n", is_inbound));
673 
674 	/* initialize flag */
675 	sap->sadb_sa_flags = 0;
676 	if (is_inbound) {
677 		sap->sadb_sa_flags |= DL_CT_IPSEC_INBOUND;
678 		/*
679 		 * If an inbound SA has a peer, then mark it has being
680 		 * an outbound SA as well.
681 		 */
682 		if (sa->ipsa_haspeer)
683 			sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
684 	} else {
685 		/*
686 		 * If an outbound SA has a peer, then don't send it,
687 		 * since we will send the copy from the inbound table.
688 		 */
689 		if (sa->ipsa_haspeer) {
690 			freemsg(mp);
691 			return (B_FALSE);
692 		}
693 		sap->sadb_sa_flags |= DL_CT_IPSEC_OUTBOUND;
694 	}
695 
696 	keyp->dl_key_spi = sa->ipsa_spi;
697 	bcopy(sa->ipsa_dstaddr, keyp->dl_key_dest_addr,
698 	    DL_CTL_IPSEC_ADDR_LEN);
699 	keyp->dl_key_addr_family = sa->ipsa_addrfam;
700 
701 	sap->sadb_sa_auth = sa->ipsa_auth_alg;
702 	sap->sadb_sa_encrypt = sa->ipsa_encr_alg;
703 
704 	sap->sadb_key_len_a = sa->ipsa_authkeylen;
705 	sap->sadb_key_bits_a = sa->ipsa_authkeybits;
706 	bcopy(sa->ipsa_authkey,
707 	    sap->sadb_key_data_a, sap->sadb_key_len_a);
708 
709 	sap->sadb_key_len_e = sa->ipsa_encrkeylen;
710 	sap->sadb_key_bits_e = sa->ipsa_encrkeybits;
711 	bcopy(sa->ipsa_encrkey,
712 	    sap->sadb_key_data_e, sap->sadb_key_len_e);
713 
714 	mp->b_wptr += sizeof (dl_ct_ipsec_t) + sizeof (dl_ct_ipsec_key_t);
715 	return (B_TRUE);
716 }
717 
718 /*
719  * Called from AH or ESP to format a message which will be used to inform
720  * IPsec-acceleration-capable ills of a SADB change.
721  * (It is not possible to send the message to IP directly from this function
722  * since the SA, if any, is locked during the call).
723  *
724  * dl_operation: DL_CONTROL_REQ operation (add, delete, update, etc)
725  * sa_type: identifies whether the operation applies to AH or ESP
726  *	(must be one of SADB_SATYPE_AH or SADB_SATYPE_ESP)
727  * sa: Pointer to an SA.  Must be non-NULL and locked
728  *	for ADD, DELETE, GET, and UPDATE operations.
729  * This function returns an mblk chain that must be passed to IP
730  * for forwarding to the IPsec capable providers.
731  */
732 mblk_t *
733 sadb_fmt_sa_req(uint_t dl_operation, uint_t sa_type, ipsa_t *sa,
734     boolean_t is_inbound)
735 {
736 	mblk_t *mp;
737 	dl_control_req_t *ctrl;
738 	boolean_t need_key = B_FALSE;
739 	mblk_t *ctl_mp = NULL;
740 	ipsec_ctl_t *ctl;
741 
742 	/*
743 	 * 1 allocate and initialize DL_CONTROL_REQ M_PROTO
744 	 * 2 if a key is needed for the operation
745 	 *    2.1 initialize key
746 	 *    2.2 if a full SA is needed for the operation
747 	 *	2.2.1 initialize full SA info
748 	 * 3 return message; caller will call ill_ipsec_capab_send_all()
749 	 * to send the resulting message to IPsec capable ills.
750 	 */
751 
752 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
753 
754 	/*
755 	 * Allocate DL_CONTROL_REQ M_PROTO
756 	 * We allocate room for the SA even if it's not needed
757 	 * by some of the operations (for example flush)
758 	 */
759 	mp = allocb(sizeof (dl_control_req_t) +
760 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
761 	if (mp == NULL)
762 		return (NULL);
763 	mp->b_datap->db_type = M_PROTO;
764 
765 	/* initialize dl_control_req_t */
766 	ctrl = (dl_control_req_t *)mp->b_wptr;
767 	ctrl->dl_primitive = DL_CONTROL_REQ;
768 	ctrl->dl_operation = dl_operation;
769 	ctrl->dl_type = sa_type == SADB_SATYPE_AH ? DL_CT_IPSEC_AH :
770 	    DL_CT_IPSEC_ESP;
771 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
772 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
773 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
774 	    sizeof (dl_ct_ipsec_key_t);
775 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
776 	mp->b_wptr += sizeof (dl_control_req_t);
777 
778 	if ((dl_operation == DL_CO_SET) || (dl_operation == DL_CO_DELETE)) {
779 		ASSERT(sa != NULL);
780 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
781 
782 		need_key = B_TRUE;
783 
784 		/*
785 		 * Initialize key and SA data. Note that for some
786 		 * operations the SA data is ignored by the provider
787 		 * (delete, etc.)
788 		 */
789 		if (!sadb_req_from_sa(sa, mp, is_inbound))
790 			return (NULL);
791 	}
792 
793 	/* construct control message */
794 	ctl_mp = allocb(sizeof (ipsec_ctl_t), BPRI_HI);
795 	if (ctl_mp == NULL) {
796 		cmn_err(CE_WARN, "sadb_fmt_sa_req: allocb failed\n");
797 		freemsg(mp);
798 		return (NULL);
799 	}
800 
801 	ctl_mp->b_datap->db_type = M_CTL;
802 	ctl_mp->b_wptr += sizeof (ipsec_ctl_t);
803 	ctl_mp->b_cont = mp;
804 
805 	ctl = (ipsec_ctl_t *)ctl_mp->b_rptr;
806 	ctl->ipsec_ctl_type = IPSEC_CTL;
807 	ctl->ipsec_ctl_len  = sizeof (ipsec_ctl_t);
808 	ctl->ipsec_ctl_sa_type = sa_type;
809 
810 	if (need_key) {
811 		/*
812 		 * Keep an additional reference on SA, since it will be
813 		 * needed by IP to send control messages corresponding
814 		 * to that SA from its perimeter. IP will do a
815 		 * IPSA_REFRELE when done with the request.
816 		 */
817 		ASSERT(MUTEX_HELD(&sa->ipsa_lock));
818 		IPSA_REFHOLD(sa);
819 		ctl->ipsec_ctl_sa = sa;
820 	} else
821 		ctl->ipsec_ctl_sa = NULL;
822 
823 	return (ctl_mp);
824 }
825 
826 
827 /*
828  * Called by sadb_ill_download() to dump the entries for a specific
829  * fanout table.  For each SA entry in the table passed as argument,
830  * use mp as a template and constructs a full DL_CONTROL message, and
831  * call ill_dlpi_send(), provided by IP, to send the resulting
832  * messages to the ill.
833  */
834 static void
835 sadb_ill_df(ill_t *ill, mblk_t *mp, isaf_t *fanout, int num_entries,
836     boolean_t is_inbound)
837 {
838 	ipsa_t *walker;
839 	mblk_t *nmp, *salist;
840 	int i, error = 0;
841 
842 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_ill_df: fanout at 0x%p ne=%d\n",
843 	    (void *)fanout, num_entries));
844 	/*
845 	 * For each IPSA hash bucket do:
846 	 *	- Hold the mutex
847 	 *	- Walk each entry, sending a corresponding request to IP
848 	 *	  for it.
849 	 */
850 	ASSERT(mp->b_datap->db_type == M_PROTO);
851 
852 	for (i = 0; i < num_entries; i++) {
853 		mutex_enter(&fanout[i].isaf_lock);
854 		salist = NULL;
855 
856 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
857 		    walker = walker->ipsa_next) {
858 			IPSECHW_DEBUG(IPSECHW_SADB,
859 			    ("sadb_ill_df: sending SA to ill via IP \n"));
860 			/*
861 			 * Duplicate the template mp passed and
862 			 * complete DL_CONTROL_REQ data.
863 			 * To be more memory efficient, we could use
864 			 * dupb() for the M_CTL and copyb() for the M_PROTO
865 			 * as the M_CTL, since the M_CTL is the same for
866 			 * every SA entry passed down to IP for the same ill.
867 			 *
868 			 * Note that copymsg/copyb ensure that the new mblk
869 			 * is at least as large as the source mblk even if it's
870 			 * not using all its storage -- therefore, nmp
871 			 * has trailing space for sadb_req_from_sa to add
872 			 * the SA-specific bits.
873 			 */
874 			mutex_enter(&walker->ipsa_lock);
875 			if (ipsec_capab_match(ill,
876 			    ill->ill_phyint->phyint_ifindex, ill->ill_isv6,
877 			    walker)) {
878 				nmp = copymsg(mp);
879 				if (nmp == NULL) {
880 					IPSECHW_DEBUG(IPSECHW_SADB,
881 					    ("sadb_ill_df: alloc error\n"));
882 					error = ENOMEM;
883 					mutex_exit(&walker->ipsa_lock);
884 					break;
885 				}
886 				if (sadb_req_from_sa(walker, nmp, is_inbound)) {
887 					nmp->b_next = salist;
888 					salist = nmp;
889 				}
890 			}
891 			mutex_exit(&walker->ipsa_lock);
892 		}
893 		mutex_exit(&fanout[i].isaf_lock);
894 		while (salist != NULL) {
895 			nmp = salist;
896 			salist = nmp->b_next;
897 			nmp->b_next = NULL;
898 			ill_dlpi_send(ill, nmp);
899 		}
900 		if (error != 0)
901 			break;	/* out of for loop. */
902 	}
903 }
904 
905 /*
906  * Called by ill_ipsec_capab_add(). Sends a copy of the SADB of
907  * the type specified by sa_type to the specified ill.
908  *
909  * We call for each fanout table defined by the SADB (one per
910  * protocol). sadb_ill_df() finally calls ill_dlpi_send() for
911  * each SADB entry in order to send a corresponding DL_CONTROL_REQ
912  * message to the ill.
913  */
914 void
915 sadb_ill_download(ill_t *ill, uint_t sa_type)
916 {
917 	mblk_t *protomp;	/* prototype message */
918 	dl_control_req_t *ctrl;
919 	sadbp_t *spp;
920 	sadb_t *sp;
921 	int dlt;
922 
923 	ASSERT(sa_type == SADB_SATYPE_AH || sa_type == SADB_SATYPE_ESP);
924 
925 	/*
926 	 * Allocate and initialize prototype answer. A duplicate for
927 	 * each SA is sent down to the interface.
928 	 */
929 
930 	/* DL_CONTROL_REQ M_PROTO mblk_t */
931 	protomp = allocb(sizeof (dl_control_req_t) +
932 	    sizeof (dl_ct_ipsec_key_t) + sizeof (dl_ct_ipsec_t), BPRI_HI);
933 	if (protomp == NULL)
934 		return;
935 	protomp->b_datap->db_type = M_PROTO;
936 
937 	dlt = (sa_type == SADB_SATYPE_AH) ? DL_CT_IPSEC_AH : DL_CT_IPSEC_ESP;
938 	spp = (sa_type == SADB_SATYPE_ESP) ? &esp_sadb : &ah_sadb;
939 
940 	ctrl = (dl_control_req_t *)protomp->b_wptr;
941 	ctrl->dl_primitive = DL_CONTROL_REQ;
942 	ctrl->dl_operation = DL_CO_SET;
943 	ctrl->dl_type = dlt;
944 	ctrl->dl_key_offset = sizeof (dl_control_req_t);
945 	ctrl->dl_key_length = sizeof (dl_ct_ipsec_key_t);
946 	ctrl->dl_data_offset = sizeof (dl_control_req_t) +
947 	    sizeof (dl_ct_ipsec_key_t);
948 	ctrl->dl_data_length = sizeof (dl_ct_ipsec_t);
949 	protomp->b_wptr += sizeof (dl_control_req_t);
950 
951 	/*
952 	 * then for each SADB entry, we fill out the dl_ct_ipsec_key_t
953 	 * and dl_ct_ipsec_t
954 	 */
955 	sp = ill->ill_isv6 ? &(spp->s_v6) : &(spp->s_v4);
956 	sadb_ill_df(ill, protomp, sp->sdb_of, sp->sdb_hashsize, B_FALSE);
957 	sadb_ill_df(ill, protomp, sp->sdb_if, sp->sdb_hashsize, B_TRUE);
958 	freemsg(protomp);
959 }
960 
961 /*
962  * Call me to free up a security association fanout.  Use the forever
963  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
964  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
965  * when a module is unloaded).
966  */
967 static void
968 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever)
969 {
970 	int i;
971 	isaf_t *table = *tablep;
972 
973 	if (table == NULL)
974 		return;
975 
976 	for (i = 0; i < numentries; i++) {
977 		mutex_enter(&table[i].isaf_lock);
978 		while (table[i].isaf_ipsa != NULL)
979 			sadb_unlinkassoc(table[i].isaf_ipsa);
980 		table[i].isaf_gen++;
981 		mutex_exit(&table[i].isaf_lock);
982 		if (forever)
983 			mutex_destroy(&(table[i].isaf_lock));
984 	}
985 
986 	if (forever) {
987 		*tablep = NULL;
988 		kmem_free(table, numentries * sizeof (*table));
989 	}
990 }
991 
992 /*
993  * Entry points to sadb_destroyer().
994  */
995 static void
996 sadb_flush(sadb_t *sp)
997 {
998 	/*
999 	 * Flush out each bucket, one at a time.  Were it not for keysock's
1000 	 * enforcement, there would be a subtlety where I could add on the
1001 	 * heels of a flush.  With keysock's enforcement, however, this
1002 	 * makes ESP's job easy.
1003 	 */
1004 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE);
1005 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE);
1006 
1007 	/* For each acquire, destroy it; leave the bucket mutex alone. */
1008 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE);
1009 }
1010 
1011 static void
1012 sadb_destroy(sadb_t *sp)
1013 {
1014 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE);
1015 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE);
1016 
1017 	/* For each acquire, destroy it, including the bucket mutex. */
1018 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE);
1019 
1020 	ASSERT(sp->sdb_of == NULL);
1021 	ASSERT(sp->sdb_if == NULL);
1022 	ASSERT(sp->sdb_acq == NULL);
1023 }
1024 
1025 static void
1026 sadb_send_flush_req(sadbp_t *spp)
1027 {
1028 	mblk_t *ctl_mp;
1029 
1030 	/*
1031 	 * we've been unplumbed, or never were plumbed; don't go there.
1032 	 */
1033 	if (spp->s_ip_q == NULL)
1034 		return;
1035 
1036 	/* have IP send a flush msg to the IPsec accelerators */
1037 	ctl_mp = sadb_fmt_sa_req(DL_CO_FLUSH, spp->s_satype, NULL, B_TRUE);
1038 	if (ctl_mp != NULL)
1039 		putnext(spp->s_ip_q, ctl_mp);
1040 }
1041 
1042 void
1043 sadbp_flush(sadbp_t *spp)
1044 {
1045 	sadb_flush(&spp->s_v4);
1046 	sadb_flush(&spp->s_v6);
1047 
1048 	sadb_send_flush_req(spp);
1049 }
1050 
1051 void
1052 sadbp_destroy(sadbp_t *spp)
1053 {
1054 	sadb_destroy(&spp->s_v4);
1055 	sadb_destroy(&spp->s_v6);
1056 
1057 	sadb_send_flush_req(spp);
1058 	if (spp->s_satype == SADB_SATYPE_AH)
1059 		ip_drop_unregister(&sadb_dropper);
1060 }
1061 
1062 
1063 /*
1064  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
1065  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
1066  * EINVAL.
1067  */
1068 int
1069 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft)
1070 {
1071 	if (hard == NULL || soft == NULL)
1072 		return (0);
1073 
1074 	if (hard->sadb_lifetime_allocations != 0 &&
1075 	    soft->sadb_lifetime_allocations != 0 &&
1076 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
1077 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
1078 
1079 	if (hard->sadb_lifetime_bytes != 0 &&
1080 	    soft->sadb_lifetime_bytes != 0 &&
1081 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
1082 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
1083 
1084 	if (hard->sadb_lifetime_addtime != 0 &&
1085 	    soft->sadb_lifetime_addtime != 0 &&
1086 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
1087 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
1088 
1089 	if (hard->sadb_lifetime_usetime != 0 &&
1090 	    soft->sadb_lifetime_usetime != 0 &&
1091 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
1092 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
1093 
1094 	return (0);
1095 }
1096 
1097 /*
1098  * Clone a security association for the purposes of inserting a single SA
1099  * into inbound and outbound tables respectively.
1100  */
1101 static ipsa_t *
1102 sadb_cloneassoc(ipsa_t *ipsa)
1103 {
1104 	ipsa_t *newbie;
1105 	boolean_t error = B_FALSE;
1106 
1107 	ASSERT(!MUTEX_HELD(&(ipsa->ipsa_lock)));
1108 
1109 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
1110 	if (newbie == NULL)
1111 		return (NULL);
1112 
1113 	/* Copy over what we can. */
1114 	*newbie = *ipsa;
1115 
1116 	/* bzero and initialize locks, in case *_init() allocates... */
1117 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
1118 
1119 	/*
1120 	 * While somewhat dain-bramaged, the most graceful way to
1121 	 * recover from errors is to keep plowing through the
1122 	 * allocations, and getting what I can.  It's easier to call
1123 	 * sadb_freeassoc() on the stillborn clone when all the
1124 	 * pointers aren't pointing to the parent's data.
1125 	 */
1126 
1127 	if (ipsa->ipsa_authkey != NULL) {
1128 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
1129 		    KM_NOSLEEP);
1130 		if (newbie->ipsa_authkey == NULL) {
1131 			error = B_TRUE;
1132 		} else {
1133 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
1134 			    newbie->ipsa_authkeylen);
1135 
1136 			newbie->ipsa_kcfauthkey.ck_data =
1137 			    newbie->ipsa_authkey;
1138 		}
1139 
1140 		if (newbie->ipsa_amech.cm_param != NULL) {
1141 			newbie->ipsa_amech.cm_param =
1142 			    (char *)&newbie->ipsa_mac_len;
1143 		}
1144 	}
1145 
1146 	if (ipsa->ipsa_encrkey != NULL) {
1147 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
1148 		    KM_NOSLEEP);
1149 		if (newbie->ipsa_encrkey == NULL) {
1150 			error = B_TRUE;
1151 		} else {
1152 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
1153 			    newbie->ipsa_encrkeylen);
1154 
1155 			newbie->ipsa_kcfencrkey.ck_data =
1156 			    newbie->ipsa_encrkey;
1157 		}
1158 	}
1159 
1160 	newbie->ipsa_authtmpl = NULL;
1161 	newbie->ipsa_encrtmpl = NULL;
1162 
1163 	if (ipsa->ipsa_integ != NULL) {
1164 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
1165 		    KM_NOSLEEP);
1166 		if (newbie->ipsa_integ == NULL) {
1167 			error = B_TRUE;
1168 		} else {
1169 			bcopy(ipsa->ipsa_integ, newbie->ipsa_integ,
1170 			    newbie->ipsa_integlen);
1171 		}
1172 	}
1173 
1174 	if (ipsa->ipsa_sens != NULL) {
1175 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
1176 		    KM_NOSLEEP);
1177 		if (newbie->ipsa_sens == NULL) {
1178 			error = B_TRUE;
1179 		} else {
1180 			bcopy(ipsa->ipsa_sens, newbie->ipsa_sens,
1181 			    newbie->ipsa_senslen);
1182 		}
1183 	}
1184 
1185 	if (ipsa->ipsa_src_cid != NULL) {
1186 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
1187 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
1188 	}
1189 
1190 	if (ipsa->ipsa_dst_cid != NULL) {
1191 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
1192 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
1193 	}
1194 
1195 	if (error) {
1196 		sadb_freeassoc(newbie);
1197 		return (NULL);
1198 	}
1199 
1200 	return (newbie);
1201 }
1202 
1203 /*
1204  * Initialize a SADB address extension at the address specified by addrext.
1205  * Return a pointer to the end of the new address extension.
1206  */
1207 static uint8_t *
1208 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
1209     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
1210 {
1211 	struct sockaddr_in *sin;
1212 	struct sockaddr_in6 *sin6;
1213 	uint8_t *cur = start;
1214 	int addrext_len;
1215 	int sin_len;
1216 	sadb_address_t *addrext	= (sadb_address_t *)cur;
1217 
1218 	if (cur == NULL)
1219 		return (NULL);
1220 
1221 	cur += sizeof (*addrext);
1222 	if (cur > end)
1223 		return (NULL);
1224 
1225 	addrext->sadb_address_proto = proto;
1226 	addrext->sadb_address_prefixlen = prefix;
1227 	addrext->sadb_address_reserved = 0;
1228 	addrext->sadb_address_exttype = exttype;
1229 
1230 	switch (af) {
1231 	case AF_INET:
1232 		sin = (struct sockaddr_in *)cur;
1233 		sin_len = sizeof (*sin);
1234 		cur += sin_len;
1235 		if (cur > end)
1236 			return (NULL);
1237 
1238 		sin->sin_family = af;
1239 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1240 		sin->sin_port = port;
1241 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1242 		break;
1243 	case AF_INET6:
1244 		sin6 = (struct sockaddr_in6 *)cur;
1245 		sin_len = sizeof (*sin6);
1246 		cur += sin_len;
1247 		if (cur > end)
1248 			return (NULL);
1249 
1250 		bzero(sin6, sizeof (*sin6));
1251 		sin6->sin6_family = af;
1252 		sin6->sin6_port = port;
1253 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1254 		break;
1255 	}
1256 
1257 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1258 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1259 
1260 	cur = start + addrext_len;
1261 	if (cur > end)
1262 		cur = NULL;
1263 
1264 	return (cur);
1265 }
1266 
1267 /*
1268  * Construct a key management cookie extension.
1269  */
1270 
1271 static uint8_t *
1272 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1273 {
1274 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1275 
1276 	if (cur == NULL)
1277 		return (NULL);
1278 
1279 	cur += sizeof (*kmcext);
1280 
1281 	if (cur > end)
1282 		return (NULL);
1283 
1284 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1285 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1286 	kmcext->sadb_x_kmc_proto = kmp;
1287 	kmcext->sadb_x_kmc_cookie = kmc;
1288 	kmcext->sadb_x_kmc_reserved = 0;
1289 
1290 	return (cur);
1291 }
1292 
1293 /*
1294  * Given an original message header with sufficient space following it, and an
1295  * SA, construct a full PF_KEY message with all of the relevant extensions.
1296  * This is mostly used for SADB_GET, and SADB_DUMP.
1297  */
1298 static mblk_t *
1299 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1300 {
1301 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1302 	int srcidsize, dstidsize;
1303 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1304 				/* src/dst and proxy sockaddrs. */
1305 	/*
1306 	 * The following are pointers into the PF_KEY message this PF_KEY
1307 	 * message creates.
1308 	 */
1309 	sadb_msg_t *newsamsg;
1310 	sadb_sa_t *assoc;
1311 	sadb_lifetime_t *lt;
1312 	sadb_key_t *key;
1313 	sadb_ident_t *ident;
1314 	sadb_sens_t *sens;
1315 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1316 	mblk_t *mp;
1317 	uint64_t *bitmap;
1318 	uint8_t *cur, *end;
1319 	/* These indicate the presence of the above extension fields. */
1320 	boolean_t soft, hard, isrc, idst, auth, encr, sensinteg, srcid, dstid;
1321 
1322 	/* First off, figure out the allocation length for this message. */
1323 
1324 	/*
1325 	 * Constant stuff.  This includes base, SA, address (src, dst),
1326 	 * and lifetime (current).
1327 	 */
1328 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1329 	    sizeof (sadb_lifetime_t);
1330 
1331 	fam = ipsa->ipsa_addrfam;
1332 	switch (fam) {
1333 	case AF_INET:
1334 		addrsize = roundup(sizeof (struct sockaddr_in) +
1335 		    sizeof (sadb_address_t), sizeof (uint64_t));
1336 		break;
1337 	case AF_INET6:
1338 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1339 		    sizeof (sadb_address_t), sizeof (uint64_t));
1340 		break;
1341 	default:
1342 		return (NULL);
1343 	}
1344 	/*
1345 	 * Allocate TWO address extensions, for source and destination.
1346 	 * (Thus, the * 2.)
1347 	 */
1348 	alloclen += addrsize * 2;
1349 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1350 	    alloclen += addrsize;
1351 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1352 	    alloclen += addrsize;
1353 
1354 
1355 	/* How 'bout other lifetimes? */
1356 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1357 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1358 		alloclen += sizeof (sadb_lifetime_t);
1359 		soft = B_TRUE;
1360 	} else {
1361 		soft = B_FALSE;
1362 	}
1363 
1364 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1365 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1366 		alloclen += sizeof (sadb_lifetime_t);
1367 		hard = B_TRUE;
1368 	} else {
1369 		hard = B_FALSE;
1370 	}
1371 
1372 	/* Inner addresses. */
1373 	if (ipsa->ipsa_innerfam == 0) {
1374 		isrc = B_FALSE;
1375 		idst = B_FALSE;
1376 	} else {
1377 		pfam = ipsa->ipsa_innerfam;
1378 		switch (pfam) {
1379 		case AF_INET6:
1380 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1381 			    sizeof (sadb_address_t), sizeof (uint64_t));
1382 			break;
1383 		case AF_INET:
1384 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1385 			    sizeof (sadb_address_t), sizeof (uint64_t));
1386 			break;
1387 		default:
1388 			cmn_err(CE_PANIC,
1389 			    "IPsec SADB: Proxy length failure.\n");
1390 			break;
1391 		}
1392 		isrc = B_TRUE;
1393 		idst = B_TRUE;
1394 		alloclen += 2 * paddrsize;
1395 	}
1396 
1397 	/* For the following fields, assume that length != 0 ==> stuff */
1398 	if (ipsa->ipsa_authkeylen != 0) {
1399 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1400 		    sizeof (uint64_t));
1401 		alloclen += authsize;
1402 		auth = B_TRUE;
1403 	} else {
1404 		auth = B_FALSE;
1405 	}
1406 
1407 	if (ipsa->ipsa_encrkeylen != 0) {
1408 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen,
1409 		    sizeof (uint64_t));
1410 		alloclen += encrsize;
1411 		encr = B_TRUE;
1412 	} else {
1413 		encr = B_FALSE;
1414 	}
1415 
1416 	/* No need for roundup on sens and integ. */
1417 	if (ipsa->ipsa_integlen != 0 || ipsa->ipsa_senslen != 0) {
1418 		alloclen += sizeof (sadb_key_t) + ipsa->ipsa_integlen +
1419 		    ipsa->ipsa_senslen;
1420 		sensinteg = B_TRUE;
1421 	} else {
1422 		sensinteg = B_FALSE;
1423 	}
1424 
1425 	/*
1426 	 * Must use strlen() here for lengths.	Identities use NULL
1427 	 * pointers to indicate their nonexistence.
1428 	 */
1429 	if (ipsa->ipsa_src_cid != NULL) {
1430 		srcidsize = roundup(sizeof (sadb_ident_t) +
1431 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1432 		    sizeof (uint64_t));
1433 		alloclen += srcidsize;
1434 		srcid = B_TRUE;
1435 	} else {
1436 		srcid = B_FALSE;
1437 	}
1438 
1439 	if (ipsa->ipsa_dst_cid != NULL) {
1440 		dstidsize = roundup(sizeof (sadb_ident_t) +
1441 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1442 		    sizeof (uint64_t));
1443 		alloclen += dstidsize;
1444 		dstid = B_TRUE;
1445 	} else {
1446 		dstid = B_FALSE;
1447 	}
1448 
1449 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1450 		alloclen += sizeof (sadb_x_kmc_t);
1451 
1452 	/* Make sure the allocation length is a multiple of 8 bytes. */
1453 	ASSERT((alloclen & 0x7) == 0);
1454 
1455 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1456 	mp = allocb(alloclen, BPRI_HI);
1457 	if (mp == NULL)
1458 		return (NULL);
1459 
1460 	mp->b_wptr += alloclen;
1461 	end = mp->b_wptr;
1462 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1463 	*newsamsg = *samsg;
1464 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1465 
1466 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1467 
1468 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1469 
1470 	assoc = (sadb_sa_t *)(newsamsg + 1);
1471 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1472 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1473 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1474 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1475 	assoc->sadb_sa_state = ipsa->ipsa_state;
1476 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1477 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1478 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1479 
1480 	lt = (sadb_lifetime_t *)(assoc + 1);
1481 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1482 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1483 	lt->sadb_lifetime_allocations = ipsa->ipsa_alloc;
1484 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1485 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1486 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1487 
1488 	if (hard) {
1489 		lt++;
1490 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1491 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1492 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1493 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1494 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1495 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1496 	}
1497 
1498 	if (soft) {
1499 		lt++;
1500 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1501 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1502 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1503 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1504 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1505 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1506 	}
1507 
1508 	cur = (uint8_t *)(lt + 1);
1509 
1510 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1511 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1512 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1513 	    SA_PROTO(ipsa), 0);
1514 	if (cur == NULL) {
1515 		freemsg(mp);
1516 		mp = NULL;
1517 		goto bail;
1518 	}
1519 
1520 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1521 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1522 	    SA_PROTO(ipsa), 0);
1523 	if (cur == NULL) {
1524 		freemsg(mp);
1525 		mp = NULL;
1526 		goto bail;
1527 	}
1528 
1529 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1530 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1531 		    fam, ipsa->ipsa_natt_addr_loc, 0, 0, 0);
1532 		if (cur == NULL) {
1533 			freemsg(mp);
1534 			mp = NULL;
1535 			goto bail;
1536 		}
1537 	}
1538 
1539 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1540 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1541 		    fam, ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_port,
1542 		    IPPROTO_UDP, 0);
1543 		if (cur == NULL) {
1544 			freemsg(mp);
1545 			mp = NULL;
1546 			goto bail;
1547 		}
1548 	}
1549 
1550 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1551 	if (isrc) {
1552 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1553 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1554 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1555 		if (cur == NULL) {
1556 			freemsg(mp);
1557 			mp = NULL;
1558 			goto bail;
1559 		}
1560 	}
1561 
1562 	if (idst) {
1563 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1564 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1565 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1566 		if (cur == NULL) {
1567 			freemsg(mp);
1568 			mp = NULL;
1569 			goto bail;
1570 		}
1571 	}
1572 
1573 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1574 		cur = sadb_make_kmc_ext(cur, end,
1575 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1576 		if (cur == NULL) {
1577 			freemsg(mp);
1578 			mp = NULL;
1579 			goto bail;
1580 		}
1581 	}
1582 
1583 	walker = (sadb_ext_t *)cur;
1584 	if (auth) {
1585 		key = (sadb_key_t *)walker;
1586 		key->sadb_key_len = SADB_8TO64(authsize);
1587 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1588 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1589 		key->sadb_key_reserved = 0;
1590 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1591 		walker = (sadb_ext_t *)((uint64_t *)walker +
1592 		    walker->sadb_ext_len);
1593 	}
1594 
1595 	if (encr) {
1596 		key = (sadb_key_t *)walker;
1597 		key->sadb_key_len = SADB_8TO64(encrsize);
1598 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1599 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1600 		key->sadb_key_reserved = 0;
1601 		bcopy(ipsa->ipsa_encrkey, key + 1, ipsa->ipsa_encrkeylen);
1602 		walker = (sadb_ext_t *)((uint64_t *)walker +
1603 		    walker->sadb_ext_len);
1604 	}
1605 
1606 	if (srcid) {
1607 		ident = (sadb_ident_t *)walker;
1608 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1609 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1610 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1611 		ident->sadb_ident_id = 0;
1612 		ident->sadb_ident_reserved = 0;
1613 		(void) strcpy((char *)(ident + 1),
1614 		    ipsa->ipsa_src_cid->ipsid_cid);
1615 		walker = (sadb_ext_t *)((uint64_t *)walker +
1616 		    walker->sadb_ext_len);
1617 	}
1618 
1619 	if (dstid) {
1620 		ident = (sadb_ident_t *)walker;
1621 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1622 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1623 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1624 		ident->sadb_ident_id = 0;
1625 		ident->sadb_ident_reserved = 0;
1626 		(void) strcpy((char *)(ident + 1),
1627 		    ipsa->ipsa_dst_cid->ipsid_cid);
1628 		walker = (sadb_ext_t *)((uint64_t *)walker +
1629 		    walker->sadb_ext_len);
1630 	}
1631 
1632 	if (sensinteg) {
1633 		sens = (sadb_sens_t *)walker;
1634 		sens->sadb_sens_len = SADB_8TO64(sizeof (sadb_sens_t *) +
1635 		    ipsa->ipsa_senslen + ipsa->ipsa_integlen);
1636 		sens->sadb_sens_dpd = ipsa->ipsa_dpd;
1637 		sens->sadb_sens_sens_level = ipsa->ipsa_senslevel;
1638 		sens->sadb_sens_integ_level = ipsa->ipsa_integlevel;
1639 		sens->sadb_sens_sens_len = SADB_8TO64(ipsa->ipsa_senslen);
1640 		sens->sadb_sens_integ_len = SADB_8TO64(ipsa->ipsa_integlen);
1641 		sens->sadb_sens_reserved = 0;
1642 		bitmap = (uint64_t *)(sens + 1);
1643 		if (ipsa->ipsa_sens != NULL) {
1644 			bcopy(ipsa->ipsa_sens, bitmap, ipsa->ipsa_senslen);
1645 			bitmap += sens->sadb_sens_sens_len;
1646 		}
1647 		if (ipsa->ipsa_integ != NULL)
1648 			bcopy(ipsa->ipsa_integ, bitmap, ipsa->ipsa_integlen);
1649 		walker = (sadb_ext_t *)((uint64_t *)walker +
1650 		    walker->sadb_ext_len);
1651 	}
1652 
1653 bail:
1654 	/* Pardon any delays... */
1655 	mutex_exit(&ipsa->ipsa_lock);
1656 
1657 	return (mp);
1658 }
1659 
1660 /*
1661  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1662  * and adjust base message accordingly.
1663  *
1664  * Assume message is pulled up in one piece of contiguous memory.
1665  *
1666  * Say if we start off with:
1667  *
1668  * +------+----+-------------+-----------+---------------+---------------+
1669  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1670  * +------+----+-------------+-----------+---------------+---------------+
1671  *
1672  * we will end up with
1673  *
1674  * +------+----+-------------+-----------+---------------+
1675  * | base | SA | source addr | dest addr | soft lifetime |
1676  * +------+----+-------------+-----------+---------------+
1677  */
1678 static void
1679 sadb_strip(sadb_msg_t *samsg)
1680 {
1681 	sadb_ext_t *ext;
1682 	uint8_t *target = NULL;
1683 	uint8_t *msgend;
1684 	int sofar = SADB_8TO64(sizeof (*samsg));
1685 	int copylen;
1686 
1687 	ext = (sadb_ext_t *)(samsg + 1);
1688 	msgend = (uint8_t *)samsg;
1689 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1690 	while ((uint8_t *)ext < msgend) {
1691 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1692 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1693 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1694 			/*
1695 			 * Aha!	 I found a header to be erased.
1696 			 */
1697 
1698 			if (target != NULL) {
1699 				/*
1700 				 * If I had a previous header to be erased,
1701 				 * copy over it.  I can get away with just
1702 				 * copying backwards because the target will
1703 				 * always be 8 bytes behind the source.
1704 				 */
1705 				copylen = ((uint8_t *)ext) - (target +
1706 				    SADB_64TO8(
1707 					((sadb_ext_t *)target)->sadb_ext_len));
1708 				ovbcopy(((uint8_t *)ext - copylen), target,
1709 				    copylen);
1710 				target += copylen;
1711 				((sadb_ext_t *)target)->sadb_ext_len =
1712 				    SADB_8TO64(((uint8_t *)ext) - target +
1713 					SADB_64TO8(ext->sadb_ext_len));
1714 			} else {
1715 				target = (uint8_t *)ext;
1716 			}
1717 		} else {
1718 			sofar += ext->sadb_ext_len;
1719 		}
1720 
1721 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1722 	}
1723 
1724 	ASSERT((uint8_t *)ext == msgend);
1725 
1726 	if (target != NULL) {
1727 		copylen = ((uint8_t *)ext) - (target +
1728 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1729 		if (copylen != 0)
1730 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1731 	}
1732 
1733 	/* Adjust samsg. */
1734 	samsg->sadb_msg_len = (uint16_t)sofar;
1735 
1736 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1737 }
1738 
1739 /*
1740  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1741  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1742  * the sending keysock instance is included.
1743  */
1744 void
1745 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1746     uint_t serial)
1747 {
1748 	mblk_t *msg = mp->b_cont;
1749 	sadb_msg_t *samsg;
1750 	keysock_out_t *kso;
1751 
1752 	/*
1753 	 * Enough functions call this to merit a NULL queue check.
1754 	 */
1755 	if (pfkey_q == NULL) {
1756 		freemsg(mp);
1757 		return;
1758 	}
1759 
1760 	ASSERT(msg != NULL);
1761 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1762 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1763 	samsg = (sadb_msg_t *)msg->b_rptr;
1764 	kso = (keysock_out_t *)mp->b_rptr;
1765 
1766 	kso->ks_out_type = KEYSOCK_OUT;
1767 	kso->ks_out_len = sizeof (*kso);
1768 	kso->ks_out_serial = serial;
1769 
1770 	/*
1771 	 * Only send the base message up in the event of an error.
1772 	 * Don't worry about bzero()-ing, because it was probably bogus
1773 	 * anyway.
1774 	 */
1775 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1776 	samsg = (sadb_msg_t *)msg->b_rptr;
1777 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1778 	samsg->sadb_msg_errno = (uint8_t)error;
1779 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1780 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1781 
1782 	putnext(pfkey_q, mp);
1783 }
1784 
1785 /*
1786  * Send a successful return packet back to keysock via the queue in pfkey_q.
1787  *
1788  * Often, an SA is associated with the reply message, it's passed in if needed,
1789  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1790  * and the caller will release said refcnt.
1791  */
1792 void
1793 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1794     keysock_in_t *ksi, ipsa_t *ipsa)
1795 {
1796 	keysock_out_t *kso;
1797 	mblk_t *mp1;
1798 	sadb_msg_t *newsamsg;
1799 	uint8_t *oldend;
1800 
1801 	ASSERT((mp->b_cont != NULL) &&
1802 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1803 	    ((void *)mp->b_rptr == (void *)ksi));
1804 
1805 	switch (samsg->sadb_msg_type) {
1806 	case SADB_ADD:
1807 	case SADB_UPDATE:
1808 	case SADB_FLUSH:
1809 	case SADB_DUMP:
1810 		/*
1811 		 * I have all of the message already.  I just need to strip
1812 		 * out the keying material and echo the message back.
1813 		 *
1814 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1815 		 * work.  When DUMP reaches here, it should only be a base
1816 		 * message.
1817 		 */
1818 	justecho:
1819 		ASSERT(samsg->sadb_msg_type != SADB_DUMP ||
1820 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (sadb_msg_t)));
1821 
1822 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1823 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL) {
1824 			sadb_strip(samsg);
1825 			/* Assume PF_KEY message is contiguous. */
1826 			ASSERT(mp->b_cont->b_cont == NULL);
1827 			oldend = mp->b_cont->b_wptr;
1828 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1829 			    SADB_64TO8(samsg->sadb_msg_len);
1830 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1831 		}
1832 		break;
1833 	case SADB_GET:
1834 		/*
1835 		 * Do a lot of work here, because of the ipsa I just found.
1836 		 * First construct the new PF_KEY message, then abandon
1837 		 * the old one.
1838 		 */
1839 		mp1 = sadb_sa2msg(ipsa, samsg);
1840 		if (mp1 == NULL) {
1841 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1842 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1843 			return;
1844 		}
1845 		freemsg(mp->b_cont);
1846 		mp->b_cont = mp1;
1847 		break;
1848 	case SADB_DELETE:
1849 		if (ipsa == NULL)
1850 			goto justecho;
1851 		/*
1852 		 * Because listening KMds may require more info, treat
1853 		 * DELETE like a special case of GET.
1854 		 */
1855 		mp1 = sadb_sa2msg(ipsa, samsg);
1856 		if (mp1 == NULL) {
1857 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1858 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1859 			return;
1860 		}
1861 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1862 		sadb_strip(newsamsg);
1863 		oldend = mp1->b_wptr;
1864 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1865 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1866 		freemsg(mp->b_cont);
1867 		mp->b_cont = mp1;
1868 		break;
1869 	default:
1870 		if (mp != NULL)
1871 			freemsg(mp);
1872 		return;
1873 	}
1874 
1875 	/* ksi is now null and void. */
1876 	kso = (keysock_out_t *)ksi;
1877 	kso->ks_out_type = KEYSOCK_OUT;
1878 	kso->ks_out_len = sizeof (*kso);
1879 	kso->ks_out_serial = ksi->ks_in_serial;
1880 	/* We're ready to send... */
1881 	putnext(pfkey_q, mp);
1882 }
1883 
1884 /*
1885  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1886  */
1887 void
1888 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1889     void (*ager)(void *), timeout_id_t *top, int satype)
1890 {
1891 	keysock_hello_ack_t *kha;
1892 	queue_t *oldq;
1893 
1894 	ASSERT(OTHERQ(q) != NULL);
1895 
1896 	/*
1897 	 * First, check atomically that I'm the first and only keysock
1898 	 * instance.
1899 	 *
1900 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1901 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1902 	 * messages.
1903 	 */
1904 
1905 	oldq = casptr((void **)pfkey_qp, NULL, OTHERQ(q));
1906 	if (oldq != NULL) {
1907 		ASSERT(oldq != q);
1908 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1909 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1910 		freemsg(mp);
1911 		return;
1912 	}
1913 
1914 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1915 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1916 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1917 	kha->ks_hello_satype = (uint8_t)satype;
1918 
1919 	/*
1920 	 * If we made it past the casptr, then we have "exclusive" access
1921 	 * to the timeout handle.  Fire it off in 4 seconds, because it
1922 	 * just seems like a good interval.
1923 	 */
1924 	*top = qtimeout(*pfkey_qp, ager, NULL, drv_usectohz(4000000));
1925 
1926 	putnext(*pfkey_qp, mp);
1927 }
1928 
1929 /*
1930  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1931  *
1932  * Check addresses themselves for wildcard or multicast.
1933  * Check ire table for local/non-local/broadcast.
1934  */
1935 int
1936 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial)
1937 {
1938 	sadb_address_t *addr = (sadb_address_t *)ext;
1939 	struct sockaddr_in *sin;
1940 	struct sockaddr_in6 *sin6;
1941 	ire_t *ire;
1942 	int diagnostic, type;
1943 	boolean_t normalized = B_FALSE;
1944 
1945 	ASSERT(ext != NULL);
1946 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1947 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1948 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1949 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1950 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1951 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1952 
1953 	/* Assign both sockaddrs, the compiler will do the right thing. */
1954 	sin = (struct sockaddr_in *)(addr + 1);
1955 	sin6 = (struct sockaddr_in6 *)(addr + 1);
1956 
1957 	if (sin6->sin6_family == AF_INET6) {
1958 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1959 			/*
1960 			 * Convert to an AF_INET sockaddr.  This means the
1961 			 * return messages will have the extra space, but have
1962 			 * AF_INET sockaddrs instead of AF_INET6.
1963 			 *
1964 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1965 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1966 			 * equal to AF_INET <v4>, it shouldnt be a huge
1967 			 * problem.
1968 			 */
1969 			sin->sin_family = AF_INET;
1970 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1971 			    &sin->sin_addr);
1972 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1973 			normalized = B_TRUE;
1974 		}
1975 	} else if (sin->sin_family != AF_INET) {
1976 		switch (ext->sadb_ext_type) {
1977 		case SADB_EXT_ADDRESS_SRC:
1978 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1979 			break;
1980 		case SADB_EXT_ADDRESS_DST:
1981 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1982 			break;
1983 		case SADB_X_EXT_ADDRESS_INNER_SRC:
1984 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1985 			break;
1986 		case SADB_X_EXT_ADDRESS_INNER_DST:
1987 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1988 			break;
1989 		case SADB_X_EXT_ADDRESS_NATT_LOC:
1990 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1991 			break;
1992 		case SADB_X_EXT_ADDRESS_NATT_REM:
1993 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1994 			break;
1995 			/* There is no default, see above ASSERT. */
1996 		}
1997 bail:
1998 		if (pfkey_q != NULL) {
1999 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
2000 			    serial);
2001 		} else {
2002 			/*
2003 			 * Scribble in sadb_msg that we got passed in.
2004 			 * Overload "mp" to be an sadb_msg pointer.
2005 			 */
2006 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
2007 
2008 			samsg->sadb_msg_errno = EINVAL;
2009 			samsg->sadb_x_msg_diagnostic = diagnostic;
2010 		}
2011 		return (KS_IN_ADDR_UNKNOWN);
2012 	}
2013 
2014 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
2015 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
2016 		/*
2017 		 * We need only check for prefix issues.
2018 		 */
2019 
2020 		/* Set diagnostic now, in case we need it later. */
2021 		diagnostic =
2022 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
2023 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
2024 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
2025 
2026 		if (normalized)
2027 			addr->sadb_address_prefixlen -= 96;
2028 
2029 		/*
2030 		 * Verify and mask out inner-addresses based on prefix length.
2031 		 */
2032 		if (sin->sin_family == AF_INET) {
2033 			if (addr->sadb_address_prefixlen > 32)
2034 				goto bail;
2035 			sin->sin_addr.s_addr &=
2036 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
2037 		} else {
2038 			in6_addr_t mask;
2039 
2040 			ASSERT(sin->sin_family == AF_INET6);
2041 			/*
2042 			 * ip_plen_to_mask_v6() returns NULL if the value in
2043 			 * question is out of range.
2044 			 */
2045 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
2046 				&mask) == NULL)
2047 				goto bail;
2048 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
2049 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
2050 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
2051 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
2052 		}
2053 
2054 		/* We don't care in these cases. */
2055 		return (KS_IN_ADDR_DONTCARE);
2056 	}
2057 
2058 	if (sin->sin_family == AF_INET6) {
2059 		/* Check the easy ones now. */
2060 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
2061 			return (KS_IN_ADDR_MBCAST);
2062 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
2063 			return (KS_IN_ADDR_UNSPEC);
2064 		/*
2065 		 * At this point, we're a unicast IPv6 address.
2066 		 *
2067 		 * A ctable lookup for local is sufficient here.  If we're
2068 		 * local, return KS_IN_ADDR_ME, otherwise KS_IN_ADDR_NOTME.
2069 		 *
2070 		 * XXX Zones alert -> me/notme decision needs to be tempered
2071 		 * by what zone we're in when we go to zone-aware IPsec.
2072 		 */
2073 		ire = ire_ctable_lookup_v6(&sin6->sin6_addr, NULL,
2074 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE);
2075 		if (ire != NULL) {
2076 			/* Hey hey, it's local. */
2077 			IRE_REFRELE(ire);
2078 			return (KS_IN_ADDR_ME);
2079 		}
2080 	} else {
2081 		ASSERT(sin->sin_family == AF_INET);
2082 		if (sin->sin_addr.s_addr == INADDR_ANY)
2083 			return (KS_IN_ADDR_UNSPEC);
2084 		if (CLASSD(sin->sin_addr.s_addr))
2085 			return (KS_IN_ADDR_MBCAST);
2086 		/*
2087 		 * At this point we're a unicast or broadcast IPv4 address.
2088 		 *
2089 		 * Lookup on the ctable for IRE_BROADCAST or IRE_LOCAL.
2090 		 * A NULL return value is NOTME, otherwise, look at the
2091 		 * returned ire for broadcast or not and return accordingly.
2092 		 *
2093 		 * XXX Zones alert -> me/notme decision needs to be tempered
2094 		 * by what zone we're in when we go to zone-aware IPsec.
2095 		 */
2096 		ire = ire_ctable_lookup(sin->sin_addr.s_addr, 0,
2097 		    IRE_LOCAL | IRE_BROADCAST, NULL, ALL_ZONES, NULL,
2098 		    MATCH_IRE_TYPE);
2099 		if (ire != NULL) {
2100 			/* Check for local or broadcast */
2101 			type = ire->ire_type;
2102 			IRE_REFRELE(ire);
2103 			ASSERT(type == IRE_LOCAL || type == IRE_BROADCAST);
2104 			return ((type == IRE_LOCAL) ? KS_IN_ADDR_ME :
2105 			    KS_IN_ADDR_MBCAST);
2106 		}
2107 	}
2108 
2109 	return (KS_IN_ADDR_NOTME);
2110 }
2111 
2112 /*
2113  * Address normalizations and reality checks for inbound PF_KEY messages.
2114  *
2115  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
2116  * the source to AF_INET.  Do the same for the inner sources.
2117  */
2118 boolean_t
2119 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp)
2120 {
2121 	struct sockaddr_in *src, *isrc;
2122 	struct sockaddr_in6 *dst, *idst;
2123 	sadb_address_t *srcext, *dstext;
2124 	uint16_t sport;
2125 	sadb_ext_t **extv = ksi->ks_in_extv;
2126 	int rc;
2127 
2128 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
2129 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
2130 		    ksi->ks_in_serial);
2131 		if (rc == KS_IN_ADDR_UNKNOWN)
2132 			return (B_FALSE);
2133 		if (rc == KS_IN_ADDR_MBCAST) {
2134 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2135 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
2136 			return (B_FALSE);
2137 		}
2138 		ksi->ks_in_srctype = rc;
2139 	}
2140 
2141 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
2142 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
2143 		    ksi->ks_in_serial);
2144 		if (rc == KS_IN_ADDR_UNKNOWN)
2145 			return (B_FALSE);
2146 		if (rc == KS_IN_ADDR_UNSPEC) {
2147 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2148 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
2149 			return (B_FALSE);
2150 		}
2151 		ksi->ks_in_dsttype = rc;
2152 	}
2153 
2154 	/*
2155 	 * NAT-Traversal addrs are simple enough to not require all of
2156 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
2157 	 * AF_INET.
2158 	 */
2159 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
2160 		rc = sadb_addrcheck(pfkey_q, mp,
2161 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial);
2162 
2163 		/*
2164 		 * NATT addresses never use an IRE_LOCAL, so it should
2165 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2166 		 */
2167 		if (rc != KS_IN_ADDR_NOTME &&
2168 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2169 			rc == KS_IN_ADDR_UNSPEC)) {
2170 			if (rc != KS_IN_ADDR_UNKNOWN)
2171 				sadb_pfkey_error(pfkey_q, mp, EINVAL,
2172 				    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
2173 				    ksi->ks_in_serial);
2174 			return (B_FALSE);
2175 		}
2176 		src = (struct sockaddr_in *)
2177 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2178 		if (src->sin_family != AF_INET) {
2179 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2180 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2181 			    ksi->ks_in_serial);
2182 			return (B_FALSE);
2183 		}
2184 	}
2185 
2186 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2187 		rc = sadb_addrcheck(pfkey_q, mp,
2188 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial);
2189 
2190 		/*
2191 		 * NATT addresses never use an IRE_LOCAL, so it should
2192 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2193 		 */
2194 		if (rc != KS_IN_ADDR_NOTME &&
2195 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2196 			rc == KS_IN_ADDR_UNSPEC)) {
2197 			if (rc != KS_IN_ADDR_UNKNOWN)
2198 				sadb_pfkey_error(pfkey_q, mp, EINVAL,
2199 				    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2200 				    ksi->ks_in_serial);
2201 			return (B_FALSE);
2202 		}
2203 		src = (struct sockaddr_in *)
2204 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2205 		if (src->sin_family != AF_INET) {
2206 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2207 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2208 			    ksi->ks_in_serial);
2209 			return (B_FALSE);
2210 		}
2211 	}
2212 
2213 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2214 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2215 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2216 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2217 			    ksi->ks_in_serial);
2218 			return (B_FALSE);
2219 		}
2220 
2221 		if (sadb_addrcheck(pfkey_q, mp,
2222 			extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial)
2223 		    == KS_IN_ADDR_UNKNOWN ||
2224 		    sadb_addrcheck(pfkey_q, mp,
2225 			extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial)
2226 		    == KS_IN_ADDR_UNKNOWN)
2227 			return (B_FALSE);
2228 
2229 		isrc = (struct sockaddr_in *)
2230 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2231 			1);
2232 		idst = (struct sockaddr_in6 *)
2233 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2234 			1);
2235 		if (isrc->sin_family != idst->sin6_family) {
2236 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2237 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2238 			    ksi->ks_in_serial);
2239 			return (B_FALSE);
2240 		}
2241 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2242 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2243 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2244 			    ksi->ks_in_serial);
2245 			return (B_FALSE);
2246 	} else {
2247 		isrc = NULL;	/* For inner/outer port check below. */
2248 	}
2249 
2250 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2251 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2252 
2253 	if (dstext == NULL || srcext == NULL)
2254 		return (B_TRUE);
2255 
2256 	dst = (struct sockaddr_in6 *)(dstext + 1);
2257 	src = (struct sockaddr_in *)(srcext + 1);
2258 
2259 	if (isrc != NULL &&
2260 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2261 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2262 		/* Can't set inner and outer ports in one SA. */
2263 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2264 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2265 		    ksi->ks_in_serial);
2266 		return (B_FALSE);
2267 	}
2268 
2269 	if (dst->sin6_family == src->sin_family)
2270 		return (B_TRUE);
2271 
2272 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2273 		if (srcext->sadb_address_proto == 0) {
2274 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2275 		} else if (dstext->sadb_address_proto == 0) {
2276 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2277 		} else {
2278 			/* Inequal protocols, neither were 0.  Report error. */
2279 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2280 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2281 			    ksi->ks_in_serial);
2282 			return (B_FALSE);
2283 		}
2284 	}
2285 
2286 	/*
2287 	 * With the exception of an unspec IPv6 source and an IPv4
2288 	 * destination, address families MUST me matched.
2289 	 */
2290 	if (src->sin_family == AF_INET ||
2291 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2292 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2293 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2294 		return (B_FALSE);
2295 	}
2296 
2297 	/*
2298 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2299 	 * in the same place for sockaddr_in and sockaddr_in6.
2300 	 */
2301 	sport = src->sin_port;
2302 	bzero(src, sizeof (*src));
2303 	src->sin_family = AF_INET;
2304 	src->sin_port = sport;
2305 
2306 	return (B_TRUE);
2307 }
2308 
2309 /*
2310  * Set the results in "addrtype", given an IRE as requested by
2311  * sadb_addrcheck().
2312  */
2313 int
2314 sadb_addrset(ire_t *ire)
2315 {
2316 	if ((ire->ire_type & IRE_BROADCAST) ||
2317 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2318 	    (ire->ire_ipversion == IPV6_VERSION &&
2319 		IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2320 		return (KS_IN_ADDR_MBCAST);
2321 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2322 		return (KS_IN_ADDR_ME);
2323 	return (KS_IN_ADDR_NOTME);
2324 }
2325 
2326 
2327 /*
2328  * Walker callback function to delete sa's based on src/dst address.
2329  * Assumes that we're called with *head locked, no other locks held;
2330  * Conveniently, and not coincidentally, this is both what sadb_walker
2331  * gives us and also what sadb_unlinkassoc expects.
2332  */
2333 
/*
 * Match criteria and working state handed to sadb_purge_cb() through the
 * walker's cookie argument.  Filled in by sadb_purge_sa().
 */
struct sadb_purge_state
{
	uint32_t *src;		/* source address to match, NULL for any */
	uint32_t *dst;		/* destination address to match, NULL for any */
	sa_family_t af;		/* address family of src/dst */
	boolean_t inbnd;	/* B_TRUE while walking the inbound table */
	char *sidstr;		/* source identity string, NULL for any */
	char *didstr;		/* destination identity string, NULL for any */
	uint16_t sidtype;	/* identity type paired with sidstr */
	uint16_t didtype;	/* identity type paired with didstr */
	uint32_t kmproto;	/* KM protocol; > SADB_X_KMP_MAX for any */
	mblk_t *mq;		/* passed to sadb_torch_assoc(), drained later */
};
2347 
2348 static void
2349 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2350 {
2351 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2352 
2353 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2354 
2355 	mutex_enter(&entry->ipsa_lock);
2356 
2357 	if ((entry->ipsa_state == IPSA_STATE_LARVAL) ||
2358 	    (ps->src != NULL &&
2359 		!IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, ps->src, ps->af)) ||
2360 	    (ps->dst != NULL &&
2361 		!IPSA_ARE_ADDR_EQUAL(entry->ipsa_dstaddr, ps->dst, ps->af)) ||
2362 	    (ps->didstr != NULL &&
2363 		(entry->ipsa_dst_cid != NULL) &&
2364 		!(ps->didtype == entry->ipsa_dst_cid->ipsid_type &&
2365 		    strcmp(ps->didstr, entry->ipsa_dst_cid->ipsid_cid) == 0)) ||
2366 	    (ps->sidstr != NULL &&
2367 		(entry->ipsa_src_cid != NULL) &&
2368 		!(ps->sidtype == entry->ipsa_src_cid->ipsid_type &&
2369 		    strcmp(ps->sidstr, entry->ipsa_src_cid->ipsid_cid) == 0)) ||
2370 	    (ps->kmproto <= SADB_X_KMP_MAX && ps->kmproto != entry->ipsa_kmp)) {
2371 		mutex_exit(&entry->ipsa_lock);
2372 		return;
2373 	}
2374 
2375 	entry->ipsa_state = IPSA_STATE_DEAD;
2376 	(void) sadb_torch_assoc(head, entry, ps->inbnd, &ps->mq);
2377 }
2378 
2379 /*
2380  * Common code to purge an SA with a matching src or dst address.
2381  * Don't kill larval SA's in such a purge.
2382  */
2383 int
2384 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp, queue_t *pfkey_q,
2385     queue_t *ip_q)
2386 {
2387 	sadb_address_t *dstext =
2388 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2389 	sadb_address_t *srcext =
2390 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2391 	sadb_ident_t *dstid =
2392 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2393 	sadb_ident_t *srcid =
2394 	    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2395 	sadb_x_kmc_t *kmc =
2396 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2397 	struct sockaddr_in *src, *dst;
2398 	struct sockaddr_in6 *src6, *dst6;
2399 	struct sadb_purge_state ps;
2400 
2401 	/*
2402 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2403 	 * takes care of them.
2404 	 */
2405 
2406 	/* enforced by caller */
2407 	ASSERT((dstext != NULL) || (srcext != NULL));
2408 
2409 	ps.src = NULL;
2410 	ps.dst = NULL;
2411 #ifdef DEBUG
2412 	ps.af = (sa_family_t)-1;
2413 #endif
2414 	ps.mq = NULL;
2415 	ps.sidstr = NULL;
2416 	ps.didstr = NULL;
2417 	ps.kmproto = SADB_X_KMP_MAX + 1;
2418 
2419 	if (dstext != NULL) {
2420 		dst = (struct sockaddr_in *)(dstext + 1);
2421 		ps.af = dst->sin_family;
2422 		if (dst->sin_family == AF_INET6) {
2423 			dst6 = (struct sockaddr_in6 *)dst;
2424 			ps.dst = (uint32_t *)&dst6->sin6_addr;
2425 		} else {
2426 			ps.dst = (uint32_t *)&dst->sin_addr;
2427 		}
2428 	}
2429 
2430 	if (srcext != NULL) {
2431 		src = (struct sockaddr_in *)(srcext + 1);
2432 		ps.af = src->sin_family;
2433 		if (src->sin_family == AF_INET6) {
2434 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2435 			ps.src = (uint32_t *)&src6->sin6_addr;
2436 		} else {
2437 			ps.src = (uint32_t *)&src->sin_addr;
2438 		}
2439 		ASSERT(dstext == NULL || src->sin_family == dst->sin_family);
2440 	}
2441 
2442 	ASSERT(ps.af != (sa_family_t)-1);
2443 
2444 	if (dstid != NULL) {
2445 		/*
2446 		 * NOTE:  May need to copy string in the future
2447 		 * if the inbound keysock message disappears for some strange
2448 		 * reason.
2449 		 */
2450 		ps.didstr = (char *)(dstid + 1);
2451 		ps.didtype = dstid->sadb_ident_type;
2452 	}
2453 
2454 	if (srcid != NULL) {
2455 		/*
2456 		 * NOTE:  May need to copy string in the future
2457 		 * if the inbound keysock message disappears for some strange
2458 		 * reason.
2459 		 */
2460 		ps.sidstr = (char *)(srcid + 1);
2461 		ps.sidtype = srcid->sadb_ident_type;
2462 	}
2463 
2464 	if (kmc != NULL)
2465 		ps.kmproto = kmc->sadb_x_kmc_proto;
2466 
2467 	/*
2468 	 * This is simple, crude, and effective.
2469 	 * Unimplemented optimizations (TBD):
2470 	 * - we can limit how many places we search based on where we
2471 	 * think the SA is filed.
2472 	 * - if we get a dst address, we can hash based on dst addr to find
2473 	 * the correct bucket in the outbound table.
2474 	 */
2475 	ps.inbnd = B_TRUE;
2476 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2477 	ps.inbnd = B_FALSE;
2478 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2479 
2480 	if (ps.mq != NULL)
2481 		sadb_drain_torchq(ip_q, ps.mq);
2482 
2483 	ASSERT(mp->b_cont != NULL);
2484 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2485 	    NULL);
2486 	return (0);
2487 }
2488 
2489 /*
2490  * Common code to delete/get an SA.
2491  */
2492 int
2493 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2494     int *diagnostic, queue_t *pfkey_q, boolean_t delete)
2495 {
2496 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2497 	sadb_address_t *srcext =
2498 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2499 	sadb_address_t *dstext =
2500 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2501 	struct sockaddr_in *src, *dst;
2502 	struct sockaddr_in6 *src6, *dst6;
2503 	sadb_t *sp;
2504 	ipsa_t *outbound_target, *inbound_target;
2505 	isaf_t *inbound, *outbound;
2506 	uint32_t *srcaddr, *dstaddr;
2507 	mblk_t *torchq = NULL;
2508 	sa_family_t af;
2509 
2510 	if (dstext == NULL) {
2511 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2512 		return (EINVAL);
2513 	}
2514 	if (assoc == NULL) {
2515 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2516 		return (EINVAL);
2517 	}
2518 
2519 	/*
2520 	 * Don't worry about IPv6 v4-mapped addresses, sadb_addrcheck()
2521 	 * takes care of them.
2522 	 */
2523 
2524 	dst = (struct sockaddr_in *)(dstext + 1);
2525 	af = dst->sin_family;
2526 	if (af == AF_INET6) {
2527 		sp = &spp->s_v6;
2528 		dst6 = (struct sockaddr_in6 *)dst;
2529 		dstaddr = (uint32_t *)&dst6->sin6_addr;
2530 		if (srcext != NULL) {
2531 			src6 = (struct sockaddr_in6 *)(srcext + 1);
2532 			srcaddr = (uint32_t *)&src6->sin6_addr;
2533 			ASSERT(src6->sin6_family == AF_INET6);
2534 		} else {
2535 			srcaddr = ALL_ZEROES_PTR;
2536 		}
2537 
2538 		outbound = OUTBOUND_BUCKET_V6(sp, *(uint32_t *)dstaddr);
2539 	} else {
2540 		sp = &spp->s_v4;
2541 		dstaddr = (uint32_t *)&dst->sin_addr;
2542 		if (srcext != NULL) {
2543 			src = (struct sockaddr_in *)(srcext + 1);
2544 			srcaddr = (uint32_t *)&src->sin_addr;
2545 			ASSERT(src->sin_family == AF_INET);
2546 		} else {
2547 			srcaddr = ALL_ZEROES_PTR;
2548 		}
2549 		outbound = OUTBOUND_BUCKET_V4(sp, *(uint32_t *)dstaddr);
2550 	}
2551 
2552 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
2553 
2554 	/* Lock down both buckets. */
2555 	mutex_enter(&outbound->isaf_lock);
2556 	mutex_enter(&inbound->isaf_lock);
2557 
2558 	/* Try outbound first. */
2559 	outbound_target = ipsec_getassocbyspi(outbound, assoc->sadb_sa_spi,
2560 	    srcaddr, dstaddr, af);
2561 
2562 	if (outbound_target == NULL || outbound_target->ipsa_haspeer) {
2563 		inbound_target = ipsec_getassocbyspi(inbound,
2564 		    assoc->sadb_sa_spi, srcaddr, dstaddr, af);
2565 	} else {
2566 		inbound_target = NULL;
2567 	}
2568 
2569 	if (outbound_target == NULL && inbound_target == NULL) {
2570 		mutex_exit(&inbound->isaf_lock);
2571 		mutex_exit(&outbound->isaf_lock);
2572 		return (ESRCH);
2573 	}
2574 
2575 	if (delete) {
2576 		/* At this point, I have one or two SAs to be deleted. */
2577 		if (outbound_target != NULL) {
2578 			mutex_enter(&outbound_target->ipsa_lock);
2579 			outbound_target->ipsa_state = IPSA_STATE_DEAD;
2580 			(void) sadb_torch_assoc(outbound, outbound_target,
2581 			    B_FALSE, &torchq);
2582 		}
2583 
2584 		if (inbound_target != NULL) {
2585 			mutex_enter(&inbound_target->ipsa_lock);
2586 			inbound_target->ipsa_state = IPSA_STATE_DEAD;
2587 			(void) sadb_torch_assoc(inbound, inbound_target,
2588 			    B_TRUE, &torchq);
2589 		}
2590 	}
2591 
2592 	mutex_exit(&inbound->isaf_lock);
2593 	mutex_exit(&outbound->isaf_lock);
2594 
2595 	if (torchq != NULL)
2596 		sadb_drain_torchq(spp->s_ip_q, torchq);
2597 
2598 	/*
2599 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
2600 	 * them in { }.
2601 	 */
2602 	ASSERT(mp->b_cont != NULL);
2603 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2604 	    (outbound_target != NULL ? outbound_target : inbound_target));
2605 
2606 	if (outbound_target != NULL) {
2607 		IPSA_REFRELE(outbound_target);
2608 	}
2609 	if (inbound_target != NULL) {
2610 		IPSA_REFRELE(inbound_target);
2611 	}
2612 
2613 	return (0);
2614 }
2615 
2616 /*
2617  * Initialize the mechanism parameters associated with an SA.
2618  * These parameters can be shared by multiple packets, which saves
2619  * us from the overhead of consulting the algorithm table for
2620  * each packet.
2621  */
2622 static void
2623 sadb_init_alginfo(ipsa_t *sa)
2624 {
2625 	ipsec_alginfo_t *alg;
2626 
2627 	mutex_enter(&alg_lock);
2628 
2629 	if (sa->ipsa_encrkey != NULL) {
2630 		alg = ipsec_alglists[IPSEC_ALG_ENCR][sa->ipsa_encr_alg];
2631 		if (alg != NULL && ALG_VALID(alg)) {
2632 			sa->ipsa_emech.cm_type = alg->alg_mech_type;
2633 			sa->ipsa_emech.cm_param = NULL;
2634 			sa->ipsa_emech.cm_param_len = 0;
2635 			sa->ipsa_iv_len = alg->alg_datalen;
2636 		} else
2637 			sa->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
2638 	}
2639 
2640 	if (sa->ipsa_authkey != NULL) {
2641 		alg = ipsec_alglists[IPSEC_ALG_AUTH][sa->ipsa_auth_alg];
2642 		if (alg != NULL && ALG_VALID(alg)) {
2643 			sa->ipsa_amech.cm_type = alg->alg_mech_type;
2644 			sa->ipsa_amech.cm_param = (char *)&sa->ipsa_mac_len;
2645 			sa->ipsa_amech.cm_param_len = sizeof (size_t);
2646 			sa->ipsa_mac_len = (size_t)alg->alg_datalen;
2647 		} else
2648 			sa->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
2649 	}
2650 
2651 	mutex_exit(&alg_lock);
2652 }
2653 
2654 /*
2655  * Perform NAT-traversal cached checksum offset calculations here.
2656  */
2657 static void
2658 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2659     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2660     uint32_t *dst_addr_ptr)
2661 {
2662 	struct sockaddr_in *natt_loc, *natt_rem;
2663 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2664 	uint32_t running_sum = 0;
2665 
2666 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2667 
2668 
2669 	if (natt_rem_ext != NULL) {
2670 		uint32_t l_src;
2671 		uint32_t l_rem;
2672 
2673 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2674 
2675 		/* Ensured by sadb_addrfix(). */
2676 		ASSERT(natt_rem->sin_family == AF_INET);
2677 
2678 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2679 		newbie->ipsa_remote_port = natt_rem->sin_port;
2680 		l_src = *src_addr_ptr;
2681 		l_rem = *natt_rem_ptr;
2682 
2683 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2684 		newbie->ipsa_natt_addr_rem[0] = *natt_rem_ptr;
2685 
2686 		l_src = ntohl(l_src);
2687 		DOWN_SUM(l_src);
2688 		DOWN_SUM(l_src);
2689 		l_rem = ntohl(l_rem);
2690 		DOWN_SUM(l_rem);
2691 		DOWN_SUM(l_rem);
2692 
2693 		/*
2694 		 * We're 1's complement for checksums, so check for wraparound
2695 		 * here.
2696 		 */
2697 		if (l_rem > l_src)
2698 			l_src--;
2699 
2700 		running_sum += l_src - l_rem;
2701 
2702 		DOWN_SUM(running_sum);
2703 		DOWN_SUM(running_sum);
2704 	}
2705 
2706 	if (natt_loc_ext != NULL) {
2707 		uint32_t l_dst;
2708 		uint32_t l_loc;
2709 
2710 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2711 
2712 		/* Ensured by sadb_addrfix(). */
2713 		ASSERT(natt_loc->sin_family == AF_INET);
2714 
2715 		natt_loc_ptr = (uint32_t *)&natt_loc->sin_addr;
2716 		/* TODO - future port flexibility beyond 4500. */
2717 		l_dst = *dst_addr_ptr;
2718 		l_loc = *natt_loc_ptr;
2719 
2720 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2721 		newbie->ipsa_natt_addr_loc[0] = *natt_loc_ptr;
2722 
2723 		l_loc = ntohl(l_loc);
2724 		DOWN_SUM(l_loc);
2725 		DOWN_SUM(l_loc);
2726 		l_dst = ntohl(l_dst);
2727 		DOWN_SUM(l_dst);
2728 		DOWN_SUM(l_dst);
2729 
2730 		/*
2731 		 * We're 1's complement for checksums, so check for wraparound
2732 		 * here.
2733 		 */
2734 		if (l_loc > l_dst)
2735 			l_dst--;
2736 
2737 		running_sum += l_dst - l_loc;
2738 		DOWN_SUM(running_sum);
2739 		DOWN_SUM(running_sum);
2740 	}
2741 
2742 	newbie->ipsa_inbound_cksum = running_sum;
2743 #undef DOWN_SUM
2744 }
2745 
2746 /*
2747  * This function is called from consumers that need to insert a fully-grown
2748  * security association into its tables.  This function takes into account that
2749  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2750  * hash bucket parameters are set in order of what the SA will be most of the
2751  * time.  (For example, an SA with an unspecified source, and a multicast
2752  * destination will primarily be an outbound SA.  OTOH, if that destination
2753  * is unicast for this node, then the SA will primarily be inbound.)
2754  *
2755  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2756  * to check both buckets for purposes of collision.
2757  *
2758  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2759  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2760  * with additional diagnostic information because there is at least one EINVAL
2761  * case here.
2762  */
2763 int
2764 sadb_common_add(queue_t *ip_q, queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2765     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2766     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic)
2767 {
2768 	ipsa_t *newbie_clone = NULL, *scratch;
2769 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2770 	sadb_address_t *srcext =
2771 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2772 	sadb_address_t *dstext =
2773 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2774 	sadb_address_t *isrcext =
2775 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2776 	sadb_address_t *idstext =
2777 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2778 	sadb_x_kmc_t *kmcext =
2779 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2780 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2781 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2782 #if 0
2783 	/*
2784 	 * XXXMLS - When Trusted Solaris or Multi-Level Secure functionality
2785 	 * comes to ON, examine these if 0'ed fragments.  Look for XXXMLS.
2786 	 */
2787 	sadb_sens_t *sens = (sadb_sens_t *);
2788 #endif
2789 	struct sockaddr_in *src, *dst, *isrc, *idst;
2790 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2791 	sadb_lifetime_t *soft =
2792 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2793 	sadb_lifetime_t *hard =
2794 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2795 	sa_family_t af;
2796 	int error = 0;
2797 	boolean_t isupdate = (newbie != NULL);
2798 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2799 	mblk_t *ctl_mp = NULL;
2800 
2801 	src = (struct sockaddr_in *)(srcext + 1);
2802 	src6 = (struct sockaddr_in6 *)(srcext + 1);
2803 	dst = (struct sockaddr_in *)(dstext + 1);
2804 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2805 	if (isrcext != NULL) {
2806 		isrc = (struct sockaddr_in *)(isrcext + 1);
2807 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2808 		ASSERT(idstext != NULL);
2809 		idst = (struct sockaddr_in *)(idstext + 1);
2810 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2811 	} else {
2812 		isrc = NULL;
2813 		isrc6 = NULL;
2814 	}
2815 
2816 	af = src->sin_family;
2817 
2818 	if (af == AF_INET) {
2819 		src_addr_ptr = (uint32_t *)&src->sin_addr;
2820 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2821 	} else {
2822 		ASSERT(af == AF_INET6);
2823 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2824 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2825 	}
2826 
2827 	if (!isupdate) {
2828 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
2829 		    src_addr_ptr, dst_addr_ptr, af);
2830 		if (newbie == NULL)
2831 			return (ENOMEM);
2832 	}
2833 
2834 	mutex_enter(&newbie->ipsa_lock);
2835 
2836 	if (isrc != NULL) {
2837 		if (isrc->sin_family == AF_INET) {
2838 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
2839 				if (srcext->sadb_address_proto != 0) {
2840 					/*
2841 					 * Mismatched outer-packet protocol
2842 					 * and inner-packet address family.
2843 					 */
2844 					mutex_exit(&newbie->ipsa_lock);
2845 					error = EPROTOTYPE;
2846 					goto error;
2847 				} else {
2848 					/* Fill in with explicit protocol. */
2849 					srcext->sadb_address_proto =
2850 					    IPPROTO_ENCAP;
2851 					dstext->sadb_address_proto =
2852 					    IPPROTO_ENCAP;
2853 				}
2854 			}
2855 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
2856 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
2857 		} else {
2858 			ASSERT(isrc->sin_family == AF_INET6);
2859 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
2860 				if (srcext->sadb_address_proto != 0) {
2861 					/*
2862 					 * Mismatched outer-packet protocol
2863 					 * and inner-packet address family.
2864 					 */
2865 					mutex_exit(&newbie->ipsa_lock);
2866 					error = EPROTOTYPE;
2867 					goto error;
2868 				} else {
2869 					/* Fill in with explicit protocol. */
2870 					srcext->sadb_address_proto =
2871 					    IPPROTO_IPV6;
2872 					dstext->sadb_address_proto =
2873 					    IPPROTO_IPV6;
2874 				}
2875 			}
2876 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
2877 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
2878 		}
2879 		newbie->ipsa_innerfam = isrc->sin_family;
2880 
2881 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
2882 		    newbie->ipsa_innerfam);
2883 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
2884 		    newbie->ipsa_innerfam);
2885 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
2886 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
2887 
2888 		/* Unique value uses inner-ports for Tunnel Mode... */
2889 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
2890 		    idst->sin_port, dstext->sadb_address_proto,
2891 		    idstext->sadb_address_proto);
2892 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
2893 		    idst->sin_port, dstext->sadb_address_proto,
2894 		    idstext->sadb_address_proto);
2895 	} else {
2896 		/* ... and outer-ports for Transport Mode. */
2897 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
2898 		    dst->sin_port, dstext->sadb_address_proto, 0);
2899 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
2900 		    dst->sin_port, dstext->sadb_address_proto, 0);
2901 	}
2902 	if (newbie->ipsa_unique_mask != (uint64_t)0)
2903 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
2904 
2905 
2906 	sadb_nat_calculations(newbie,
2907 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
2908 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
2909 	    src_addr_ptr, dst_addr_ptr);
2910 
2911 	newbie->ipsa_type = samsg->sadb_msg_satype;
2912 	ASSERT(assoc->sadb_sa_state == SADB_SASTATE_MATURE);
2913 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
2914 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
2915 	/*
2916 	 * Use |= because we set unique fields above.  UNIQUE is filtered
2917 	 * out before we reach here so it's not like we're sabotaging anything.
2918 	 * ASSERT we're either 0 or UNIQUE for good measure, though.
2919 	 */
2920 	ASSERT((newbie->ipsa_flags & IPSA_F_UNIQUE) == newbie->ipsa_flags);
2921 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
2922 	if ((newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
2923 		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) ||
2924 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
2925 		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) ||
2926 	    (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
2927 		ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL)) {
2928 		mutex_exit(&newbie->ipsa_lock);
2929 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
2930 		error = EINVAL;
2931 		goto error;
2932 	}
2933 	/*
2934 	 * If unspecified source address, force replay_wsize to 0.
2935 	 * This is because an SA that has multiple sources of secure
2936 	 * traffic cannot enforce a replay counter w/o synchronizing the
2937 	 * senders.
2938 	 */
2939 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
2940 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
2941 	else
2942 		newbie->ipsa_replay_wsize = 0;
2943 
2944 	(void) drv_getparm(TIME, &newbie->ipsa_addtime);
2945 
2946 	if (kmcext != NULL) {
2947 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
2948 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
2949 	}
2950 
2951 	/*
2952 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
2953 	 * The spec says that one can update current lifetimes, but
2954 	 * that seems impractical, especially in the larval-to-mature
2955 	 * update that this function performs.
2956 	 */
2957 	if (soft != NULL) {
2958 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
2959 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
2960 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
2961 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
2962 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
2963 	}
2964 	if (hard != NULL) {
2965 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
2966 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
2967 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
2968 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
2969 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
2970 	}
2971 
2972 	newbie->ipsa_authtmpl = NULL;
2973 	newbie->ipsa_encrtmpl = NULL;
2974 
2975 	if (akey != NULL) {
2976 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
2977 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
2978 		/* In case we have to round up to the next byte... */
2979 		if ((akey->sadb_key_bits & 0x7) != 0)
2980 			newbie->ipsa_authkeylen++;
2981 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
2982 		    KM_NOSLEEP);
2983 		if (newbie->ipsa_authkey == NULL) {
2984 			error = ENOMEM;
2985 			mutex_exit(&newbie->ipsa_lock);
2986 			goto error;
2987 		}
2988 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
2989 		bzero(akey + 1, newbie->ipsa_authkeylen);
2990 
2991 		/*
2992 		 * Pre-initialize the kernel crypto framework key
2993 		 * structure.
2994 		 */
2995 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
2996 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
2997 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
2998 
2999 		mutex_enter(&alg_lock);
3000 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3001 		mutex_exit(&alg_lock);
3002 		if (error != 0) {
3003 			mutex_exit(&newbie->ipsa_lock);
3004 			goto error;
3005 		}
3006 	}
3007 
3008 	if (ekey != NULL) {
3009 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3010 		newbie->ipsa_encrkeylen = SADB_1TO8(ekey->sadb_key_bits);
3011 		/* In case we have to round up to the next byte... */
3012 		if ((ekey->sadb_key_bits & 0x7) != 0)
3013 			newbie->ipsa_encrkeylen++;
3014 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3015 		    KM_NOSLEEP);
3016 		if (newbie->ipsa_encrkey == NULL) {
3017 			error = ENOMEM;
3018 			mutex_exit(&newbie->ipsa_lock);
3019 			goto error;
3020 		}
3021 		bcopy(ekey + 1, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3022 		/* XXX is this safe w.r.t db_ref, etc? */
3023 		bzero(ekey + 1, newbie->ipsa_encrkeylen);
3024 
3025 		/*
3026 		 * Pre-initialize the kernel crypto framework key
3027 		 * structure.
3028 		 */
3029 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3030 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3031 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3032 
3033 		mutex_enter(&alg_lock);
3034 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3035 		mutex_exit(&alg_lock);
3036 		if (error != 0) {
3037 			mutex_exit(&newbie->ipsa_lock);
3038 			goto error;
3039 		}
3040 	}
3041 
3042 	sadb_init_alginfo(newbie);
3043 
3044 	/*
3045 	 * Ptrs to processing functions.
3046 	 */
3047 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3048 		ipsecesp_init_funcs(newbie);
3049 	else
3050 		ipsecah_init_funcs(newbie);
3051 	ASSERT(newbie->ipsa_output_func != NULL &&
3052 	    newbie->ipsa_input_func != NULL);
3053 
3054 	/*
3055 	 * Certificate ID stuff.
3056 	 */
3057 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3058 		sadb_ident_t *id =
3059 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3060 
3061 		/*
3062 		 * Can assume strlen() will return okay because ext_check() in
3063 		 * keysock.c prepares the string for us.
3064 		 */
3065 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3066 		    (char *)(id+1));
3067 		if (newbie->ipsa_src_cid == NULL) {
3068 			error = ENOMEM;
3069 			mutex_exit(&newbie->ipsa_lock);
3070 			goto error;
3071 		}
3072 	}
3073 
3074 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3075 		sadb_ident_t *id =
3076 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3077 
3078 		/*
3079 		 * Can assume strlen() will return okay because ext_check() in
3080 		 * keysock.c prepares the string for us.
3081 		 */
3082 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3083 		    (char *)(id+1));
3084 		if (newbie->ipsa_dst_cid == NULL) {
3085 			error = ENOMEM;
3086 			mutex_exit(&newbie->ipsa_lock);
3087 			goto error;
3088 		}
3089 	}
3090 
3091 #if 0
3092 	/* XXXMLS  SENSITIVITY handling code. */
3093 	if (sens != NULL) {
3094 		int i;
3095 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3096 
3097 		newbie->ipsa_dpd = sens->sadb_sens_dpd;
3098 		newbie->ipsa_senslevel = sens->sadb_sens_sens_level;
3099 		newbie->ipsa_integlevel = sens->sadb_sens_integ_level;
3100 		newbie->ipsa_senslen = SADB_64TO8(sens->sadb_sens_sens_len);
3101 		newbie->ipsa_integlen = SADB_64TO8(sens->sadb_sens_integ_len);
3102 		newbie->ipsa_integ = kmem_alloc(newbie->ipsa_integlen,
3103 		    KM_NOSLEEP);
3104 		if (newbie->ipsa_integ == NULL) {
3105 			error = ENOMEM;
3106 			mutex_exit(&newbie->ipsa_lock);
3107 			goto error;
3108 		}
3109 		newbie->ipsa_sens = kmem_alloc(newbie->ipsa_senslen,
3110 		    KM_NOSLEEP);
3111 		if (newbie->ipsa_sens == NULL) {
3112 			error = ENOMEM;
3113 			mutex_exit(&newbie->ipsa_lock);
3114 			goto error;
3115 		}
3116 		for (i = 0; i < sens->sadb_sens_sens_len; i++) {
3117 			newbie->ipsa_sens[i] = *bitmap;
3118 			bitmap++;
3119 		}
3120 		for (i = 0; i < sens->sadb_sens_integ_len; i++) {
3121 			newbie->ipsa_integ[i] = *bitmap;
3122 			bitmap++;
3123 		}
3124 	}
3125 
3126 #endif
3127 
3128 	/* now that the SA has been updated, set its new state */
3129 	newbie->ipsa_state = assoc->sadb_sa_state;
3130 
3131 	/*
3132 	 * The less locks I hold when doing an insertion and possible cloning,
3133 	 * the better!
3134 	 */
3135 	mutex_exit(&newbie->ipsa_lock);
3136 
3137 	if (clone) {
3138 		newbie_clone = sadb_cloneassoc(newbie);
3139 
3140 		if (newbie_clone == NULL) {
3141 			error = ENOMEM;
3142 			goto error;
3143 		}
3144 		newbie->ipsa_haspeer = B_TRUE;
3145 		newbie_clone->ipsa_haspeer = B_TRUE;
3146 	}
3147 
3148 	/*
3149 	 * Enter the bucket locks.  The order of entry is outbound,
3150 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3151 	 * based on the destination address type.  If the destination address
3152 	 * type is for a node that isn't mine (or potentially mine), the
3153 	 * "primary" bucket is the outbound one.
3154 	 */
3155 	if (ksi->ks_in_dsttype == KS_IN_ADDR_NOTME) {
3156 		/* primary == outbound */
3157 		mutex_enter(&primary->isaf_lock);
3158 		mutex_enter(&secondary->isaf_lock);
3159 	} else {
3160 		/* primary == inbound */
3161 		mutex_enter(&secondary->isaf_lock);
3162 		mutex_enter(&primary->isaf_lock);
3163 	}
3164 
3165 	IPSECHW_DEBUG(IPSECHW_SADB, ("sadb_common_add: spi = 0x%x\n",
3166 	    newbie->ipsa_spi));
3167 
3168 	/*
3169 	 * sadb_insertassoc() doesn't increment the reference
3170 	 * count.  We therefore have to increment the
3171 	 * reference count one more time to reflect the
3172 	 * pointers of the table that reference this SA.
3173 	 */
3174 	IPSA_REFHOLD(newbie);
3175 
3176 	if (isupdate) {
3177 		/*
3178 		 * Unlink from larval holding cell in the "inbound" fanout.
3179 		 */
3180 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3181 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3182 		sadb_unlinkassoc(newbie);
3183 	}
3184 
3185 	mutex_enter(&newbie->ipsa_lock);
3186 	error = sadb_insertassoc(newbie, primary);
3187 	if (error == 0) {
3188 		ctl_mp = sadb_fmt_sa_req(DL_CO_SET, newbie->ipsa_type, newbie,
3189 		    is_inbound);
3190 	}
3191 	mutex_exit(&newbie->ipsa_lock);
3192 
3193 	if (error != 0) {
3194 		/*
3195 		 * Since sadb_insertassoc() failed, we must decrement the
3196 		 * refcount again so the cleanup code will actually free
3197 		 * the offending SA.
3198 		 */
3199 		IPSA_REFRELE(newbie);
3200 		goto error_unlock;
3201 	}
3202 
3203 	if (newbie_clone != NULL) {
3204 		mutex_enter(&newbie_clone->ipsa_lock);
3205 		error = sadb_insertassoc(newbie_clone, secondary);
3206 		mutex_exit(&newbie_clone->ipsa_lock);
3207 		if (error != 0) {
3208 			/* Collision in secondary table. */
3209 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3210 			goto error_unlock;
3211 		}
3212 		IPSA_REFHOLD(newbie_clone);
3213 	} else {
3214 		ASSERT(primary != secondary);
3215 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3216 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3217 		if (scratch != NULL) {
3218 			/* Collision in secondary table. */
3219 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3220 			/* Set the error, since ipsec_getassocbyspi() can't. */
3221 			error = EEXIST;
3222 			goto error_unlock;
3223 		}
3224 	}
3225 
3226 	/* OKAY!  So let's do some reality check assertions. */
3227 
3228 	ASSERT(!MUTEX_HELD(&newbie->ipsa_lock));
3229 	ASSERT(newbie_clone == NULL || (!MUTEX_HELD(&newbie_clone->ipsa_lock)));
3230 	/*
3231 	 * If hardware acceleration could happen, send it.
3232 	 */
3233 	if (ctl_mp != NULL) {
3234 		putnext(ip_q, ctl_mp);
3235 		ctl_mp = NULL;
3236 	}
3237 
3238 error_unlock:
3239 
3240 	/*
3241 	 * We can exit the locks in any order.	Only entrance needs to
3242 	 * follow any protocol.
3243 	 */
3244 	mutex_exit(&secondary->isaf_lock);
3245 	mutex_exit(&primary->isaf_lock);
3246 
3247 	/* Common error point for this routine. */
3248 error:
3249 	if (newbie != NULL) {
3250 		IPSA_REFRELE(newbie);
3251 	}
3252 	if (newbie_clone != NULL) {
3253 		IPSA_REFRELE(newbie_clone);
3254 	}
3255 	if (ctl_mp != NULL)
3256 		freemsg(ctl_mp);
3257 
3258 	if (error == 0) {
3259 		/*
3260 		 * Construct favorable PF_KEY return message and send to
3261 		 * keysock.  (Q:  Do I need to pass "newbie"?  If I do,
3262 		 * make sure to REFHOLD, call, then REFRELE.)
3263 		 */
3264 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3265 	}
3266 
3267 	return (error);
3268 }
3269 
3270 /*
3271  * Set the time of first use for a security association.  Update any
3272  * expiration times as a result.
3273  */
3274 void
3275 sadb_set_usetime(ipsa_t *assoc)
3276 {
3277 	mutex_enter(&assoc->ipsa_lock);
3278 	/*
3279 	 * Caller does check usetime before calling me usually, and
3280 	 * double-checking is better than a mutex_enter/exit hit.
3281 	 */
3282 	if (assoc->ipsa_usetime == 0) {
3283 		/*
3284 		 * This is redundant for outbound SA's, as
3285 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3286 		 * Inbound SAs, however, have no such protection.
3287 		 */
3288 		assoc->ipsa_flags |= IPSA_F_USED;
3289 
3290 		(void) drv_getparm(TIME, &assoc->ipsa_usetime);
3291 
3292 		/*
3293 		 * After setting the use time, see if we have a use lifetime
3294 		 * that would cause the actual SA expiration time to shorten.
3295 		 */
3296 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3297 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3298 	}
3299 	mutex_exit(&assoc->ipsa_lock);
3300 }
3301 
/*
 * Send up a PF_KEY expire message for this association.
 *
 * pfkey_q - queue the SADB_EXPIRE message is sent on with putnext(); if
 *	     NULL nothing is built or sent.
 * assoc   - the SA that expired.  The caller must hold assoc->ipsa_lock.
 *
 * The message consists of a KEYSOCK_OUT mblk (from sadb_keysock_out())
 * followed by one data mblk holding, in this order: the base sadb_msg,
 * an SA extension, a CURRENT lifetime extension, a HARD or SOFT lifetime
 * extension (HARD if the SA is DEAD, SOFT otherwise), source and
 * destination address extensions and, for tunnel-mode SAs, inner source
 * and inner destination address extensions.
 *
 * Allocation failures and unknown address families cause the message to be
 * dropped silently (the latter with a warning); nothing is returned to the
 * caller.
 */
static void
sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
{
	mblk_t *mp, *mp1;
	int alloclen, af;
	sadb_msg_t *samsg;
	sadb_lifetime_t *current, *expire;
	sadb_sa_t *saext;
	uint8_t *end;
	boolean_t tunnel_mode;

	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));

	/* Don't bother sending if there's no queue. */
	if (pfkey_q == NULL)
		return;

	mp = sadb_keysock_out(0);
	if (mp == NULL) {
		/* cmn_err(CE_WARN, */
		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
		return;
	}

	/*
	 * Size of the fixed part of the message: base header, two lifetime
	 * extensions, two address extension headers and the SA extension.
	 * The sockaddrs that follow the address headers are added below.
	 */
	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
	    2 * sizeof (sadb_address_t) + sizeof (*saext);

	af = assoc->ipsa_addrfam;
	switch (af) {
	case AF_INET:
		alloclen += 2 * sizeof (struct sockaddr_in);
		break;
	case AF_INET6:
		alloclen += 2 * sizeof (struct sockaddr_in6);
		break;
	default:
		/* Won't happen unless there's a kernel bug. */
		freeb(mp);
		cmn_err(CE_WARN,
		    "sadb_expire_assoc: Unknown address length.\n");
		return;
	}

	/*
	 * Tunnel-mode SAs carry two more address extensions (inner source
	 * and inner destination), sized by the inner address family.
	 */
	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
	if (tunnel_mode) {
		alloclen += 2 * sizeof (sadb_address_t);
		switch (assoc->ipsa_innerfam) {
		case AF_INET:
			alloclen += 2 * sizeof (struct sockaddr_in);
			break;
		case AF_INET6:
			alloclen += 2 * sizeof (struct sockaddr_in6);
			break;
		default:
			/* Won't happen unless there's a kernel bug. */
			freeb(mp);
			cmn_err(CE_WARN, "sadb_expire_assoc: "
			    "Unknown inner address length.\n");
			return;
		}
	}

	mp->b_cont = allocb(alloclen, BPRI_HI);
	if (mp->b_cont == NULL) {
		freeb(mp);
		/* cmn_err(CE_WARN, */
		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
		return;
	}

	/*
	 * Keep the head of the chain in mp1 for the final putnext(); from
	 * here on mp is the data block being filled in, and "end" marks the
	 * limit the address-extension builder may write up to.
	 */
	mp1 = mp;
	mp = mp->b_cont;
	end = mp->b_wptr + alloclen;

	/* Base PF_KEY message header. */
	samsg = (sadb_msg_t *)mp->b_wptr;
	mp->b_wptr += sizeof (*samsg);
	samsg->sadb_msg_version = PF_KEY_V2;
	samsg->sadb_msg_type = SADB_EXPIRE;
	samsg->sadb_msg_errno = 0;
	samsg->sadb_msg_satype = assoc->ipsa_type;
	samsg->sadb_msg_len = SADB_8TO64(alloclen);
	samsg->sadb_msg_reserved = 0;
	samsg->sadb_msg_seq = 0;
	samsg->sadb_msg_pid = 0;

	/* SA extension describing the expiring association. */
	saext = (sadb_sa_t *)mp->b_wptr;
	mp->b_wptr += sizeof (*saext);
	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
	saext->sadb_sa_exttype = SADB_EXT_SA;
	saext->sadb_sa_spi = assoc->ipsa_spi;
	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
	saext->sadb_sa_state = assoc->ipsa_state;
	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
	saext->sadb_sa_flags = assoc->ipsa_flags;

	/* CURRENT lifetime: the SA's usage counters and timestamps so far. */
	current = (sadb_lifetime_t *)mp->b_wptr;
	mp->b_wptr += sizeof (sadb_lifetime_t);
	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
	current->sadb_lifetime_allocations = assoc->ipsa_alloc;
	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
	current->sadb_lifetime_usetime = assoc->ipsa_usetime;

	/*
	 * The lifetime that expired: the hard limits if the SA is DEAD,
	 * otherwise the soft limits (in which case the SA must be DYING).
	 */
	expire = (sadb_lifetime_t *)mp->b_wptr;
	mp->b_wptr += sizeof (*expire);
	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));

	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
	} else {
		ASSERT(assoc->ipsa_state == IPSA_STATE_DYING);
		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
	}

	/*
	 * Outer source and destination addresses.  In tunnel mode the port
	 * argument is passed as 0 here; the SA's ports are reported with
	 * the inner addresses below instead.
	 */
	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
	    SA_PROTO(assoc), 0);
	ASSERT(mp->b_wptr != NULL);

	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
	    SA_PROTO(assoc), 0);
	ASSERT(mp->b_wptr != NULL);

	/* Inner addresses (with their prefix lengths) for tunnel mode. */
	if (tunnel_mode) {
		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
		    assoc->ipsa_innersrcpfx);
		ASSERT(mp->b_wptr != NULL);
		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
		    assoc->ipsa_innerdstpfx);
		ASSERT(mp->b_wptr != NULL);
	}

	/* Can just putnext, we're ready to go! */
	putnext(pfkey_q, mp1);
}
3455 
3456 /*
3457  * "Age" the SA with the number of bytes that was used to protect traffic.
3458  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3459  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3460  * otherwise.  (If B_FALSE is returned, the association either was, or became
3461  * DEAD.)
3462  */
3463 boolean_t
3464 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3465     boolean_t sendmsg)
3466 {
3467 	boolean_t rc = B_TRUE;
3468 	uint64_t newtotal;
3469 
3470 	mutex_enter(&assoc->ipsa_lock);
3471 	newtotal = assoc->ipsa_bytes + bytes;
3472 	if (assoc->ipsa_hardbyteslt != 0 &&
3473 	    newtotal >= assoc->ipsa_hardbyteslt) {
3474 		if (assoc->ipsa_state < IPSA_STATE_DEAD) {
3475 			/*
3476 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3477 			 * this off on another non-interrupt thread.  Also
3478 			 * unlink this SA immediately.
3479 			 */
3480 			assoc->ipsa_state = IPSA_STATE_DEAD;
3481 			if (sendmsg)
3482 				sadb_expire_assoc(pfkey_q, assoc);
3483 			/*
3484 			 * Set non-zero expiration time so sadb_age_assoc()
3485 			 * will work when reaping.
3486 			 */
3487 			assoc->ipsa_hardexpiretime = (time_t)1;
3488 		} /* Else someone beat me to it! */
3489 		rc = B_FALSE;
3490 	} else if (assoc->ipsa_softbyteslt != 0 &&
3491 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3492 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3493 			/*
3494 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3495 			 * this off on another non-interrupt thread.
3496 			 */
3497 			assoc->ipsa_state = IPSA_STATE_DYING;
3498 			assoc->ipsa_bytes = newtotal;
3499 			if (sendmsg)
3500 				sadb_expire_assoc(pfkey_q, assoc);
3501 		} /* Else someone beat me to it! */
3502 	}
3503 	if (rc == B_TRUE)
3504 		assoc->ipsa_bytes = newtotal;
3505 	mutex_exit(&assoc->ipsa_lock);
3506 	return (rc);
3507 }
3508 
3509 /*
3510  * Push one or more DL_CO_DELETE messages queued up by
3511  * sadb_torch_assoc down to the underlying driver now that it's a
3512  * convenient time for it (i.e., ipsa bucket locks not held).
3513  */
3514 static void
3515 sadb_drain_torchq(queue_t *q, mblk_t *mp)
3516 {
3517 	while (mp != NULL) {
3518 		mblk_t *next = mp->b_next;
3519 		mp->b_next = NULL;
3520 		if (q != NULL)
3521 			putnext(q, mp);
3522 		else
3523 			freemsg(mp);
3524 		mp = next;
3525 	}
3526 }
3527 
3528 /*
3529  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3530  *     sadb_age_assoc().
3531  *
3532  * If SA is hardware-accelerated, and we can't allocate the mblk
3533  * containing the DL_CO_DELETE, just return; it will remain in the
3534  * table and be swept up by sadb_ager() in a subsequent pass.
3535  */
3536 static ipsa_t *
3537 sadb_torch_assoc(isaf_t *head, ipsa_t *sa, boolean_t inbnd, mblk_t **mq)
3538 {
3539 	mblk_t *mp;
3540 
3541 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3542 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3543 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3544 
3545 	/*
3546 	 * Force cached SAs to be revalidated..
3547 	 */
3548 	head->isaf_gen++;
3549 
3550 	if (sa->ipsa_flags & IPSA_F_HW) {
3551 		mp = sadb_fmt_sa_req(DL_CO_DELETE, sa->ipsa_type, sa, inbnd);
3552 		if (mp == NULL) {
3553 			mutex_exit(&sa->ipsa_lock);
3554 			return (NULL);
3555 		}
3556 		mp->b_next = *mq;
3557 		*mq = mp;
3558 	}
3559 	mutex_exit(&sa->ipsa_lock);
3560 	sadb_unlinkassoc(sa);
3561 
3562 	return (NULL);
3563 }
3564 
3565 /*
3566  * Return "assoc" iff haspeer is true and I send an expire.  This allows
3567  * the consumers' aging functions to tidy up an expired SA's peer.
3568  */
3569 static ipsa_t *
3570 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3571     time_t current, int reap_delay, boolean_t inbnd, mblk_t **mq)
3572 {
3573 	ipsa_t *retval = NULL;
3574 
3575 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3576 
3577 	mutex_enter(&assoc->ipsa_lock);
3578 
3579 	if ((assoc->ipsa_state == IPSA_STATE_LARVAL) &&
3580 	    (assoc->ipsa_hardexpiretime <= current)) {
3581 		assoc->ipsa_state = IPSA_STATE_DEAD;
3582 		return (sadb_torch_assoc(head, assoc, inbnd, mq));
3583 	}
3584 
3585 	/*
3586 	 * Check lifetimes.  Fortunately, SA setup is done
3587 	 * such that there are only two times to look at,
3588 	 * softexpiretime, and hardexpiretime.
3589 	 *
3590 	 * Check hard first.
3591 	 */
3592 
3593 	if (assoc->ipsa_hardexpiretime != 0 &&
3594 	    assoc->ipsa_hardexpiretime <= current) {
3595 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
3596 			return (sadb_torch_assoc(head, assoc, inbnd, mq));
3597 
3598 		/*
3599 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
3600 		 */
3601 		assoc->ipsa_state = IPSA_STATE_DEAD;
3602 		if (assoc->ipsa_haspeer) {
3603 			/*
3604 			 * If I return assoc, I have to bump up its
3605 			 * reference count to keep with the ipsa_t reference
3606 			 * count semantics.
3607 			 */
3608 			IPSA_REFHOLD(assoc);
3609 			retval = assoc;
3610 		}
3611 		sadb_expire_assoc(pfkey_q, assoc);
3612 		assoc->ipsa_hardexpiretime = current + reap_delay;
3613 	} else if (assoc->ipsa_softexpiretime != 0 &&
3614 	    assoc->ipsa_softexpiretime <= current &&
3615 	    assoc->ipsa_state < IPSA_STATE_DYING) {
3616 		/*
3617 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
3618 		 * this off on another non-interrupt thread.
3619 		 */
3620 		assoc->ipsa_state = IPSA_STATE_DYING;
3621 		if (assoc->ipsa_haspeer) {
3622 			/*
3623 			 * If I return assoc, I have to bump up its
3624 			 * reference count to keep with the ipsa_t reference
3625 			 * count semantics.
3626 			 */
3627 			IPSA_REFHOLD(assoc);
3628 			retval = assoc;
3629 		}
3630 		sadb_expire_assoc(pfkey_q, assoc);
3631 	}
3632 
3633 	mutex_exit(&assoc->ipsa_lock);
3634 	return (retval);
3635 }
3636 
/*
 * Called by a consumer protocol to do the dirty work of reaping dead
 * Security Associations.
 *
 * sp         - the SA database to age.
 * pfkey_q    - queue passed to sadb_age_assoc() for SADB_EXPIRE messages.
 * ip_q       - queue the queued-up DL_CO_DELETE messages are drained to.
 * reap_delay - passed to sadb_age_assoc(); added to the current time to
 *		set when a hard-expired SA gets unlinked.
 *
 * The pass runs in this order: expired ACQUIRE records, the inbound
 * fanout, state propagation to the outbound peers of inbound SAs that
 * expired, the outbound fanout, state propagation to the inbound peers of
 * outbound SAs that expired, and finally an identity garbage collection.
 */
void
sadb_ager(sadb_t *sp, queue_t *pfkey_q, queue_t *ip_q, int reap_delay)
{
	int i;
	isaf_t *bucket;
	ipsa_t *assoc, *spare;
	iacqf_t *acqlist;
	ipsacq_t *acqrec, *spareacq;
	/*
	 * Singly-linked list of SAs returned by sadb_age_assoc(), i.e. SAs
	 * that have a peer and just had an expire generated.  Each entry
	 * owns one reference on its SA.
	 */
	struct templist {
		ipsa_t *ipsa;
		struct templist *next;
	} *haspeerlist = NULL, *newbie;
	time_t current;
	int outhash;
	/* DL_CO_DELETE messages queued by sadb_torch_assoc(). */
	mblk_t *mq = NULL;

	/*
	 * Do my dirty work.  This includes aging real entries, aging
	 * larvals, and aging outstanding ACQUIREs.
	 *
	 * I hope I don't tie up resources for too long.
	 */

	/* Snapshot current time now. */
	(void) drv_getparm(TIME, &current);

	/* Age acquires. */

	for (i = 0; i < sp->sdb_hashsize; i++) {
		acqlist = &sp->sdb_acq[i];
		mutex_enter(&acqlist->iacqf_lock);
		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
		    acqrec = spareacq) {
			/* Grab the successor first; acqrec may be destroyed. */
			spareacq = acqrec->ipsacq_next;
			if (current > acqrec->ipsacq_expire)
				sadb_destroy_acquire(acqrec);
		}
		mutex_exit(&acqlist->iacqf_lock);
	}

	/* Age inbound associations. */
	for (i = 0; i < sp->sdb_hashsize; i++) {
		bucket = &(sp->sdb_if[i]);
		mutex_enter(&bucket->isaf_lock);
		for (assoc = bucket->isaf_ipsa; assoc != NULL;
		    assoc = spare) {
			/*
			 * Grab the successor first; sadb_age_assoc() may
			 * unlink assoc from the bucket.
			 */
			spare = assoc->ipsa_next;
			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
			    reap_delay, B_TRUE, &mq) != NULL) {
				/*
				 * sadb_age_assoc() increments the refcnt,
				 * effectively doing an IPSA_REFHOLD().
				 */
				newbie = kmem_alloc(sizeof (*newbie),
				    KM_NOSLEEP);
				if (newbie == NULL) {
					/*
					 * Don't forget to REFRELE().
					 */
					IPSA_REFRELE(assoc);
					continue;	/* for loop... */
				}
				newbie->next = haspeerlist;
				newbie->ipsa = assoc;
				haspeerlist = newbie;
			}
		}
		mutex_exit(&bucket->isaf_lock);
	}

	/* No bucket locks are held now, so push the queued deletes down. */
	if (mq != NULL) {
		sadb_drain_torchq(ip_q, mq);
		mq = NULL;
	}
	/*
	 * Haspeer cases will contain both IPv4 and IPv6.  This code
	 * is address independent.
	 */
	while (haspeerlist != NULL) {
		/* "spare" contains the SA that has a peer. */
		spare = haspeerlist->ipsa;
		newbie = haspeerlist;
		haspeerlist = newbie->next;
		kmem_free(newbie, sizeof (*newbie));
		/*
		 * Pick peer bucket based on addrfam.
		 */
		if (spare->ipsa_addrfam == AF_INET6) {
			outhash = OUTBOUND_HASH_V6(sp,
			    *((in6_addr_t *)&spare->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp,
			    *((ipaddr_t *)&spare->ipsa_dstaddr));
		}
		bucket = &(sp->sdb_of[outhash]);

		mutex_enter(&bucket->isaf_lock);
		assoc = ipsec_getassocbyspi(bucket, spare->ipsa_spi,
		    spare->ipsa_srcaddr, spare->ipsa_dstaddr,
		    spare->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (assoc != NULL) {
			/*
			 * Copy the expired SA's state to its outbound peer.
			 * A DEAD peer gets a non-zero hard expiration time
			 * so that the aging pass will reap it.
			 */
			mutex_enter(&assoc->ipsa_lock);
			mutex_enter(&spare->ipsa_lock);
			assoc->ipsa_state = spare->ipsa_state;
			if (assoc->ipsa_state == IPSA_STATE_DEAD)
				assoc->ipsa_hardexpiretime = 1;
			mutex_exit(&spare->ipsa_lock);
			mutex_exit(&assoc->ipsa_lock);
			/* Drop the reference held on the looked-up peer. */
			IPSA_REFRELE(assoc);
		}
		/* Drop the reference taken by sadb_age_assoc(). */
		IPSA_REFRELE(spare);
	}

	/* Age outbound associations. */
	for (i = 0; i < sp->sdb_hashsize; i++) {
		bucket = &(sp->sdb_of[i]);
		mutex_enter(&bucket->isaf_lock);
		for (assoc = bucket->isaf_ipsa; assoc != NULL;
		    assoc = spare) {
			/*
			 * Grab the successor first; sadb_age_assoc() may
			 * unlink assoc from the bucket.
			 */
			spare = assoc->ipsa_next;
			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
			    reap_delay, B_FALSE, &mq) != NULL) {
				/*
				 * sadb_age_assoc() increments the refcnt,
				 * effectively doing an IPSA_REFHOLD().
				 */
				newbie = kmem_alloc(sizeof (*newbie),
				    KM_NOSLEEP);
				if (newbie == NULL) {
					/*
					 * Don't forget to REFRELE().
					 */
					IPSA_REFRELE(assoc);
					continue;	/* for loop... */
				}
				newbie->next = haspeerlist;
				newbie->ipsa = assoc;
				haspeerlist = newbie;
			}
		}
		mutex_exit(&bucket->isaf_lock);
	}
	/* No bucket locks are held now, so push the queued deletes down. */
	if (mq != NULL) {
		sadb_drain_torchq(ip_q, mq);
		mq = NULL;
	}
	/*
	 * Haspeer cases will contain both IPv4 and IPv6.  This code
	 * is address independent.
	 */
	while (haspeerlist != NULL) {
		/* "spare" contains the SA that has a peer. */
		spare = haspeerlist->ipsa;
		newbie = haspeerlist;
		haspeerlist = newbie->next;
		kmem_free(newbie, sizeof (*newbie));
		/*
		 * Pick the inbound peer bucket based on the SPI.
		 */
		bucket = INBOUND_BUCKET(sp, spare->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		assoc = ipsec_getassocbyspi(bucket, spare->ipsa_spi,
		    spare->ipsa_srcaddr, spare->ipsa_dstaddr,
		    spare->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (assoc != NULL) {
			/*
			 * Copy the expired SA's state to its inbound peer.
			 * A DEAD peer gets a non-zero hard expiration time
			 * so that the aging pass will reap it.
			 */
			mutex_enter(&assoc->ipsa_lock);
			mutex_enter(&spare->ipsa_lock);
			assoc->ipsa_state = spare->ipsa_state;
			if (assoc->ipsa_state == IPSA_STATE_DEAD)
				assoc->ipsa_hardexpiretime = 1;
			mutex_exit(&spare->ipsa_lock);
			mutex_exit(&assoc->ipsa_lock);
			/* Drop the reference held on the looked-up peer. */
			IPSA_REFRELE(assoc);
		}
		/* Drop the reference taken by sadb_age_assoc(). */
		IPSA_REFRELE(spare);
	}
	/*
	 * Run a GC pass to clean out dead identities.
	 */
	ipsid_gc();
}
3824 
3825 /*
3826  * Figure out when to reschedule the ager.
3827  */
3828 timeout_id_t
3829 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
3830     uint_t *intp, uint_t intmax, short mid)
3831 {
3832 	hrtime_t end = gethrtime();
3833 	uint_t interval = *intp;
3834 
3835 	/*
3836 	 * See how long this took.  If it took too long, increase the
3837 	 * aging interval.
3838 	 */
3839 	if ((end - begin) > interval * 1000000) {
3840 		if (interval >= intmax) {
3841 			/* XXX Rate limit this?  Or recommend flush? */
3842 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
3843 			    "Too many SA's to age out in %d msec.\n",
3844 			    intmax);
3845 		} else {
3846 			/* Double by shifting by one bit. */
3847 			interval <<= 1;
3848 			interval = min(interval, intmax);
3849 		}
3850 	} else if ((end - begin) <= interval * 500000 &&
3851 		interval > SADB_AGE_INTERVAL_DEFAULT) {
3852 		/*
3853 		 * If I took less than half of the interval, then I should
3854 		 * ratchet the interval back down.  Never automatically
3855 		 * shift below the default aging interval.
3856 		 *
3857 		 * NOTE:This even overrides manual setting of the age
3858 		 *	interval using NDD.
3859 		 */
3860 		/* Halve by shifting one bit. */
3861 		interval >>= 1;
3862 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
3863 	}
3864 	*intp = interval;
3865 	return (qtimeout(pfkey_q, ager, NULL, interval * drv_usectohz(1000)));
3866 }
3867 
3868 
3869 /*
3870  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
3871  * message takes when updating a MATURE or DYING SA.
3872  */
3873 static void
3874 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
3875     sadb_lifetime_t *soft)
3876 {
3877 	mutex_enter(&assoc->ipsa_lock);
3878 
3879 	assoc->ipsa_state = IPSA_STATE_MATURE;
3880 
3881 	/*
3882 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
3883 	 * passed in during an update message.	We currently don't handle
3884 	 * these.
3885 	 */
3886 
3887 	if (hard != NULL) {
3888 		if (hard->sadb_lifetime_bytes != 0)
3889 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3890 		if (hard->sadb_lifetime_usetime != 0)
3891 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
3892 		if (hard->sadb_lifetime_addtime != 0)
3893 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3894 		if (assoc->ipsa_hardaddlt != 0) {
3895 			assoc->ipsa_hardexpiretime =
3896 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
3897 		}
3898 		if (assoc->ipsa_harduselt != 0) {
3899 			if (assoc->ipsa_hardexpiretime != 0) {
3900 				assoc->ipsa_hardexpiretime =
3901 				    min(assoc->ipsa_hardexpiretime,
3902 					assoc->ipsa_usetime +
3903 					assoc->ipsa_harduselt);
3904 			} else {
3905 				assoc->ipsa_hardexpiretime =
3906 				    assoc->ipsa_usetime + assoc->ipsa_harduselt;
3907 			}
3908 		}
3909 
3910 		if (hard->sadb_lifetime_allocations != 0)
3911 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3912 	}
3913 
3914 	if (soft != NULL) {
3915 		if (soft->sadb_lifetime_bytes != 0)
3916 			assoc->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3917 		if (soft->sadb_lifetime_usetime != 0)
3918 			assoc->ipsa_softuselt = soft->sadb_lifetime_usetime;
3919 		if (soft->sadb_lifetime_addtime != 0)
3920 			assoc->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3921 		if (assoc->ipsa_softaddlt != 0) {
3922 			assoc->ipsa_softexpiretime =
3923 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
3924 		}
3925 		if (assoc->ipsa_softuselt != 0) {
3926 			if (assoc->ipsa_softexpiretime != 0) {
3927 				assoc->ipsa_softexpiretime =
3928 				    min(assoc->ipsa_softexpiretime,
3929 					assoc->ipsa_usetime +
3930 					assoc->ipsa_softuselt);
3931 			} else {
3932 				assoc->ipsa_softexpiretime =
3933 				    assoc->ipsa_usetime + assoc->ipsa_softuselt;
3934 			}
3935 		}
3936 
3937 		if (soft->sadb_lifetime_allocations != 0)
3938 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
3939 	}
3940 
3941 	mutex_exit(&assoc->ipsa_lock);
3942 }
3943 
3944 /*
3945  * Common code to update an SA.
3946  */
3947 
3948 int
3949 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi,
3950     sadb_t *sp, int *diagnostic, queue_t *pfkey_q,
3951     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *))
3952 {
3953 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
3954 	sadb_address_t *srcext =
3955 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
3956 	sadb_address_t *dstext =
3957 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
3958 	sadb_x_kmc_t *kmcext =
3959 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
3960 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
3961 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
3962 	struct sockaddr_in *src, *dst;
3963 	struct sockaddr_in6 *src6, *dst6;
3964 	sadb_lifetime_t *soft =
3965 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
3966 	sadb_lifetime_t *hard =
3967 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
3968 	isaf_t *inbound, *outbound;
3969 	ipsa_t *outbound_target = NULL, *inbound_target = NULL;
3970 	int error = 0;
3971 	uint32_t *srcaddr, *dstaddr;
3972 	sa_family_t af;
3973 	uint32_t kmp = 0, kmc = 0;
3974 
3975 	/* I need certain extensions present for either UPDATE message. */
3976 	if (srcext == NULL) {
3977 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
3978 		return (EINVAL);
3979 	}
3980 	if (dstext == NULL) {
3981 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
3982 		return (EINVAL);
3983 	}
3984 	if (assoc == NULL) {
3985 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
3986 		return (EINVAL);
3987 	}
3988 
3989 	if (kmcext != NULL) {
3990 		kmp = kmcext->sadb_x_kmc_proto;
3991 		kmc = kmcext->sadb_x_kmc_cookie;
3992 	}
3993 
3994 	dst = (struct sockaddr_in *)(dstext + 1);
3995 	src = (struct sockaddr_in *)(srcext + 1);
3996 	af = dst->sin_family;
3997 	if (af == AF_INET6) {
3998 		dst6 = (struct sockaddr_in6 *)dst;
3999 		src6 = (struct sockaddr_in6 *)src;
4000 
4001 		srcaddr = (uint32_t *)&src6->sin6_addr;
4002 		dstaddr = (uint32_t *)&dst6->sin6_addr;
4003 		outbound = OUTBOUND_BUCKET_V6(sp, *(uint32_t *)dstaddr);
4004 	} else {
4005 		srcaddr = (uint32_t *)&src->sin_addr;
4006 		dstaddr = (uint32_t *)&dst->sin_addr;
4007 		outbound = OUTBOUND_BUCKET_V4(sp, *(uint32_t *)dstaddr);
4008 	}
4009 	inbound = INBOUND_BUCKET(sp, assoc->sadb_sa_spi);
4010 
4011 	/* Lock down both buckets. */
4012 	mutex_enter(&outbound->isaf_lock);
4013 	mutex_enter(&inbound->isaf_lock);
4014 
4015 	/* Try outbound first. */
4016 	outbound_target = ipsec_getassocbyspi(outbound, assoc->sadb_sa_spi,
4017 	    srcaddr, dstaddr, af);
4018 	inbound_target = ipsec_getassocbyspi(inbound, assoc->sadb_sa_spi,
4019 	    srcaddr, dstaddr, af);
4020 
4021 	mutex_exit(&inbound->isaf_lock);
4022 	mutex_exit(&outbound->isaf_lock);
4023 
4024 	if (outbound_target == NULL) {
4025 		if (inbound_target == NULL) {
4026 			return (ESRCH);
4027 		} else if (inbound_target->ipsa_state == IPSA_STATE_LARVAL) {
4028 			/*
4029 			 * REFRELE the target and let the add_sa_func()
4030 			 * deal with updating a larval SA.
4031 			 */
4032 			IPSA_REFRELE(inbound_target);
4033 			return (add_sa_func(mp, ksi, diagnostic));
4034 		}
4035 	}
4036 
4037 	/*
4038 	 * Reality checks for updates of active associations.
4039 	 * Sundry first-pass UPDATE-specific reality checks.
4040 	 * Have to do the checks here, because it's after the add_sa code.
4041 	 * XXX STATS : logging/stats here?
4042 	 */
4043 
4044 	if (assoc->sadb_sa_state != SADB_SASTATE_MATURE) {
4045 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4046 		error = EINVAL;
4047 		goto bail;
4048 	}
4049 	if (assoc->sadb_sa_flags & ~(SADB_SAFLAGS_NOREPLAY |
4050 		SADB_X_SAFLAGS_NATT_LOC | SADB_X_SAFLAGS_NATT_REM)) {
4051 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4052 		error = EINVAL;
4053 		goto bail;
4054 	}
4055 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4056 		error = EOPNOTSUPP;
4057 		goto bail;
4058 	}
4059 	if ((*diagnostic = sadb_hardsoftchk(hard, soft)) != 0) {
4060 		error = EINVAL;
4061 		goto bail;
4062 	}
4063 	ASSERT(src->sin_family == dst->sin_family);
4064 	if (akey != NULL) {
4065 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4066 		error = EINVAL;
4067 		goto bail;
4068 	}
4069 	if (ekey != NULL) {
4070 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4071 		error = EINVAL;
4072 		goto bail;
4073 	}
4074 
4075 	if (outbound_target != NULL) {
4076 		if (outbound_target->ipsa_state == IPSA_STATE_DEAD) {
4077 			error = ESRCH;	/* DEAD == Not there, in this case. */
4078 			goto bail;
4079 		}
4080 		if ((kmp != 0) &&
4081 		    ((outbound_target->ipsa_kmp != 0) ||
4082 			(outbound_target->ipsa_kmp != kmp))) {
4083 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4084 			error = EINVAL;
4085 			goto bail;
4086 		}
4087 		if ((kmc != 0) &&
4088 		    ((outbound_target->ipsa_kmc != 0) ||
4089 			(outbound_target->ipsa_kmc != kmc))) {
4090 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4091 			error = EINVAL;
4092 			goto bail;
4093 		}
4094 	}
4095 
4096 	if (inbound_target != NULL) {
4097 		if (inbound_target->ipsa_state == IPSA_STATE_DEAD) {
4098 			error = ESRCH;	/* DEAD == Not there, in this case. */
4099 			goto bail;
4100 		}
4101 		if ((kmp != 0) &&
4102 		    ((inbound_target->ipsa_kmp != 0) ||
4103 			(inbound_target->ipsa_kmp != kmp))) {
4104 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4105 			error = EINVAL;
4106 			goto bail;
4107 		}
4108 		if ((kmc != 0) &&
4109 		    ((inbound_target->ipsa_kmc != 0) ||
4110 			(inbound_target->ipsa_kmc != kmc))) {
4111 			*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4112 			error = EINVAL;
4113 			goto bail;
4114 		}
4115 	}
4116 
4117 	if (outbound_target != NULL) {
4118 		sadb_update_lifetimes(outbound_target, hard, soft);
4119 		if (kmp != 0)
4120 			outbound_target->ipsa_kmp = kmp;
4121 		if (kmc != 0)
4122 			outbound_target->ipsa_kmc = kmc;
4123 	}
4124 
4125 	if (inbound_target != NULL) {
4126 		sadb_update_lifetimes(inbound_target, hard, soft);
4127 		if (kmp != 0)
4128 			inbound_target->ipsa_kmp = kmp;
4129 		if (kmc != 0)
4130 			inbound_target->ipsa_kmc = kmc;
4131 	}
4132 
4133 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4134 	    ksi, (outbound_target == NULL) ? inbound_target : outbound_target);
4135 
4136 bail:
4137 	/*
4138 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
4139 	 * them in { }.
4140 	 */
4141 	if (outbound_target != NULL) {
4142 		IPSA_REFRELE(outbound_target);
4143 	}
4144 	if (inbound_target != NULL) {
4145 		IPSA_REFRELE(inbound_target);
4146 	}
4147 
4148 	return (error);
4149 }
4150 
4151 /*
4152  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4153  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4154  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4155  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4156  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4157  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4158  * other direction's SA.
4159  */
4160 
4161 /*
4162  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4163  * grab it, lock it, and return it.  Otherwise return NULL.
4164  */
4165 static ipsacq_t *
4166 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4167     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4168     uint64_t unique_id)
4169 {
4170 	ipsacq_t *walker;
4171 	sa_family_t fam;
4172 	uint32_t blank_address[4] = {0, 0, 0, 0};
4173 
4174 	if (isrc == NULL) {
4175 		ASSERT(idst == NULL);
4176 		isrc = idst = blank_address;
4177 	}
4178 
4179 	/*
4180 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4181 	 *
4182 	 * XXX May need search for duplicates based on other things too!
4183 	 */
4184 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4185 	    walker = walker->ipsacq_next) {
4186 		mutex_enter(&walker->ipsacq_lock);
4187 		fam = walker->ipsacq_addrfam;
4188 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4189 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4190 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4191 			(in6_addr_t *)walker->ipsacq_innersrc) &&
4192 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4193 			(in6_addr_t *)walker->ipsacq_innerdst) &&
4194 		    (ap == walker->ipsacq_act) &&
4195 		    (pp == walker->ipsacq_policy) &&
4196 		    /* XXX do deep compares of ap/pp? */
4197 		    (unique_id == walker->ipsacq_unique_id))
4198 			break;			/* everything matched */
4199 		mutex_exit(&walker->ipsacq_lock);
4200 	}
4201 
4202 	return (walker);
4203 }
4204 
4205 /*
4206  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4207  * of all of the same length.  Give up (and drop) if memory
4208  * cannot be allocated for a new one; otherwise, invoke callback to
4209  * send the acquire up..
4210  *
4211  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4212  * list.  The ah_add_sa_finish() routines can look at the packet's ipsec_out_t
4213  * and handle this case specially.
4214  */
4215 void
4216 sadb_acquire(mblk_t *mp, ipsec_out_t *io, boolean_t need_ah, boolean_t need_esp)
4217 {
4218 	sadbp_t *spp;
4219 	sadb_t *sp;
4220 	ipsacq_t *newbie;
4221 	iacqf_t *bucket;
4222 	mblk_t *datamp = mp->b_cont;
4223 	mblk_t *extended;
4224 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4225 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4226 	uint32_t *src, *dst, *isrc, *idst;
4227 	ipsec_policy_t *pp = io->ipsec_out_policy;
4228 	ipsec_action_t *ap = io->ipsec_out_act;
4229 	sa_family_t af;
4230 	int hashoffset;
4231 	uint32_t seq;
4232 	uint64_t unique_id = 0;
4233 	ipsec_selector_t sel;
4234 	boolean_t tunnel_mode = io->ipsec_out_tunnel;
4235 
4236 	ASSERT((pp != NULL) || (ap != NULL));
4237 
4238 	ASSERT(need_ah != NULL || need_esp != NULL);
4239 	/* Assign sadb pointers */
4240 	spp = need_esp ? &esp_sadb : &ah_sadb; /* ESP for AH+ESP */
4241 	sp = io->ipsec_out_v4 ? &spp->s_v4 : &spp->s_v6;
4242 
4243 	if (ap == NULL)
4244 		ap = pp->ipsp_act;
4245 
4246 	ASSERT(ap != NULL);
4247 
4248 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4249 		unique_id = SA_FORM_UNIQUE_ID(io);
4250 
4251 	/*
4252 	 * Set up an ACQUIRE record.
4253 	 *
4254 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4255 	 * below the lowest point allowed in the kernel.  (In other words,
4256 	 * make sure the high bit on the sequence number is set.)
4257 	 */
4258 
4259 	seq = keysock_next_seq() | IACQF_LOWEST_SEQ;
4260 
4261 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4262 		src = (uint32_t *)&ipha->ipha_src;
4263 		dst = (uint32_t *)&ipha->ipha_dst;
4264 		af = AF_INET;
4265 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4266 		ASSERT(io->ipsec_out_v4 == B_TRUE);
4267 	} else {
4268 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4269 		src = (uint32_t *)&ip6h->ip6_src;
4270 		dst = (uint32_t *)&ip6h->ip6_dst;
4271 		af = AF_INET6;
4272 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4273 		ASSERT(io->ipsec_out_v4 == B_FALSE);
4274 	}
4275 
4276 	if (tunnel_mode) {
4277 		/* Snag inner addresses. */
4278 		isrc = io->ipsec_out_insrc;
4279 		idst = io->ipsec_out_indst;
4280 	} else {
4281 		isrc = idst = NULL;
4282 	}
4283 
4284 	/*
4285 	 * Check buckets to see if there is an existing entry.  If so,
4286 	 * grab it.  sadb_checkacquire locks newbie if found.
4287 	 */
4288 	bucket = &(sp->sdb_acq[hashoffset]);
4289 	mutex_enter(&bucket->iacqf_lock);
4290 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4291 	    unique_id);
4292 
4293 	if (newbie == NULL) {
4294 		/*
4295 		 * Otherwise, allocate a new one.
4296 		 */
4297 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4298 		if (newbie == NULL) {
4299 			mutex_exit(&bucket->iacqf_lock);
4300 			ip_drop_packet(mp, B_FALSE, NULL, NULL,
4301 			    &ipdrops_sadb_acquire_nomem, &sadb_dropper);
4302 			return;
4303 		}
4304 		newbie->ipsacq_policy = pp;
4305 		if (pp != NULL) {
4306 			IPPOL_REFHOLD(pp);
4307 		}
4308 		IPACT_REFHOLD(ap);
4309 		newbie->ipsacq_act = ap;
4310 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4311 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4312 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4313 		if (newbie->ipsacq_next != NULL)
4314 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4315 		bucket->iacqf_ipsacq = newbie;
4316 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4317 		mutex_enter(&newbie->ipsacq_lock);
4318 	}
4319 
4320 	mutex_exit(&bucket->iacqf_lock);
4321 
4322 	/*
4323 	 * This assert looks silly for now, but we may need to enter newbie's
4324 	 * mutex during a search.
4325 	 */
4326 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
4327 
4328 	mp->b_next = NULL;
4329 	/* Queue up packet.  Use b_next. */
4330 	if (newbie->ipsacq_numpackets == 0) {
4331 		/* First one. */
4332 		newbie->ipsacq_mp = mp;
4333 		newbie->ipsacq_numpackets = 1;
4334 		(void) drv_getparm(TIME, &newbie->ipsacq_expire);
4335 		/*
4336 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
4337 		 * value.
4338 		 */
4339 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
4340 		newbie->ipsacq_seq = seq;
4341 		newbie->ipsacq_addrfam = af;
4342 
4343 		newbie->ipsacq_srcport = io->ipsec_out_src_port;
4344 		newbie->ipsacq_dstport = io->ipsec_out_dst_port;
4345 		newbie->ipsacq_icmp_type = io->ipsec_out_icmp_type;
4346 		newbie->ipsacq_icmp_code = io->ipsec_out_icmp_code;
4347 		if (tunnel_mode) {
4348 			newbie->ipsacq_inneraddrfam = io->ipsec_out_inaf;
4349 			newbie->ipsacq_proto = io->ipsec_out_inaf == AF_INET6 ?
4350 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
4351 			newbie->ipsacq_innersrcpfx = io->ipsec_out_insrcpfx;
4352 			newbie->ipsacq_innerdstpfx = io->ipsec_out_indstpfx;
4353 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
4354 			    io->ipsec_out_insrc, io->ipsec_out_inaf);
4355 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
4356 			    io->ipsec_out_indst, io->ipsec_out_inaf);
4357 		} else {
4358 			newbie->ipsacq_proto = io->ipsec_out_proto;
4359 		}
4360 		newbie->ipsacq_unique_id = unique_id;
4361 	} else {
4362 		/* Scan to the end of the list & insert. */
4363 		mblk_t *lastone = newbie->ipsacq_mp;
4364 
4365 		while (lastone->b_next != NULL)
4366 			lastone = lastone->b_next;
4367 		lastone->b_next = mp;
4368 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
4369 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
4370 			lastone = newbie->ipsacq_mp;
4371 			newbie->ipsacq_mp = lastone->b_next;
4372 			lastone->b_next = NULL;
4373 			ip_drop_packet(lastone, B_FALSE, NULL, NULL,
4374 			    &ipdrops_sadb_acquire_toofull, &sadb_dropper);
4375 		} else {
4376 			IP_ACQUIRE_STAT(qhiwater, newbie->ipsacq_numpackets);
4377 		}
4378 	}
4379 
4380 	/*
4381 	 * Reset addresses.  Set them to the most recently added mblk chain,
4382 	 * so that the address pointers in the acquire record will point
4383 	 * at an mblk still attached to the acquire list.
4384 	 */
4385 
4386 	newbie->ipsacq_srcaddr = src;
4387 	newbie->ipsacq_dstaddr = dst;
4388 
4389 	/*
4390 	 * If the acquire record has more than one queued packet, we've
4391 	 * already sent an ACQUIRE, and don't need to repeat ourself.
4392 	 */
4393 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
4394 		/* I have an acquire outstanding already! */
4395 		mutex_exit(&newbie->ipsacq_lock);
4396 		return;
4397 	}
4398 
4399 	if (keysock_extended_reg()) {
4400 		/*
4401 		 * Construct an extended ACQUIRE.  There are logging
4402 		 * opportunities here in failure cases.
4403 		 */
4404 
4405 		(void) memset(&sel, 0, sizeof (sel));
4406 		sel.ips_isv4 = io->ipsec_out_v4;
4407 		if (tunnel_mode) {
4408 			sel.ips_protocol = (io->ipsec_out_inaf == AF_INET) ?
4409 			    IPPROTO_ENCAP : IPPROTO_IPV6;
4410 		} else {
4411 			sel.ips_protocol = io->ipsec_out_proto;
4412 			sel.ips_local_port = io->ipsec_out_src_port;
4413 			sel.ips_remote_port = io->ipsec_out_dst_port;
4414 		}
4415 		sel.ips_icmp_type = io->ipsec_out_icmp_type;
4416 		sel.ips_icmp_code = io->ipsec_out_icmp_code;
4417 		sel.ips_is_icmp_inv_acq = 0;
4418 		if (af == AF_INET) {
4419 			sel.ips_local_addr_v4 = ipha->ipha_src;
4420 			sel.ips_remote_addr_v4 = ipha->ipha_dst;
4421 		} else {
4422 			sel.ips_local_addr_v6 = ip6h->ip6_src;
4423 			sel.ips_remote_addr_v6 = ip6h->ip6_dst;
4424 		}
4425 
4426 		extended = sadb_keysock_out(0);
4427 		if (extended != NULL) {
4428 			extended->b_cont = sadb_extended_acquire(&sel, pp, ap,
4429 			    tunnel_mode, seq, 0);
4430 			if (extended->b_cont == NULL) {
4431 				freeb(extended);
4432 				extended = NULL;
4433 			}
4434 		}
4435 	} else
4436 		extended = NULL;
4437 
4438 	/*
4439 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
4440 	 * this new record.  The send-acquire callback assumes that acqrec is
4441 	 * already locked.
4442 	 */
4443 	(*spp->s_acqfn)(newbie, extended);
4444 }
4445 
4446 /*
4447  * Unlink and free an acquire record.
4448  */
4449 void
4450 sadb_destroy_acquire(ipsacq_t *acqrec)
4451 {
4452 	mblk_t *mp;
4453 
4454 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
4455 
4456 	if (acqrec->ipsacq_policy != NULL) {
4457 		IPPOL_REFRELE(acqrec->ipsacq_policy);
4458 	}
4459 	if (acqrec->ipsacq_act != NULL) {
4460 		IPACT_REFRELE(acqrec->ipsacq_act);
4461 	}
4462 
4463 	/* Unlink */
4464 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
4465 	if (acqrec->ipsacq_next != NULL)
4466 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
4467 
4468 	/*
4469 	 * Free hanging mp's.
4470 	 *
4471 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
4472 	 */
4473 
4474 	mutex_enter(&acqrec->ipsacq_lock);
4475 	while (acqrec->ipsacq_mp != NULL) {
4476 		mp = acqrec->ipsacq_mp;
4477 		acqrec->ipsacq_mp = mp->b_next;
4478 		mp->b_next = NULL;
4479 		ip_drop_packet(mp, B_FALSE, NULL, NULL,
4480 		    &ipdrops_sadb_acquire_timeout, &sadb_dropper);
4481 	}
4482 	mutex_exit(&acqrec->ipsacq_lock);
4483 
4484 	/* Free */
4485 	mutex_destroy(&acqrec->ipsacq_lock);
4486 	kmem_free(acqrec, sizeof (*acqrec));
4487 }
4488 
4489 /*
4490  * Destroy an acquire list fanout.
4491  */
4492 static void
4493 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever)
4494 {
4495 	int i;
4496 	iacqf_t *list = *listp;
4497 
4498 	if (list == NULL)
4499 		return;
4500 
4501 	for (i = 0; i < numentries; i++) {
4502 		mutex_enter(&(list[i].iacqf_lock));
4503 		while (list[i].iacqf_ipsacq != NULL)
4504 			sadb_destroy_acquire(list[i].iacqf_ipsacq);
4505 		mutex_exit(&(list[i].iacqf_lock));
4506 		if (forever)
4507 			mutex_destroy(&(list[i].iacqf_lock));
4508 	}
4509 
4510 	if (forever) {
4511 		*listp = NULL;
4512 		kmem_free(list, numentries * sizeof (*list));
4513 	}
4514 }
4515 
4516 /*
4517  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
4518  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
4519  */
4520 static uint8_t *
4521 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
4522     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
4523     uint8_t alg, uint16_t minbits, uint16_t maxbits)
4524 {
4525 	uint8_t *cur = start;
4526 	ipsec_alginfo_t *algp;
4527 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
4528 
4529 	cur += sizeof (*algdesc);
4530 	if (cur >= limit)
4531 		return (NULL);
4532 
4533 	ecomb->sadb_x_ecomb_numalgs++;
4534 
4535 	/*
4536 	 * Normalize vs. crypto framework's limits.  This way, you can specify
4537 	 * a stronger policy, and when the framework loads a stronger version,
4538 	 * you can just keep plowing w/o rewhacking your SPD.
4539 	 */
4540 	mutex_enter(&alg_lock);
4541 	algp = ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
4542 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
4543 	if (minbits < algp->alg_ef_minbits)
4544 		minbits = algp->alg_ef_minbits;
4545 	if (maxbits > algp->alg_ef_maxbits)
4546 		maxbits = algp->alg_ef_maxbits;
4547 	mutex_exit(&alg_lock);
4548 
4549 	algdesc->sadb_x_algdesc_satype = satype;
4550 	algdesc->sadb_x_algdesc_algtype = algtype;
4551 	algdesc->sadb_x_algdesc_alg = alg;
4552 	algdesc->sadb_x_algdesc_minbits = minbits;
4553 	algdesc->sadb_x_algdesc_maxbits = maxbits;
4554 	algdesc->sadb_x_algdesc_reserved = 0;
4555 	return (cur);
4556 }
4557 
4558 /*
4559  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
4560  * which must fit before *limit
4561  *
4562  * return NULL if we ran out of room or a pointer to the end of the ecomb.
4563  */
4564 static uint8_t *
4565 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act)
4566 {
4567 	uint8_t *cur = start;
4568 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
4569 	ipsec_prot_t *ipp;
4570 
4571 	cur += sizeof (*ecomb);
4572 	if (cur >= limit)
4573 		return (NULL);
4574 
4575 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
4576 
4577 	ipp = &act->ipa_act.ipa_apply;
4578 
4579 	ecomb->sadb_x_ecomb_numalgs = 0;
4580 	ecomb->sadb_x_ecomb_reserved = 0;
4581 	ecomb->sadb_x_ecomb_reserved2 = 0;
4582 	/*
4583 	 * No limits on allocations, since we really don't support that
4584 	 * concept currently.
4585 	 */
4586 	ecomb->sadb_x_ecomb_soft_allocations = 0;
4587 	ecomb->sadb_x_ecomb_hard_allocations = 0;
4588 
4589 	/*
4590 	 * XXX TBD: Policy or global parameters will eventually be
4591 	 * able to fill in some of these.
4592 	 */
4593 	ecomb->sadb_x_ecomb_flags = 0;
4594 	ecomb->sadb_x_ecomb_soft_bytes = 0;
4595 	ecomb->sadb_x_ecomb_hard_bytes = 0;
4596 	ecomb->sadb_x_ecomb_soft_addtime = 0;
4597 	ecomb->sadb_x_ecomb_hard_addtime = 0;
4598 	ecomb->sadb_x_ecomb_soft_usetime = 0;
4599 	ecomb->sadb_x_ecomb_hard_usetime = 0;
4600 
4601 	if (ipp->ipp_use_ah) {
4602 		cur = sadb_new_algdesc(cur, limit, ecomb,
4603 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
4604 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits);
4605 		if (cur == NULL)
4606 			return (NULL);
4607 		ipsecah_fill_defs(ecomb);
4608 	}
4609 
4610 	if (ipp->ipp_use_esp) {
4611 		if (ipp->ipp_use_espa) {
4612 			cur = sadb_new_algdesc(cur, limit, ecomb,
4613 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
4614 			    ipp->ipp_esp_auth_alg,
4615 			    ipp->ipp_espa_minbits,
4616 			    ipp->ipp_espa_maxbits);
4617 			if (cur == NULL)
4618 				return (NULL);
4619 		}
4620 
4621 		cur = sadb_new_algdesc(cur, limit, ecomb,
4622 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
4623 		    ipp->ipp_encr_alg,
4624 		    ipp->ipp_espe_minbits,
4625 		    ipp->ipp_espe_maxbits);
4626 		if (cur == NULL)
4627 			return (NULL);
4628 		/* Fill in lifetimes if and only if AH didn't already... */
4629 		if (!ipp->ipp_use_ah)
4630 			ipsecesp_fill_defs(ecomb);
4631 	}
4632 
4633 	return (cur);
4634 }
4635 
4636 /*
4637  * Construct an extended ACQUIRE message based on a selector and the resulting
4638  * IPsec action.
4639  *
4640  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
4641  * generation. As a consequence, expect this function to evolve
4642  * rapidly.
4643  */
4644 static mblk_t *
4645 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
4646     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid)
4647 {
4648 	mblk_t *mp;
4649 	sadb_msg_t *samsg;
4650 	uint8_t *start, *cur, *end;
4651 	uint32_t *saddrptr, *daddrptr;
4652 	sa_family_t af;
4653 	sadb_prop_t *eprop;
4654 	ipsec_action_t *ap, *an;
4655 	ipsec_selkey_t *ipsl;
4656 	uint8_t proto, pfxlen;
4657 	uint16_t lport, rport;
4658 	uint32_t kmp, kmc;
4659 
4660 	/*
4661 	 * Find the action we want sooner rather than later..
4662 	 */
4663 	an = NULL;
4664 	if (pol == NULL) {
4665 		ap = act;
4666 	} else {
4667 		ap = pol->ipsp_act;
4668 
4669 		if (ap != NULL)
4670 			an = ap->ipa_next;
4671 	}
4672 
4673 	/*
4674 	 * Just take a swag for the allocation for now.	 We can always
4675 	 * alter it later.
4676 	 */
4677 #define	SADB_EXTENDED_ACQUIRE_SIZE	2048
4678 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
4679 	if (mp == NULL)
4680 		return (NULL);
4681 
4682 	start = mp->b_rptr;
4683 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
4684 
4685 	cur = start;
4686 
4687 	samsg = (sadb_msg_t *)cur;
4688 	cur += sizeof (*samsg);
4689 
4690 	samsg->sadb_msg_version = PF_KEY_V2;
4691 	samsg->sadb_msg_type = SADB_ACQUIRE;
4692 	samsg->sadb_msg_errno = 0;
4693 	samsg->sadb_msg_reserved = 0;
4694 	samsg->sadb_msg_satype = 0;
4695 	samsg->sadb_msg_seq = seq;
4696 	samsg->sadb_msg_pid = pid;
4697 
4698 	if (tunnel_mode) {
4699 		/*
4700 		 * Form inner address extensions based NOT on the inner
4701 		 * selectors (i.e. the packet data), but on the policy's
4702 		 * selector key (i.e. the policy's selector information).
4703 		 *
4704 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
4705 		 * same in ipsec_selkey_t (unless the compiler does very
4706 		 * strange things with unions, consult your local C language
4707 		 * lawyer for details).
4708 		 */
4709 		ipsl = &(pol->ipsp_sel->ipsl_key);
4710 		if (ipsl->ipsl_valid & IPSL_IPV4) {
4711 			af = AF_INET;
4712 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
4713 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
4714 		} else {
4715 			af = AF_INET6;
4716 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
4717 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
4718 		}
4719 
4720 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
4721 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
4722 			pfxlen = ipsl->ipsl_local_pfxlen;
4723 		} else {
4724 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
4725 			pfxlen = 0;
4726 		}
4727 		/* XXX What about ICMP type/code? */
4728 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
4729 		    ipsl->ipsl_lport : 0;
4730 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
4731 		    ipsl->ipsl_proto : 0;
4732 
4733 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
4734 		    af, saddrptr, lport, proto, pfxlen);
4735 		if (cur == NULL) {
4736 			freeb(mp);
4737 			return (NULL);
4738 		}
4739 
4740 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
4741 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
4742 			pfxlen = ipsl->ipsl_remote_pfxlen;
4743 		} else {
4744 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
4745 			pfxlen = 0;
4746 		}
4747 		/* XXX What about ICMP type/code? */
4748 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
4749 		    ipsl->ipsl_rport : 0;
4750 
4751 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
4752 		    af, daddrptr, rport, proto, pfxlen);
4753 		if (cur == NULL) {
4754 			freeb(mp);
4755 			return (NULL);
4756 		}
4757 		/*
4758 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
4759 		 * _with_ inner-packet address selectors, we'll need to further
4760 		 * distinguish tunnel mode here.  For now, having inner
4761 		 * addresses and/or ports is sufficient.
4762 		 *
4763 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
4764 		 * outer addresses.
4765 		 */
4766 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
4767 		lport = rport = 0;
4768 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
4769 		proto = 0;
4770 		lport = 0;
4771 		rport = 0;
4772 		if (pol != NULL) {
4773 			ipsl = &(pol->ipsp_sel->ipsl_key);
4774 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
4775 				proto = ipsl->ipsl_proto;
4776 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
4777 				rport = ipsl->ipsl_rport;
4778 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
4779 				lport = ipsl->ipsl_lport;
4780 		}
4781 	} else {
4782 		proto = sel->ips_protocol;
4783 		lport = sel->ips_local_port;
4784 		rport = sel->ips_remote_port;
4785 	}
4786 
4787 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
4788 
4789 	/*
4790 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
4791 	 * ipsec_selector_t.
4792 	 */
4793 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
4794 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
4795 
4796 	if (cur == NULL) {
4797 		freeb(mp);
4798 		return (NULL);
4799 	}
4800 
4801 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
4802 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
4803 
4804 	if (cur == NULL) {
4805 		freeb(mp);
4806 		return (NULL);
4807 	}
4808 
4809 	/*
4810 	 * This section will change a lot as policy evolves.
4811 	 * For now, it'll be relatively simple.
4812 	 */
4813 	eprop = (sadb_prop_t *)cur;
4814 	cur += sizeof (*eprop);
4815 	if (cur > end) {
4816 		/* no space left */
4817 		freeb(mp);
4818 		return (NULL);
4819 	}
4820 
4821 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
4822 	eprop->sadb_x_prop_ereserved = 0;
4823 	eprop->sadb_x_prop_numecombs = 0;
4824 	eprop->sadb_prop_replay = 32;	/* default */
4825 
4826 	kmc = kmp = 0;
4827 
4828 	for (; ap != NULL; ap = an) {
4829 		an = (pol != NULL) ? ap->ipa_next : NULL;
4830 
4831 		/*
4832 		 * Skip non-IPsec policies
4833 		 */
4834 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
4835 			continue;
4836 
4837 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
4838 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
4839 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
4840 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
4841 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
4842 			eprop->sadb_prop_replay =
4843 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
4844 		}
4845 
4846 		cur = sadb_action_to_ecomb(cur, end, ap);
4847 		if (cur == NULL) { /* no space */
4848 			freeb(mp);
4849 			return (NULL);
4850 		}
4851 		eprop->sadb_x_prop_numecombs++;
4852 	}
4853 
4854 	if (eprop->sadb_x_prop_numecombs == 0) {
4855 		/*
4856 		 * This will happen if we fail to find a policy
4857 		 * allowing for IPsec processing.
4858 		 * Construct an error message.
4859 		 */
4860 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
4861 		samsg->sadb_msg_errno = ENOENT;
4862 		samsg->sadb_x_msg_diagnostic = 0;
4863 		return (mp);
4864 	}
4865 
4866 	if ((kmp != 0) || (kmc != 0)) {
4867 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
4868 		if (cur == NULL) {
4869 			freeb(mp);
4870 			return (NULL);
4871 		}
4872 	}
4873 
4874 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
4875 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
4876 	mp->b_wptr = cur;
4877 
4878 	return (mp);
4879 }
4880 
4881 /*
4882  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
4883  *
4884  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
4885  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
4886  * maximize code consolidation while preventing algorithm changes from messing
4887  * with the callers finishing touches on the ACQUIRE itself.
4888  */
4889 mblk_t *
4890 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype)
4891 {
4892 	uint_t allocsize;
4893 	mblk_t *pfkeymp, *msgmp;
4894 	sa_family_t af;
4895 	uint8_t *cur, *end;
4896 	sadb_msg_t *samsg;
4897 	uint16_t sport_typecode;
4898 	uint16_t dport_typecode;
4899 	uint8_t check_proto;
4900 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
4901 
4902 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
4903 
4904 	pfkeymp = sadb_keysock_out(0);
4905 	if (pfkeymp == NULL)
4906 		return (NULL);
4907 
4908 	/*
4909 	 * First, allocate a basic ACQUIRE message
4910 	 */
4911 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
4912 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
4913 
4914 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
4915 	allocsize += 2 * sizeof (struct sockaddr_in6);
4916 
4917 	mutex_enter(&alg_lock);
4918 	/* NOTE:  The lock is now held through to this function's return. */
4919 	allocsize += ipsec_nalgs[IPSEC_ALG_AUTH] *
4920 	    ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
4921 
4922 	if (tunnel_mode) {
4923 		/* Tunnel mode! */
4924 		allocsize += 2 * sizeof (sadb_address_t);
4925 		/* Enough to cover both AF_INET and AF_INET6. */
4926 		allocsize += 2 * sizeof (struct sockaddr_in6);
4927 	}
4928 
4929 	msgmp = allocb(allocsize, BPRI_HI);
4930 	if (msgmp == NULL) {
4931 		freeb(pfkeymp);
4932 		mutex_exit(&alg_lock);
4933 		return (NULL);
4934 	}
4935 
4936 	pfkeymp->b_cont = msgmp;
4937 	cur = msgmp->b_rptr;
4938 	end = cur + allocsize;
4939 	samsg = (sadb_msg_t *)cur;
4940 	cur += sizeof (sadb_msg_t);
4941 
4942 	af = acqrec->ipsacq_addrfam;
4943 	switch (af) {
4944 	case AF_INET:
4945 		check_proto = IPPROTO_ICMP;
4946 		break;
4947 	case AF_INET6:
4948 		check_proto = IPPROTO_ICMPV6;
4949 		break;
4950 	default:
4951 		/* This should never happen unless we have kernel bugs. */
4952 		cmn_err(CE_WARN,
4953 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
4954 		ASSERT(0);
4955 		mutex_exit(&alg_lock);
4956 		return (NULL);
4957 	}
4958 
4959 	samsg->sadb_msg_version = PF_KEY_V2;
4960 	samsg->sadb_msg_type = SADB_ACQUIRE;
4961 	samsg->sadb_msg_satype = satype;
4962 	samsg->sadb_msg_errno = 0;
4963 	samsg->sadb_msg_pid = 0;
4964 	samsg->sadb_msg_reserved = 0;
4965 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
4966 
4967 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
4968 
4969 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
4970 		sport_typecode = dport_typecode = 0;
4971 	} else {
4972 		sport_typecode = acqrec->ipsacq_srcport;
4973 		dport_typecode = acqrec->ipsacq_dstport;
4974 	}
4975 
4976 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
4977 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
4978 
4979 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
4980 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
4981 
4982 	if (tunnel_mode) {
4983 		sport_typecode = acqrec->ipsacq_srcport;
4984 		dport_typecode = acqrec->ipsacq_dstport;
4985 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
4986 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
4987 		    sport_typecode, acqrec->ipsacq_inner_proto,
4988 		    acqrec->ipsacq_innersrcpfx);
4989 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
4990 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
4991 		    dport_typecode, acqrec->ipsacq_inner_proto,
4992 		    acqrec->ipsacq_innerdstpfx);
4993 	}
4994 
4995 	/* XXX Insert identity information here. */
4996 
4997 	/* XXXMLS Insert sensitivity information here. */
4998 
4999 	if (cur != NULL)
5000 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5001 	else
5002 		mutex_exit(&alg_lock);
5003 
5004 	return (pfkeymp);
5005 }
5006 
5007 /*
5008  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5009  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5010  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5011  * (ipsa_t *)-1).
5012  *
5013  * master_spi is passed in host order.
5014  */
5015 ipsa_t *
5016 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic)
5017 {
5018 	sadb_address_t *src =
5019 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5020 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5021 	sadb_spirange_t *range =
5022 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5023 	struct sockaddr_in *ssa, *dsa;
5024 	struct sockaddr_in6 *ssa6, *dsa6;
5025 	uint32_t *srcaddr, *dstaddr;
5026 	sa_family_t af;
5027 	uint32_t add, min, max;
5028 
5029 	if (src == NULL) {
5030 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5031 		return ((ipsa_t *)-1);
5032 	}
5033 	if (dst == NULL) {
5034 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5035 		return ((ipsa_t *)-1);
5036 	}
5037 	if (range == NULL) {
5038 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5039 		return ((ipsa_t *)-1);
5040 	}
5041 
5042 	min = ntohl(range->sadb_spirange_min);
5043 	max = ntohl(range->sadb_spirange_max);
5044 	dsa = (struct sockaddr_in *)(dst + 1);
5045 	dsa6 = (struct sockaddr_in6 *)dsa;
5046 
5047 	ssa = (struct sockaddr_in *)(src + 1);
5048 	ssa6 = (struct sockaddr_in6 *)ssa;
5049 	ASSERT(dsa->sin_family == ssa->sin_family);
5050 
5051 	srcaddr = ALL_ZEROES_PTR;
5052 	af = dsa->sin_family;
5053 	switch (af) {
5054 	case AF_INET:
5055 		if (src != NULL)
5056 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5057 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5058 		break;
5059 	case AF_INET6:
5060 		if (src != NULL)
5061 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5062 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5063 		break;
5064 	default:
5065 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5066 		return ((ipsa_t *)-1);
5067 	}
5068 
5069 	if (master_spi < min || master_spi > max) {
5070 		/* Return a random value in the range. */
5071 		(void) random_get_pseudo_bytes((uint8_t *)&add, sizeof (add));
5072 		master_spi = min + (add % (max - min + 1));
5073 	}
5074 
5075 	/*
5076 	 * Since master_spi is passed in host order, we need to htonl() it
5077 	 * for the purposes of creating a new SA.
5078 	 */
5079 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af));
5080 }
5081 
5082 /*
5083  *
5084  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5085  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5086  * and scan for the sequence number in question.  I may wish to accept an
5087  * address pair with it, for easier searching.
5088  *
5089  * Caller frees the message, so we don't have to here.
5090  *
5091  * NOTE:	The ip_q parameter may be used in the future for ACQUIRE
5092  *		failures.
5093  */
5094 /* ARGSUSED */
5095 void
5096 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *ip_q)
5097 {
5098 	int i;
5099 	ipsacq_t *acqrec;
5100 	iacqf_t *bucket;
5101 
5102 	/*
5103 	 * I only accept the base header for this!
5104 	 * Though to be honest, requiring the dst address would help
5105 	 * immensely.
5106 	 *
5107 	 * XXX	There are already cases where I can get the dst address.
5108 	 */
5109 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5110 		return;
5111 
5112 	/*
5113 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5114 	 * (and in the future send a message to IP with the appropriate error
5115 	 * number).
5116 	 *
5117 	 * Q: Do I want to reject if pid != 0?
5118 	 */
5119 
5120 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5121 		bucket = &sp->s_v4.sdb_acq[i];
5122 		mutex_enter(&bucket->iacqf_lock);
5123 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5124 		    acqrec = acqrec->ipsacq_next) {
5125 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5126 				break;	/* for acqrec... loop. */
5127 		}
5128 		if (acqrec != NULL)
5129 			break;	/* for i = 0... loop. */
5130 
5131 		mutex_exit(&bucket->iacqf_lock);
5132 	}
5133 
5134 	if (acqrec == NULL) {
5135 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
5136 			bucket = &sp->s_v6.sdb_acq[i];
5137 			mutex_enter(&bucket->iacqf_lock);
5138 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5139 			    acqrec = acqrec->ipsacq_next) {
5140 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5141 					break;	/* for acqrec... loop. */
5142 			}
5143 			if (acqrec != NULL)
5144 				break;	/* for i = 0... loop. */
5145 
5146 			mutex_exit(&bucket->iacqf_lock);
5147 		}
5148 	}
5149 
5150 
5151 	if (acqrec == NULL)
5152 		return;
5153 
5154 	/*
5155 	 * What do I do with the errno and IP?	I may need mp's services a
5156 	 * little more.	 See sadb_destroy_acquire() for future directions
5157 	 * beyond free the mblk chain on the acquire record.
5158 	 */
5159 
5160 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
5161 	sadb_destroy_acquire(acqrec);
5162 	/* Have to exit mutex here, because of breaking out of for loop. */
5163 	mutex_exit(&bucket->iacqf_lock);
5164 }
5165 
5166 /*
5167  * The following functions work with the replay windows of an SA.  They assume
5168  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
5169  * represents the highest sequence number packet received, and back
5170  * (ipsa->ipsa_replay_wsize) packets.
5171  */
5172 
5173 /*
5174  * Is the replay bit set?
5175  */
5176 static boolean_t
5177 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
5178 {
5179 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5180 
5181 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
5182 }
5183 
5184 /*
5185  * Shift the bits of the replay window over.
5186  */
5187 static void
5188 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
5189 {
5190 	int i;
5191 	int jump = ((shift - 1) >> 6) + 1;
5192 
5193 	if (shift == 0)
5194 		return;
5195 
5196 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
5197 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
5198 			ipsa->ipsa_replay_arr[i + jump] |=
5199 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
5200 		}
5201 		ipsa->ipsa_replay_arr[i] <<= shift;
5202 	}
5203 }
5204 
5205 /*
5206  * Set a bit in the bit vector.
5207  */
5208 static void
5209 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
5210 {
5211 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
5212 
5213 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
5214 }
5215 
5216 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
5217 
5218 /*
5219  * Assume caller has NOT done ntohl() already on seq.  Check to see
5220  * if replay sequence number "seq" has been seen already.
5221  */
5222 boolean_t
5223 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
5224 {
5225 	boolean_t rc;
5226 	uint32_t diff;
5227 
5228 	if (ipsa->ipsa_replay_wsize == 0)
5229 		return (B_TRUE);
5230 
5231 	/*
5232 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
5233 	 */
5234 
5235 	/* Convert sequence number into host order before holding the mutex. */
5236 	seq = ntohl(seq);
5237 
5238 	mutex_enter(&ipsa->ipsa_lock);
5239 
5240 	/* Initialize inbound SA's ipsa_replay field to last one received. */
5241 	if (ipsa->ipsa_replay == 0)
5242 		ipsa->ipsa_replay = 1;
5243 
5244 	if (seq > ipsa->ipsa_replay) {
5245 		/*
5246 		 * I have received a new "highest value received".  Shift
5247 		 * the replay window over.
5248 		 */
5249 		diff = seq - ipsa->ipsa_replay;
5250 		if (diff < ipsa->ipsa_replay_wsize) {
5251 			/* In replay window, shift bits over. */
5252 			ipsa_shift_replay(ipsa, diff);
5253 		} else {
5254 			/* WAY FAR AHEAD, clear bits and start again. */
5255 			bzero(ipsa->ipsa_replay_arr,
5256 			    sizeof (ipsa->ipsa_replay_arr));
5257 		}
5258 		ipsa_set_replay(ipsa, 0);
5259 		ipsa->ipsa_replay = seq;
5260 		rc = B_TRUE;
5261 		goto done;
5262 	}
5263 	diff = ipsa->ipsa_replay - seq;
5264 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
5265 		rc = B_FALSE;
5266 		goto done;
5267 	}
5268 	/* Set this packet as seen. */
5269 	ipsa_set_replay(ipsa, diff);
5270 
5271 	rc = B_TRUE;
5272 done:
5273 	mutex_exit(&ipsa->ipsa_lock);
5274 	return (rc);
5275 }
5276 
5277 /*
5278  * "Peek" and see if we should even bother going through the effort of
5279  * running an authentication check on the sequence number passed in.
5280  * this takes into account packets that are below the replay window,
5281  * and collisions with already replayed packets.  Return B_TRUE if it
5282  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
5283  * Assume same byte-ordering as sadb_replay_check.
5284  */
5285 boolean_t
5286 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
5287 {
5288 	boolean_t rc = B_FALSE;
5289 	uint32_t diff;
5290 
5291 	if (ipsa->ipsa_replay_wsize == 0)
5292 		return (B_TRUE);
5293 
5294 	/*
5295 	 * 0 is 0, regardless of byte order... :)
5296 	 *
5297 	 * If I get 0 on the wire (and there is a replay window) then the
5298 	 * sender most likely wrapped.	This ipsa may need to be marked or
5299 	 * something.
5300 	 */
5301 	if (seq == 0)
5302 		return (B_FALSE);
5303 
5304 	seq = ntohl(seq);
5305 	mutex_enter(&ipsa->ipsa_lock);
5306 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
5307 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
5308 		goto done;
5309 
5310 	/*
5311 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
5312 	 * bother with formalities.  I'm not accepting any more packets
5313 	 * on this SA.
5314 	 */
5315 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
5316 		/*
5317 		 * Since we're already holding the lock, update the
5318 		 * expire time ala. sadb_replay_delete() and return.
5319 		 */
5320 		ipsa->ipsa_hardexpiretime = (time_t)1;
5321 		goto done;
5322 	}
5323 
5324 	if (seq <= ipsa->ipsa_replay) {
5325 		/*
5326 		 * This seq is in the replay window.  I'm not below it,
5327 		 * because I already checked for that above!
5328 		 */
5329 		diff = ipsa->ipsa_replay - seq;
5330 		if (ipsa_is_replay_set(ipsa, diff))
5331 			goto done;
5332 	}
5333 	/* Else return B_TRUE, I'm going to advance the window. */
5334 
5335 	rc = B_TRUE;
5336 done:
5337 	mutex_exit(&ipsa->ipsa_lock);
5338 	return (rc);
5339 }
5340 
5341 /*
5342  * Delete a single SA.
5343  *
5344  * For now, use the quick-and-dirty trick of making the association's
5345  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
5346  */
5347 void
5348 sadb_replay_delete(ipsa_t *assoc)
5349 {
5350 	mutex_enter(&assoc->ipsa_lock);
5351 	assoc->ipsa_hardexpiretime = (time_t)1;
5352 	mutex_exit(&assoc->ipsa_lock);
5353 }
5354 
5355 /*
5356  * Given a queue that presumably points to IP, send a T_BIND_REQ for _proto_
5357  * down.  The caller will handle the T_BIND_ACK locally.
5358  */
5359 boolean_t
5360 sadb_t_bind_req(queue_t *q, int proto)
5361 {
5362 	struct T_bind_req *tbr;
5363 	mblk_t *mp;
5364 
5365 	mp = allocb(sizeof (struct T_bind_req) + 1, BPRI_HI);
5366 	if (mp == NULL) {
5367 		/* cmn_err(CE_WARN, */
5368 		/* "sadb_t_bind_req(%d): couldn't allocate mblk\n", proto); */
5369 		return (B_FALSE);
5370 	}
5371 	mp->b_datap->db_type = M_PCPROTO;
5372 	tbr = (struct T_bind_req *)mp->b_rptr;
5373 	mp->b_wptr += sizeof (struct T_bind_req);
5374 	tbr->PRIM_type = T_BIND_REQ;
5375 	tbr->ADDR_length = 0;
5376 	tbr->ADDR_offset = 0;
5377 	tbr->CONIND_number = 0;
5378 	*mp->b_wptr = (uint8_t)proto;
5379 	mp->b_wptr++;
5380 
5381 	putnext(q, mp);
5382 	return (B_TRUE);
5383 }
5384 
5385 /*
5386  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
5387  * this is designed to take only a format string with "* %x * %s *", so
5388  * that "spi" is printed first, then "addr" is converted using inet_pton().
5389  *
5390  * This is abstracted out to save the stack space for only when inet_pton()
5391  * is called.  Make sure "spi" is in network order; it usually is when this
5392  * would get called.
5393  */
5394 void
5395 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
5396     uint32_t spi, void *addr, int af)
5397 {
5398 	char buf[INET6_ADDRSTRLEN];
5399 
5400 	ASSERT(af == AF_INET6 || af == AF_INET);
5401 
5402 	ipsec_rl_strlog(mid, sid, level, sl, fmt, ntohl(spi),
5403 	    inet_ntop(af, addr, buf, sizeof (buf)));
5404 }
5405 
5406 /*
5407  * Fills in a reference to the policy, if any, from the conn, in *ppp
5408  * Releases a reference to the passed conn_t.
5409  */
5410 static void
5411 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
5412 {
5413 	ipsec_policy_t	*pp;
5414 	ipsec_latch_t	*ipl = connp->conn_latch;
5415 
5416 	if ((ipl != NULL) && (ipl->ipl_out_policy != NULL)) {
5417 		pp = ipl->ipl_out_policy;
5418 		IPPOL_REFHOLD(pp);
5419 	} else {
5420 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, NULL, sel);
5421 	}
5422 	*ppp = pp;
5423 	CONN_DEC_REF(connp);
5424 }
5425 
5426 /*
5427  * The following functions scan through active conn_t structures
5428  * and return a reference to the best-matching policy it can find.
5429  * Caller must release the reference.
5430  */
5431 static void
5432 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
5433 {
5434 	connf_t *connfp;
5435 	conn_t *connp = NULL;
5436 	ipsec_selector_t portonly;
5437 
5438 	bzero((void*)&portonly, sizeof (portonly));
5439 
5440 	if (sel->ips_local_port == 0)
5441 		return;
5442 
5443 	connfp = &ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port)];
5444 	mutex_enter(&connfp->connf_lock);
5445 
5446 	if (sel->ips_isv4) {
5447 		connp = connfp->connf_head;
5448 		while (connp != NULL) {
5449 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
5450 			    sel->ips_local_addr_v4, sel->ips_remote_port,
5451 			    sel->ips_remote_addr_v4))
5452 				break;
5453 			connp = connp->conn_next;
5454 		}
5455 
5456 		if (connp == NULL) {
5457 			/* Try port-only match in IPv6. */
5458 			portonly.ips_local_port = sel->ips_local_port;
5459 			sel = &portonly;
5460 		}
5461 	}
5462 
5463 	if (connp == NULL) {
5464 		connp = connfp->connf_head;
5465 		while (connp != NULL) {
5466 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
5467 			    sel->ips_local_addr_v6, sel->ips_remote_port,
5468 			    sel->ips_remote_addr_v6))
5469 				break;
5470 			connp = connp->conn_next;
5471 		}
5472 
5473 		if (connp == NULL) {
5474 			mutex_exit(&connfp->connf_lock);
5475 			return;
5476 		}
5477 	}
5478 
5479 	CONN_INC_REF(connp);
5480 	mutex_exit(&connfp->connf_lock);
5481 
5482 	ipsec_conn_pol(sel, connp, ppp);
5483 }
5484 
5485 static conn_t *
5486 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel)
5487 {
5488 	connf_t *connfp;
5489 	conn_t *connp = NULL;
5490 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
5491 
5492 	if (sel->ips_local_port == 0)
5493 		return (NULL);
5494 
5495 	connfp = &ipcl_bind_fanout[IPCL_BIND_HASH(sel->ips_local_port)];
5496 	mutex_enter(&connfp->connf_lock);
5497 
5498 	if (sel->ips_isv4) {
5499 		connp = connfp->connf_head;
5500 		while (connp != NULL) {
5501 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
5502 			    sel->ips_local_addr_v4, pptr[1]))
5503 				break;
5504 			connp = connp->conn_next;
5505 		}
5506 
5507 		if (connp == NULL) {
5508 			/* Match to all-zeroes. */
5509 			v6addrmatch = &ipv6_all_zeros;
5510 		}
5511 	}
5512 
5513 	if (connp == NULL) {
5514 		connp = connfp->connf_head;
5515 		while (connp != NULL) {
5516 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
5517 			    *v6addrmatch, pptr[1]))
5518 				break;
5519 			connp = connp->conn_next;
5520 		}
5521 
5522 		if (connp == NULL) {
5523 			mutex_exit(&connfp->connf_lock);
5524 			return (NULL);
5525 		}
5526 	}
5527 
5528 	CONN_INC_REF(connp);
5529 	mutex_exit(&connfp->connf_lock);
5530 	return (connp);
5531 }
5532 
5533 static void
5534 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
5535 {
5536 	connf_t 	*connfp;
5537 	conn_t		*connp;
5538 	uint32_t	ports;
5539 	uint16_t	*pptr = (uint16_t *)&ports;
5540 
5541 	/*
5542 	 * Find TCP state in the following order:
5543 	 * 1.) Connected conns.
5544 	 * 2.) Listeners.
5545 	 *
5546 	 * Even though #2 will be the common case for inbound traffic, only
5547 	 * following this order insures correctness.
5548 	 */
5549 
5550 	if (sel->ips_local_port == 0)
5551 		return;
5552 
5553 	/*
5554 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5555 	 * See ipsec_construct_inverse_acquire() for details.
5556 	 */
5557 	pptr[0] = sel->ips_remote_port;
5558 	pptr[1] = sel->ips_local_port;
5559 
5560 	connfp = &ipcl_conn_fanout[IPCL_CONN_HASH(sel->ips_remote_addr_v4,
5561 	    ports)];
5562 	mutex_enter(&connfp->connf_lock);
5563 	connp = connfp->connf_head;
5564 
5565 	if (sel->ips_isv4) {
5566 		while (connp != NULL) {
5567 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
5568 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
5569 			    ports))
5570 				break;
5571 			connp = connp->conn_next;
5572 		}
5573 	} else {
5574 		while (connp != NULL) {
5575 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
5576 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
5577 			    ports))
5578 				break;
5579 			connp = connp->conn_next;
5580 		}
5581 	}
5582 
5583 	if (connp != NULL) {
5584 		CONN_INC_REF(connp);
5585 		mutex_exit(&connfp->connf_lock);
5586 	} else {
5587 		mutex_exit(&connfp->connf_lock);
5588 
5589 		/* Try the listen hash. */
5590 		if ((connp = ipsec_find_listen_conn(pptr, sel)) == NULL)
5591 			return;
5592 	}
5593 
5594 	ipsec_conn_pol(sel, connp, ppp);
5595 }
5596 
5597 static void
5598 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
5599 {
5600 	conn_t		*connp;
5601 	uint32_t	ports;
5602 	uint16_t	*pptr = (uint16_t *)&ports;
5603 
5604 	/*
5605 	 * Find SCP state in the following order:
5606 	 * 1.) Connected conns.
5607 	 * 2.) Listeners.
5608 	 *
5609 	 * Even though #2 will be the common case for inbound traffic, only
5610 	 * following this order insures correctness.
5611 	 */
5612 
5613 	if (sel->ips_local_port == 0)
5614 		return;
5615 
5616 	/*
5617 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
5618 	 * See ipsec_construct_inverse_acquire() for details.
5619 	 */
5620 	pptr[0] = sel->ips_remote_port;
5621 	pptr[1] = sel->ips_local_port;
5622 
5623 	if (sel->ips_isv4) {
5624 		in6_addr_t	src, dst;
5625 
5626 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
5627 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
5628 		connp = sctp_find_conn(&dst, &src, ports, 0, ALL_ZONES);
5629 	} else {
5630 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
5631 		    &sel->ips_local_addr_v6, ports, 0, ALL_ZONES);
5632 	}
5633 	if (connp == NULL)
5634 		return;
5635 	ipsec_conn_pol(sel, connp, ppp);
5636 }
5637 
5638 /*
5639  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
5640  * Returns 0 or errno, and always sets *diagnostic to something appropriate
5641  * to PF_KEY.
5642  *
5643  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
5644  * ignore prefix lengths in the address extension.  Since we match on first-
5645  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
5646  * set addresses to mask out the lower bits, we should get a suitable search
5647  * key for the SPD anyway.  This is the function to change if the assumption
5648  * about suitable search keys is wrong.
5649  */
5650 static int
5651 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
5652     sadb_address_t *dstext, int *diagnostic)
5653 {
5654 	struct sockaddr_in *src, *dst;
5655 	struct sockaddr_in6 *src6, *dst6;
5656 
5657 	*diagnostic = 0;
5658 
5659 	bzero(sel, sizeof (*sel));
5660 	sel->ips_protocol = srcext->sadb_address_proto;
5661 	dst = (struct sockaddr_in *)(dstext + 1);
5662 	if (dst->sin_family == AF_INET6) {
5663 		dst6 = (struct sockaddr_in6 *)dst;
5664 		src6 = (struct sockaddr_in6 *)(srcext + 1);
5665 		if (src6->sin6_family != AF_INET6) {
5666 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5667 			return (EINVAL);
5668 		}
5669 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
5670 		sel->ips_local_addr_v6 = src6->sin6_addr;
5671 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
5672 			sel->ips_is_icmp_inv_acq = 1;
5673 		} else {
5674 			sel->ips_remote_port = dst6->sin6_port;
5675 			sel->ips_local_port = src6->sin6_port;
5676 		}
5677 		sel->ips_isv4 = B_FALSE;
5678 	} else {
5679 		src = (struct sockaddr_in *)(srcext + 1);
5680 		if (src->sin_family != AF_INET) {
5681 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5682 			return (EINVAL);
5683 		}
5684 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
5685 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
5686 		if (sel->ips_protocol == IPPROTO_ICMP) {
5687 			sel->ips_is_icmp_inv_acq = 1;
5688 		} else {
5689 			sel->ips_remote_port = dst->sin_port;
5690 			sel->ips_local_port = src->sin_port;
5691 		}
5692 		sel->ips_isv4 = B_TRUE;
5693 	}
5694 	return (0);
5695 }
5696 
5697 /*
5698  * We have encapsulation.
5699  * - Lookup tun_t by address and look for an associated
5700  *   tunnel policy
5701  * - If there are inner selectors
5702  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
5703  *   - Look up tunnel policy based on selectors
5704  * - Else
5705  *   - Sanity check the negotation
5706  *   - If appropriate, fall through to global policy
5707  */
5708 static int
5709 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
5710     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
5711     int *diagnostic)
5712 {
5713 	int err;
5714 	ipsec_policy_head_t *polhead;
5715 
5716 	/* Check for inner selectors and act appropriately */
5717 
5718 	if (innsrcext != NULL) {
5719 		/* Inner selectors present */
5720 		ASSERT(inndstext != NULL);
5721 		if ((itp == NULL) ||
5722 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
5723 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
5724 			/*
5725 			 * If inner packet selectors, we must have negotiate
5726 			 * tunnel and active policy.  If the tunnel has
5727 			 * transport-mode policy set on it, or has no policy,
5728 			 * fail.
5729 			 */
5730 			return (ENOENT);
5731 		} else {
5732 			/*
5733 			 * Reset "sel" to indicate inner selectors.  Pass
5734 			 * inner PF_KEY address extensions for this to happen.
5735 			 */
5736 			err = ipsec_get_inverse_acquire_sel(sel,
5737 			    innsrcext, inndstext, diagnostic);
5738 			if (err != 0) {
5739 				ITP_REFRELE(itp);
5740 				return (err);
5741 			}
5742 			/*
5743 			 * Now look for a tunnel policy based on those inner
5744 			 * selectors.  (Common code is below.)
5745 			 */
5746 		}
5747 	} else {
5748 		/* No inner selectors present */
5749 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
5750 			/*
5751 			 * Transport mode negotiation with no tunnel policy
5752 			 * configured - return to indicate a global policy
5753 			 * check is needed.
5754 			 */
5755 			if (itp != NULL) {
5756 				ITP_REFRELE(itp);
5757 			}
5758 			return (0);
5759 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
5760 			/* Tunnel mode set with no inner selectors. */
5761 			ITP_REFRELE(itp);
5762 			return (ENOENT);
5763 		}
5764 		/*
5765 		 * Else, this is a tunnel policy configured with ifconfig(1m)
5766 		 * or "negotiate transport" with ipsecconf(1m).  We have an
5767 		 * itp with policy set based on any match, so don't bother
5768 		 * changing fields in "sel".
5769 		 */
5770 	}
5771 
5772 	ASSERT(itp != NULL);
5773 	polhead = itp->itp_policy;
5774 	ASSERT(polhead != NULL);
5775 	rw_enter(&polhead->iph_lock, RW_READER);
5776 	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
5777 	rw_exit(&polhead->iph_lock);
5778 	ITP_REFRELE(itp);
5779 
5780 	/*
5781 	 * Don't default to global if we didn't find a matching policy entry.
5782 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
5783 	 */
5784 	if (*ppp == NULL)
5785 		return (ENOENT);
5786 
5787 	return (0);
5788 }
5789 
5790 static void
5791 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp)
5792 {
5793 	boolean_t	isv4 = sel->ips_isv4;
5794 	connf_t		*connfp;
5795 	conn_t		*connp;
5796 
5797 	if (isv4) {
5798 		connfp = &ipcl_proto_fanout[sel->ips_protocol];
5799 	} else {
5800 		connfp = &ipcl_proto_fanout_v6[sel->ips_protocol];
5801 	}
5802 
5803 	mutex_enter(&connfp->connf_lock);
5804 	for (connp = connfp->connf_head; connp != NULL;
5805 	    connp = connp->conn_next) {
5806 		if (!((isv4 && !((connp->conn_src == 0 ||
5807 		    connp->conn_src == sel->ips_local_addr_v4) &&
5808 		    (connp->conn_rem == 0 ||
5809 		    connp->conn_rem == sel->ips_remote_addr_v4))) ||
5810 		    (!isv4 && !((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
5811 		    IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6,
5812 		    &sel->ips_local_addr_v6)) &&
5813 		    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
5814 		    IN6_ARE_ADDR_EQUAL(&connp->conn_remv6,
5815 			&sel->ips_remote_addr_v6)))))) {
5816 			break;
5817 		}
5818 	}
5819 	if (connp == NULL) {
5820 		mutex_exit(&connfp->connf_lock);
5821 		return;
5822 	}
5823 
5824 	CONN_INC_REF(connp);
5825 	mutex_exit(&connfp->connf_lock);
5826 
5827 	ipsec_conn_pol(sel, connp, ppp);
5828 }
5829 
5830 /*
5831  * Construct an inverse ACQUIRE reply based on:
5832  *
5833  * 1.) Current global policy.
5834  * 2.) An conn_t match depending on what all was passed in the extv[].
5835  * 3.) A tunnel's policy head.
5836  * ...
5837  * N.) Other stuff TBD (e.g. identities)
5838  *
5839  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
5840  * in this function so the caller can extract them where appropriately.
5841  *
5842  * The SRC address is the local one - just like an outbound ACQUIRE message.
5843  */
5844 mblk_t *
5845 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[])
5846 {
5847 	int err;
5848 	int diagnostic;
5849 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
5850 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
5851 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
5852 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
5853 	struct sockaddr_in6 *src, *dst;
5854 	struct sockaddr_in6 *isrc, *idst;
5855 	ipsec_tun_pol_t *itp = NULL;
5856 	ipsec_policy_t *pp = NULL;
5857 	ipsec_selector_t sel, isel;
5858 	mblk_t *retmp;
5859 
5860 	/* Normalize addresses */
5861 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0) ==
5862 	    KS_IN_ADDR_UNKNOWN) {
5863 		err = EINVAL;
5864 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
5865 		goto bail;
5866 	}
5867 	src = (struct sockaddr_in6 *)(srcext + 1);
5868 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0) ==
5869 	    KS_IN_ADDR_UNKNOWN) {
5870 		err = EINVAL;
5871 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
5872 		goto bail;
5873 	}
5874 	dst = (struct sockaddr_in6 *)(dstext + 1);
5875 	if (src->sin6_family != dst->sin6_family) {
5876 		err = EINVAL;
5877 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
5878 		goto bail;
5879 	}
5880 
5881 	/* Check for tunnel mode and act appropriately */
5882 	if (innsrcext != NULL) {
5883 		if (inndstext == NULL) {
5884 			err = EINVAL;
5885 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
5886 			goto bail;
5887 		}
5888 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
5889 			(sadb_ext_t *)innsrcext, 0) == KS_IN_ADDR_UNKNOWN) {
5890 			err = EINVAL;
5891 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
5892 			goto bail;
5893 		}
5894 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
5895 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
5896 			(sadb_ext_t *)inndstext, 0) == KS_IN_ADDR_UNKNOWN) {
5897 			err = EINVAL;
5898 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
5899 			goto bail;
5900 		}
5901 		idst = (struct sockaddr_in6 *)(inndstext + 1);
5902 		if (isrc->sin6_family != idst->sin6_family) {
5903 			err = EINVAL;
5904 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
5905 			goto bail;
5906 		}
5907 		if (isrc->sin6_family != AF_INET &&
5908 		    isrc->sin6_family != AF_INET6) {
5909 			err = EINVAL;
5910 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
5911 			goto bail;
5912 		}
5913 	} else if (inndstext != NULL) {
5914 			err = EINVAL;
5915 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
5916 			goto bail;
5917 	}
5918 
5919 	/* Get selectors first, based on outer addresses */
5920 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
5921 	if (err != 0)
5922 		goto bail;
5923 
5924 	/* Check for tunnel mode mismatches. */
5925 	if (innsrcext != NULL &&
5926 	    ((isrc->sin6_family == AF_INET &&
5927 		sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
5928 		(isrc->sin6_family == AF_INET6 &&
5929 		    sel.ips_protocol != IPPROTO_IPV6 &&
5930 		    sel.ips_protocol != 0))) {
5931 		err = EPROTOTYPE;
5932 		goto bail;
5933 	}
5934 
5935 	/*
5936 	 * Okay, we have the addresses and other selector information.
5937 	 * Let's first find a conn...
5938 	 */
5939 	pp = NULL;
5940 	switch (sel.ips_protocol) {
5941 	case IPPROTO_TCP:
5942 		ipsec_tcp_pol(&sel, &pp);
5943 		break;
5944 	case IPPROTO_UDP:
5945 		ipsec_udp_pol(&sel, &pp);
5946 		break;
5947 	case IPPROTO_SCTP:
5948 		ipsec_sctp_pol(&sel, &pp);
5949 		break;
5950 	case IPPROTO_ENCAP:
5951 	case IPPROTO_IPV6:
5952 		rw_enter(&itp_get_byaddr_rw_lock, RW_READER);
5953 		/*
5954 		 * Assume sel.ips_remote_addr_* has the right address at
5955 		 * that exact position.
5956 		 */
5957 		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
5958 		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family);
5959 		rw_exit(&itp_get_byaddr_rw_lock);
5960 		if (innsrcext == NULL) {
5961 			/*
5962 			 * Transport-mode tunnel, make sure we fake out isel
5963 			 * to contain something based on the outer protocol.
5964 			 */
5965 			bzero(&isel, sizeof (isel));
5966 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
5967 		} /* Else isel is initialized by ipsec_tun_pol(). */
5968 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
5969 		    &diagnostic);
5970 		/*
5971 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
5972 		 * may be.
5973 		 */
5974 		if (err != 0)
5975 			goto bail;
5976 		break;
5977 	default:
5978 		ipsec_oth_pol(&sel, &pp);
5979 		break;
5980 	}
5981 
5982 	/*
5983 	 * If we didn't find a matching conn_t or other policy head, take a
5984 	 * look in the global policy.
5985 	 */
5986 	if (pp == NULL) {
5987 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, NULL, &sel);
5988 		if (pp == NULL) {
5989 			/* There's no global policy. */
5990 			err = ENOENT;
5991 			diagnostic = 0;
5992 			goto bail;
5993 		}
5994 	}
5995 
5996 	/*
5997 	 * Now that we have a policy entry/widget, construct an ACQUIRE
5998 	 * message based on that, fix fields where appropriate,
5999 	 * and return the message.
6000 	 */
6001 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6002 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6003 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid);
6004 	if (pp != NULL) {
6005 		IPPOL_REFRELE(pp);
6006 	}
6007 	if (retmp != NULL) {
6008 		return (retmp);
6009 	} else {
6010 		err = ENOMEM;
6011 		diagnostic = 0;
6012 	}
6013 bail:
6014 	samsg->sadb_msg_errno = (uint8_t)err;
6015 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6016 	return (NULL);
6017 }
6018 
6019 /*
6020  * ipsa_lpkt is a one-element queue, only manipulated by casptr within
6021  * the next two functions.
6022  *
6023  * These functions loop calling casptr() until the swap "happens",
6024  * turning a compare-and-swap op into an atomic swap operation.
6025  */
6026 
6027 /*
6028  * sadb_set_lpkt: Atomically swap in a value to ipsa->ipsa_lpkt and
6029  * freemsg the previous value.  free clue: freemsg(NULL) is safe.
6030  */
6031 
6032 void
6033 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt)
6034 {
6035 	mblk_t *opkt;
6036 
6037 	membar_producer();
6038 	do
6039 		opkt = ipsa->ipsa_lpkt;
6040 	while (casptr(&ipsa->ipsa_lpkt, opkt, npkt) != opkt);
6041 
6042 	ip_drop_packet(opkt, B_TRUE, NULL, NULL, &ipdrops_sadb_inlarval_replace,
6043 	    &sadb_dropper);
6044 }
6045 
6046 /*
6047  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6048  * previous value.
6049  */
6050 
6051 mblk_t *
6052 sadb_clear_lpkt(ipsa_t *ipsa)
6053 {
6054 	mblk_t *opkt;
6055 
6056 	do
6057 		opkt = ipsa->ipsa_lpkt;
6058 	while (casptr(&ipsa->ipsa_lpkt, opkt, NULL) != opkt);
6059 
6060 	return (opkt);
6061 }
6062 
6063 /*
6064  * Walker callback used by sadb_alg_update() to free/create crypto
6065  * context template when a crypto software provider is removed or
6066  * added.
6067  */
6068 
/*
 * Cookie passed through sadb_walker() to sadb_alg_update_cb() describing
 * which algorithm changed and in which direction.
 */
struct sadb_update_alg_state {
	/* Kind of algorithm that changed; selects auth vs. encr matching. */
	ipsec_algtype_t alg_type;
	/* Algorithm number compared against the SA's auth/encr algorithm. */
	uint8_t alg_id;
	/* Non-zero: create missing templates.  Zero: destroy existing ones. */
	boolean_t is_added;
};
6074 
6075 static void
6076 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
6077 {
6078 	struct sadb_update_alg_state *update_state =
6079 	    (struct sadb_update_alg_state *)cookie;
6080 	crypto_ctx_template_t *ctx_tmpl = NULL;
6081 
6082 	ASSERT(MUTEX_HELD(&head->isaf_lock));
6083 
6084 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
6085 		return;
6086 
6087 	mutex_enter(&entry->ipsa_lock);
6088 
6089 	switch (update_state->alg_type) {
6090 	case IPSEC_ALG_AUTH:
6091 		if (entry->ipsa_auth_alg == update_state->alg_id)
6092 			ctx_tmpl = &entry->ipsa_authtmpl;
6093 		break;
6094 	case IPSEC_ALG_ENCR:
6095 		if (entry->ipsa_encr_alg == update_state->alg_id)
6096 			ctx_tmpl = &entry->ipsa_encrtmpl;
6097 		break;
6098 	default:
6099 		ctx_tmpl = NULL;
6100 	}
6101 
6102 	if (ctx_tmpl == NULL) {
6103 		mutex_exit(&entry->ipsa_lock);
6104 		return;
6105 	}
6106 
6107 	/*
6108 	 * The context template of the SA may be affected by the change
6109 	 * of crypto provider.
6110 	 */
6111 	if (update_state->is_added) {
6112 		/* create the context template if not already done */
6113 		if (*ctx_tmpl == NULL) {
6114 			(void) ipsec_create_ctx_tmpl(entry,
6115 			    update_state->alg_type);
6116 		}
6117 	} else {
6118 		/*
6119 		 * The crypto provider was removed. If the context template
6120 		 * exists but it is no longer valid, free it.
6121 		 */
6122 		if (*ctx_tmpl != NULL)
6123 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
6124 	}
6125 
6126 	mutex_exit(&entry->ipsa_lock);
6127 }
6128 
6129 /*
6130  * Invoked by IP when an software crypto provider has been updated.
6131  * The type and id of the corresponding algorithm is passed as argument.
6132  * is_added is B_TRUE if the provider was added, B_FALSE if it was
6133  * removed. The function updates the SADB and free/creates the
6134  * context templates associated with SAs if needed.
6135  */
6136 
6137 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
6138     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
6139 	&update_state)
6140 
6141 void
6142 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added)
6143 {
6144 	struct sadb_update_alg_state update_state;
6145 
6146 	update_state.alg_type = alg_type;
6147 	update_state.alg_id = alg_id;
6148 	update_state.is_added = is_added;
6149 
6150 	if (alg_type == IPSEC_ALG_AUTH) {
6151 		/* walk the AH tables only for auth. algorithm changes */
6152 		SADB_ALG_UPDATE_WALK(ah_sadb.s_v4, sdb_of);
6153 		SADB_ALG_UPDATE_WALK(ah_sadb.s_v4, sdb_if);
6154 		SADB_ALG_UPDATE_WALK(ah_sadb.s_v6, sdb_of);
6155 		SADB_ALG_UPDATE_WALK(ah_sadb.s_v6, sdb_if);
6156 	}
6157 
6158 	/* walk the ESP tables */
6159 	SADB_ALG_UPDATE_WALK(esp_sadb.s_v4, sdb_of);
6160 	SADB_ALG_UPDATE_WALK(esp_sadb.s_v4, sdb_if);
6161 	SADB_ALG_UPDATE_WALK(esp_sadb.s_v6, sdb_of);
6162 	SADB_ALG_UPDATE_WALK(esp_sadb.s_v6, sdb_if);
6163 }
6164 
6165 /*
6166  * Creates a context template for the specified SA. This function
6167  * is called when an SA is created and when a context template needs
6168  * to be created due to a change of software provider.
6169  */
6170 int
6171 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6172 {
6173 	ipsec_alginfo_t *alg;
6174 	crypto_mechanism_t mech;
6175 	crypto_key_t *key;
6176 	crypto_ctx_template_t *sa_tmpl;
6177 	int rv;
6178 
6179 	ASSERT(MUTEX_HELD(&alg_lock));
6180 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6181 
6182 	/* get pointers to the algorithm info, context template, and key */
6183 	switch (alg_type) {
6184 	case IPSEC_ALG_AUTH:
6185 		key = &sa->ipsa_kcfauthkey;
6186 		sa_tmpl = &sa->ipsa_authtmpl;
6187 		alg = ipsec_alglists[alg_type][sa->ipsa_auth_alg];
6188 		break;
6189 	case IPSEC_ALG_ENCR:
6190 		key = &sa->ipsa_kcfencrkey;
6191 		sa_tmpl = &sa->ipsa_encrtmpl;
6192 		alg = ipsec_alglists[alg_type][sa->ipsa_encr_alg];
6193 		break;
6194 	default:
6195 		alg = NULL;
6196 	}
6197 
6198 	if (alg == NULL || !ALG_VALID(alg))
6199 		return (EINVAL);
6200 
6201 	/* initialize the mech info structure for the framework */
6202 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
6203 	mech.cm_type = alg->alg_mech_type;
6204 	mech.cm_param = NULL;
6205 	mech.cm_param_len = 0;
6206 
6207 	/* create a new context template */
6208 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
6209 
6210 	/*
6211 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
6212 	 * providers are available for that mechanism. In that case
6213 	 * we don't fail, and will generate the context template from
6214 	 * the framework callback when a software provider for that
6215 	 * mechanism registers.
6216 	 *
6217 	 * The context template is assigned the special value
6218 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
6219 	 * lack of memory. No attempt will be made to use
6220 	 * the context template if it is set to this value.
6221 	 */
6222 	if (rv == CRYPTO_HOST_MEMORY) {
6223 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
6224 	} else if (rv != CRYPTO_SUCCESS) {
6225 		*sa_tmpl = NULL;
6226 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
6227 			return (EINVAL);
6228 	}
6229 
6230 	return (0);
6231 }
6232 
6233 /*
6234  * Destroy the context template of the specified algorithm type
6235  * of the specified SA. Must be called while holding the SA lock.
6236  */
6237 void
6238 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
6239 {
6240 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
6241 
6242 	if (alg_type == IPSEC_ALG_AUTH) {
6243 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
6244 			sa->ipsa_authtmpl = NULL;
6245 		else if (sa->ipsa_authtmpl != NULL) {
6246 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
6247 			sa->ipsa_authtmpl = NULL;
6248 		}
6249 	} else {
6250 		ASSERT(alg_type == IPSEC_ALG_ENCR);
6251 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
6252 			sa->ipsa_encrtmpl = NULL;
6253 		else if (sa->ipsa_encrtmpl != NULL) {
6254 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
6255 			sa->ipsa_encrtmpl = NULL;
6256 		}
6257 	}
6258 }
6259 
6260 /*
6261  * Use the kernel crypto framework to check the validity of a key received
6262  * via keysock. Returns 0 if the key is OK, -1 otherwise.
6263  */
6264 int
6265 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
6266     boolean_t is_auth, int *diag)
6267 {
6268 	crypto_mechanism_t mech;
6269 	crypto_key_t crypto_key;
6270 	int crypto_rc;
6271 
6272 	mech.cm_type = mech_type;
6273 	mech.cm_param = NULL;
6274 	mech.cm_param_len = 0;
6275 
6276 	crypto_key.ck_format = CRYPTO_KEY_RAW;
6277 	crypto_key.ck_data = sadb_key + 1;
6278 	crypto_key.ck_length = sadb_key->sadb_key_bits;
6279 
6280 	crypto_rc = crypto_key_check(&mech, &crypto_key);
6281 
6282 	switch (crypto_rc) {
6283 	case CRYPTO_SUCCESS:
6284 		return (0);
6285 	case CRYPTO_MECHANISM_INVALID:
6286 	case CRYPTO_MECH_NOT_SUPPORTED:
6287 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
6288 		    SADB_X_DIAGNOSTIC_BAD_EALG;
6289 		break;
6290 	case CRYPTO_KEY_SIZE_RANGE:
6291 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
6292 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
6293 		break;
6294 	case CRYPTO_WEAK_KEY:
6295 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
6296 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
6297 		break;
6298 	}
6299 
6300 	return (-1);
6301 }
6302 
6303 /* ARGSUSED */
6304 static void
6305 sadb_clear_timeouts_walker(isaf_t *head, ipsa_t *ipsa, void *q)
6306 {
6307 	if (!(ipsa->ipsa_flags & IPSA_F_NATT))
6308 		return;
6309 
6310 	mutex_enter(&ipsa->ipsa_lock);
6311 	if (ipsa->ipsa_natt_q != q) {
6312 		mutex_exit(&ipsa->ipsa_lock);
6313 		return;
6314 	}
6315 
6316 	(void) quntimeout(ipsa->ipsa_natt_q, ipsa->ipsa_natt_ka_timer);
6317 
6318 	ipsa->ipsa_natt_ka_timer = 0;
6319 	ipsa->ipsa_natt_q = NULL;
6320 	mutex_exit(&ipsa->ipsa_lock);
6321 }
6322 
6323 void
6324 sadb_clear_timeouts(queue_t *q)
6325 {
6326 	sadb_t *sp = &esp_sadb.s_v4;
6327 
6328 	sadb_walker(sp->sdb_if, sp->sdb_hashsize,
6329 	    sadb_clear_timeouts_walker, q);
6330 }
6331