/* $OpenBSD: ip_spd.c,v 1.118 2023/04/22 20:51:56 mvs Exp $ */
/*
 * The author of this code is Angelos D. Keromytis (angelos@cis.upenn.edu)
 *
 * Copyright (c) 2000-2001 Angelos D. Keromytis.
 *
 * Permission to use, copy, and modify this software with or without fee
 * is hereby granted, provided that this entire notice is included in
 * all copies of any software which is or includes a copy or
 * modification of this software.
 * You may use this code under the GNU public license if you so wish. Please
 * contribute changes back to the authors under this freer than GPL license
 * so that we may further the use of strong encryption without limitations to
 * all.
 *
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
 * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
 * PURPOSE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/socketvar.h>
#include <sys/pool.h>
#include <sys/timeout.h>

#include <net/route.h>
#include <net/netisr.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_ipsp.h>
#include <net/pfkeyv2.h>

int	ipsp_spd_inp(struct mbuf *, struct inpcb *, struct ipsec_policy *,
	    struct tdb **);
int	ipsp_acquire_sa(struct ipsec_policy *, union sockaddr_union *,
	    union sockaddr_union *, struct sockaddr_encap *, struct mbuf *);
int	ipsp_pending_acquire(struct ipsec_policy *, union sockaddr_union *);
void	ipsp_delete_acquire_timer(void *);
void	ipsp_delete_acquire_locked(struct ipsec_acquire *);
void	ipsp_delete_acquire(struct ipsec_acquire *);
void	ipsp_unref_acquire_locked(struct ipsec_acquire *);

struct pool ipsec_policy_pool;
struct pool ipsec_acquire_pool;

/*
 * For tdb_walk() calling tdb_delete_locked() we need lock order
 * tdb_sadb_mtx before ipo_tdb_mtx.
 */
struct mutex ipo_tdb_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
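
/*
 * Illustrative sketch of the rule above (added for exposition, not part
 * of this file's logic): a thread that needs both mutexes must take
 * tdb_sadb_mtx first,
 *
 *	mtx_enter(&tdb_sadb_mtx);
 *	mtx_enter(&ipo_tdb_mtx);
 *	...
 *	mtx_leave(&ipo_tdb_mtx);
 *	mtx_leave(&tdb_sadb_mtx);
 *
 * since tdb_walk() already holds tdb_sadb_mtx when tdb_delete_locked()
 * reaches for ipo_tdb_mtx; taking them in the opposite order could
 * deadlock.
 */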

/* Protected by the NET_LOCK(). */
struct radix_node_head **spd_tables;
unsigned int spd_table_max;

struct mutex ipsec_acquire_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct ipsec_acquire_head ipsec_acquire_head =
    TAILQ_HEAD_INITIALIZER(ipsec_acquire_head);

struct radix_node_head *
spd_table_get(unsigned int rtableid)
{
	unsigned int rdomain;

	NET_ASSERT_LOCKED();

	if (spd_tables == NULL)
		return (NULL);

	rdomain = rtable_l2(rtableid);
	if (rdomain > spd_table_max)
		return (NULL);

	return (spd_tables[rdomain]);
}

struct radix_node_head *
spd_table_add(unsigned int rtableid)
{
	struct radix_node_head *rnh = NULL;
	unsigned int rdomain;
	void *p;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	rdomain = rtable_l2(rtableid);
	if (spd_tables == NULL || rdomain > spd_table_max) {
		if ((p = mallocarray(rdomain + 1, sizeof(*rnh),
		    M_RTABLE, M_NOWAIT|M_ZERO)) == NULL)
			return (NULL);

		if (spd_tables != NULL) {
			memcpy(p, spd_tables, sizeof(*rnh) * (spd_table_max+1));
			free(spd_tables, M_RTABLE,
			    sizeof(*rnh) * (spd_table_max+1));
		}
		spd_tables = p;
		spd_table_max = rdomain;
	}

	if (spd_tables[rdomain] == NULL) {
		if (rn_inithead((void **)&rnh,
		    offsetof(struct sockaddr_encap, sen_type)) == 0)
			rnh = NULL;
		spd_tables[rdomain] = rnh;
	}

	return (spd_tables[rdomain]);
}
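
/*
 * Usage sketch (hypothetical caller, for exposition only): look the
 * table up first and grow the array only under the exclusive NET_LOCK;
 * spd_table_add() may still fail since it allocates with M_NOWAIT.
 *
 *	struct radix_node_head *rnh;
 *
 *	if ((rnh = spd_table_get(rtableid)) == NULL)
 *		rnh = spd_table_add(rtableid);
 *
 * Growing the array preserves existing per-rdomain heads, and
 * spd_table_max tracks the largest routing domain seen so far.
 */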

int
spd_table_walk(unsigned int rtableid,
    int (*func)(struct ipsec_policy *, void *, unsigned int), void *arg)
{
	struct radix_node_head *rnh;
	int (*walker)(struct radix_node *, void *, u_int) = (void *)func;
	int error;

	rnh = spd_table_get(rtableid);
	if (rnh == NULL)
		return (0);

	/* EAGAIN means the tree changed. */
	while ((error = rn_walktree(rnh, walker, arg)) == EAGAIN)
		continue;

	return (error);
}
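
/*
 * Walker sketch (hypothetical callback, for illustration): each policy
 * in the tree is handed to the callback together with the caller's
 * 'arg'; rn_walktree() returns EAGAIN when the tree changed underneath
 * it, which the loop above simply retries.
 *
 *	int
 *	count_policies(struct ipsec_policy *ipo, void *arg,
 *	    unsigned int tableid)
 *	{
 *		(*(int *)arg)++;
 *		return (0);
 *	}
 *
 *	int n = 0;
 *	spd_table_walk(rtableid, count_policies, &n);
 */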

/*
 * Look up the SPD based on the headers contained in the mbuf. The second
 * argument indicates which protocol family the header at the beginning of
 * the mbuf belongs to. hlen is the offset of the transport protocol header
 * in the mbuf.
 *
 * Return combinations (of return value and *tdbout):
 * - -EINVAL -> silently drop the packet
 * - errno   -> drop packet and return error
 * - 0/NULL  -> no IPsec required on packet
 * - 0/TDB   -> do IPsec
 *
 * In the case of incoming flows, only the first three combinations are
 * returned.
 */
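/*
 * Caller sketch (hypothetical, restating the table above; error and
 * tdb handling only, no real transmit path):
 *
 *	struct tdb *tdb = NULL;
 *	int error;
 *
 *	error = ipsp_spd_lookup(m, AF_INET, hlen, IPSP_DIRECTION_OUT,
 *	    NULL, inp, &tdb, NULL);
 *	if (error == -EINVAL)
 *		m_freem(m);		-- silent drop
 *	else if (error != 0)
 *		m_freem(m);		-- drop and report the error
 *	else if (tdb != NULL)
 *		...			-- do IPsec, then tdb_unref(tdb)
 *	else
 *		...			-- no IPsec required
 */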
int
ipsp_spd_lookup(struct mbuf *m, int af, int hlen, int direction,
    struct tdb *tdbin, struct inpcb *inp, struct tdb **tdbout,
    struct ipsec_ids *ipsecflowinfo_ids)
{
	struct radix_node_head *rnh;
	struct radix_node *rn;
	union sockaddr_union sdst, ssrc;
	struct sockaddr_encap *ddst, dst;
	struct ipsec_policy *ipo;
	struct ipsec_ids *ids = NULL;
	int error, signore = 0, dignore = 0;
	u_int rdomain;

	NET_ASSERT_LOCKED();

	/*
	 * If there are no flows in place, there's no point
	 * continuing with the SPD lookup.
	 */
	if (!ipsec_in_use)
		return ipsp_spd_inp(m, inp, NULL, tdbout);

	/*
	 * If an input packet is destined to a BYPASS socket, just accept it.
	 */
	if ((inp != NULL) && (direction == IPSP_DIRECTION_IN) &&
	    (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) &&
	    (inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS) &&
	    (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)) {
		if (tdbout != NULL)
			*tdbout = NULL;
		return 0;
	}

	memset(&dst, 0, sizeof(dst));
	memset(&sdst, 0, sizeof(union sockaddr_union));
	memset(&ssrc, 0, sizeof(union sockaddr_union));
	ddst = (struct sockaddr_encap *)&dst;
	ddst->sen_family = PF_KEY;
	ddst->sen_len = SENT_LEN;

	switch (af) {
	case AF_INET:
		if (hlen < sizeof (struct ip) || m->m_pkthdr.len < hlen)
			return EINVAL;

		ddst->sen_direction = direction;
		ddst->sen_type = SENT_IP4;

		m_copydata(m, offsetof(struct ip, ip_src),
		    sizeof(struct in_addr), (caddr_t) &(ddst->sen_ip_src));
		m_copydata(m, offsetof(struct ip, ip_dst),
		    sizeof(struct in_addr), (caddr_t) &(ddst->sen_ip_dst));
		m_copydata(m, offsetof(struct ip, ip_p), sizeof(u_int8_t),
		    (caddr_t) &(ddst->sen_proto));

		sdst.sin.sin_family = ssrc.sin.sin_family = AF_INET;
		sdst.sin.sin_len = ssrc.sin.sin_len =
		    sizeof(struct sockaddr_in);
		ssrc.sin.sin_addr = ddst->sen_ip_src;
		sdst.sin.sin_addr = ddst->sen_ip_dst;

		/*
		 * If TCP/UDP, extract the port numbers to use in the lookup.
		 */
		switch (ddst->sen_proto) {
		case IPPROTO_UDP:
		case IPPROTO_TCP:
			/* Make sure there's enough data in the packet. */
			if (m->m_pkthdr.len < hlen + 2 * sizeof(u_int16_t))
				return EINVAL;

			/*
			 * Luckily, the offset of the src/dst ports in
			 * both the UDP and TCP headers is the same (first
			 * two 16-bit values in the respective headers),
			 * so we can just copy them.
			 */
			m_copydata(m, hlen, sizeof(u_int16_t),
			    (caddr_t) &(ddst->sen_sport));
			m_copydata(m, hlen + sizeof(u_int16_t),
			    sizeof(u_int16_t),
			    (caddr_t) &(ddst->sen_dport));
			break;

		default:
			ddst->sen_sport = 0;
			ddst->sen_dport = 0;
		}

		break;

#ifdef INET6
	case AF_INET6:
		if (hlen < sizeof (struct ip6_hdr) || m->m_pkthdr.len < hlen)
			return EINVAL;

		ddst->sen_type = SENT_IP6;
		ddst->sen_ip6_direction = direction;

		m_copydata(m, offsetof(struct ip6_hdr, ip6_src),
		    sizeof(struct in6_addr),
		    (caddr_t) &(ddst->sen_ip6_src));
		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
		    sizeof(struct in6_addr),
		    (caddr_t) &(ddst->sen_ip6_dst));
		m_copydata(m, offsetof(struct ip6_hdr, ip6_nxt),
		    sizeof(u_int8_t),
		    (caddr_t) &(ddst->sen_ip6_proto));

		sdst.sin6.sin6_family = ssrc.sin6.sin6_family = AF_INET6;
		sdst.sin6.sin6_len = ssrc.sin6.sin6_len =
		    sizeof(struct sockaddr_in6);
		in6_recoverscope(&ssrc.sin6, &ddst->sen_ip6_src);
		in6_recoverscope(&sdst.sin6, &ddst->sen_ip6_dst);

		/*
		 * If TCP/UDP, extract the port numbers to use in the lookup.
		 */
		switch (ddst->sen_ip6_proto) {
		case IPPROTO_UDP:
		case IPPROTO_TCP:
			/* Make sure there's enough data in the packet. */
			if (m->m_pkthdr.len < hlen + 2 * sizeof(u_int16_t))
				return EINVAL;

			/*
			 * Luckily, the offset of the src/dst ports in
			 * both the UDP and TCP headers is the same
			 * (first two 16-bit values in the respective
			 * headers), so we can just copy them.
			 */
			m_copydata(m, hlen, sizeof(u_int16_t),
			    (caddr_t) &(ddst->sen_ip6_sport));
			m_copydata(m, hlen + sizeof(u_int16_t),
			    sizeof(u_int16_t),
			    (caddr_t) &(ddst->sen_ip6_dport));
			break;

		default:
			ddst->sen_ip6_sport = 0;
			ddst->sen_ip6_dport = 0;
		}

		break;
#endif /* INET6 */

	default:
		return EAFNOSUPPORT;
	}
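
	/*
	 * Descriptive note (added for exposition): at this point 'dst' is
	 * a fully-formed sockaddr_encap lookup key -- direction, addresses,
	 * protocol and, for TCP/UDP, ports -- while 'sdst'/'ssrc' carry the
	 * same addresses as plain sockaddr unions for the comparisons
	 * against policy endpoints and TDBs below.
	 */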

	/* Actual SPD lookup. */
	rdomain = rtable_l2(m->m_pkthdr.ph_rtableid);
	if ((rnh = spd_table_get(rdomain)) == NULL ||
	    (rn = rn_match((caddr_t)&dst, rnh)) == NULL) {
		/*
		 * Return whatever the socket requirements are, there are no
		 * system-wide policies.
		 */
		return ipsp_spd_inp(m, inp, NULL, tdbout);
	}
	ipo = (struct ipsec_policy *)rn;

	switch (ipo->ipo_type) {
	case IPSP_PERMIT:
		return ipsp_spd_inp(m, inp, ipo, tdbout);

	case IPSP_DENY:
		return EHOSTUNREACH;

	case IPSP_IPSEC_USE:
	case IPSP_IPSEC_ACQUIRE:
	case IPSP_IPSEC_REQUIRE:
	case IPSP_IPSEC_DONTACQ:
		/* Nothing more needed here. */
		break;

	default:
		return EINVAL;
	}

	/* Check for non-specific destination in the policy. */
	switch (ipo->ipo_dst.sa.sa_family) {
	case AF_INET:
		if ((ipo->ipo_dst.sin.sin_addr.s_addr == INADDR_ANY) ||
		    (ipo->ipo_dst.sin.sin_addr.s_addr == INADDR_BROADCAST))
			dignore = 1;
		break;

#ifdef INET6
	case AF_INET6:
		if ((IN6_IS_ADDR_UNSPECIFIED(&ipo->ipo_dst.sin6.sin6_addr)) ||
		    (memcmp(&ipo->ipo_dst.sin6.sin6_addr, &in6mask128,
		    sizeof(in6mask128)) == 0))
			dignore = 1;
		break;
#endif /* INET6 */
	}

	/* Likewise for source. */
	switch (ipo->ipo_src.sa.sa_family) {
	case AF_INET:
		if (ipo->ipo_src.sin.sin_addr.s_addr == INADDR_ANY)
			signore = 1;
		break;

#ifdef INET6
	case AF_INET6:
		if (IN6_IS_ADDR_UNSPECIFIED(&ipo->ipo_src.sin6.sin6_addr))
			signore = 1;
		break;
#endif /* INET6 */
	}

	/* Do we have a cached entry? If so, check whether it's still valid. */
	mtx_enter(&ipo_tdb_mtx);
	if (ipo->ipo_tdb != NULL &&
	    (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) {
		TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
		    ipo_tdb_next);
		tdb_unref(ipo->ipo_tdb);
		ipo->ipo_tdb = NULL;
	}
	mtx_leave(&ipo_tdb_mtx);

	/* Outgoing packet policy check. */
	if (direction == IPSP_DIRECTION_OUT) {
		/*
		 * If the packet is destined for the policy-specified
		 * gateway/endhost, and the socket has the BYPASS
		 * option set, skip IPsec processing.
		 */
		if ((inp != NULL) &&
		    (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) &&
		    (inp->inp_seclevel[SL_ESP_NETWORK] ==
			IPSEC_LEVEL_BYPASS) &&
		    (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)) {
			/* Direct match. */
			if (dignore ||
			    !memcmp(&sdst, &ipo->ipo_dst, sdst.sa.sa_len)) {
				if (tdbout != NULL)
					*tdbout = NULL;
				return 0;
			}
		}

		/* Check that the cached TDB (if present) is appropriate. */
		mtx_enter(&ipo_tdb_mtx);
		if (ipo->ipo_tdb != NULL) {
			if ((ipo->ipo_last_searched <= ipsec_last_added) ||
			    (ipo->ipo_sproto != ipo->ipo_tdb->tdb_sproto) ||
			    memcmp(dignore ? &sdst : &ipo->ipo_dst,
			    &ipo->ipo_tdb->tdb_dst,
			    ipo->ipo_tdb->tdb_dst.sa.sa_len))
				goto nomatchout;

			if (!ipsp_aux_match(ipo->ipo_tdb,
			    ipsecflowinfo_ids ? ipsecflowinfo_ids :
			    ipo->ipo_ids,
			    &ipo->ipo_addr, &ipo->ipo_mask))
				goto nomatchout;

			/* Cached entry is good. */
			error = ipsp_spd_inp(m, inp, ipo, tdbout);
			mtx_leave(&ipo_tdb_mtx);
			return error;

  nomatchout:
			/* Cached TDB was not good. */
			TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
			    ipo_tdb_next);
			tdb_unref(ipo->ipo_tdb);
			ipo->ipo_tdb = NULL;
			ipo->ipo_last_searched = 0;
		}

		/*
		 * If no SA has been added since the last time we did a
		 * lookup, there's no point searching for one. However, if the
		 * destination gateway is left unspecified (or is all-1's),
		 * always look up since this is a generic-match rule
		 * (otherwise, we can have situations where SAs to some
		 * destinations exist but are not used, possibly leading to an
		 * explosion in the number of acquired SAs).
		 */
		if (ipo->ipo_last_searched <= ipsec_last_added) {
			struct tdb *tdbp_new;

			/* "Touch" the entry. */
			if (dignore == 0)
				ipo->ipo_last_searched = getuptime();

			/* gettdb() takes tdb_sadb_mtx, preserve lock order */
			mtx_leave(&ipo_tdb_mtx);
			/* Find an appropriate SA from the existing ones. */
			tdbp_new = gettdbbydst(rdomain,
			    dignore ? &sdst : &ipo->ipo_dst,
			    ipo->ipo_sproto,
			    ipsecflowinfo_ids ? ipsecflowinfo_ids :
			    ipo->ipo_ids,
			    &ipo->ipo_addr, &ipo->ipo_mask);
			ids = NULL;
			mtx_enter(&ipo_tdb_mtx);
			if ((tdbp_new != NULL) &&
			    (tdbp_new->tdb_flags & TDBF_DELETED)) {
				/*
				 * After tdb_delete() has released ipo_tdb_mtx
				 * in tdb_unlink(), never add a new one.
				 * tdb_cleanspd() has to catch all of them.
				 */
				tdb_unref(tdbp_new);
				tdbp_new = NULL;
			}
			if (ipo->ipo_tdb != NULL) {
				/* Remove cached TDB from parallel thread. */
				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
				    ipo, ipo_tdb_next);
				tdb_unref(ipo->ipo_tdb);
			}
			ipo->ipo_tdb = tdbp_new;
			if (ipo->ipo_tdb != NULL) {
				/* gettdbbydst() has already refcounted tdb */
				TAILQ_INSERT_TAIL(
				    &ipo->ipo_tdb->tdb_policy_head,
				    ipo, ipo_tdb_next);
				error = ipsp_spd_inp(m, inp, ipo, tdbout);
				mtx_leave(&ipo_tdb_mtx);
				return error;
			}
		}
		mtx_leave(&ipo_tdb_mtx);

		/* So, we don't have an SA -- just a policy. */
		switch (ipo->ipo_type) {
		case IPSP_IPSEC_REQUIRE:
			/* Acquire SA through key management. */
			if (ipsp_acquire_sa(ipo,
			    dignore ? &sdst : &ipo->ipo_dst,
			    signore ? NULL : &ipo->ipo_src, ddst, m) != 0) {
				return EACCES;
			}

			/* FALLTHROUGH */
		case IPSP_IPSEC_DONTACQ:
			return -EINVAL;  /* Silently drop packet. */

		case IPSP_IPSEC_ACQUIRE:
			/* Acquire SA through key management. */
			ipsp_acquire_sa(ipo, dignore ? &sdst : &ipo->ipo_dst,
			    signore ? NULL : &ipo->ipo_src, ddst, NULL);

			/* FALLTHROUGH */
		case IPSP_IPSEC_USE:
			return ipsp_spd_inp(m, inp, ipo, tdbout);
		}
	} else { /* IPSP_DIRECTION_IN */
		if (tdbin != NULL) {
			/*
			 * Special case for bundled IPcomp/ESP SAs:
			 * 1) only IPcomp flows are loaded into kernel
			 * 2) input processing processes ESP SA first
			 * 3) then optional IPcomp processing happens
			 * 4) we only update m_tag for ESP
			 * => 'tdbin' is always set to ESP SA
			 * => flow has ipo_proto for IPcomp
			 * So if 'tdbin' points to an ESP SA and this 'tdbin' is
			 * bundled with an IPcomp SA, then we replace 'tdbin'
			 * with the IPcomp SA at tdbin->tdb_inext.
			 */
			if (ipo->ipo_sproto == IPPROTO_IPCOMP &&
			    tdbin->tdb_sproto == IPPROTO_ESP &&
			    tdbin->tdb_inext != NULL &&
			    tdbin->tdb_inext->tdb_sproto == IPPROTO_IPCOMP)
				tdbin = tdbin->tdb_inext;

			/* Direct match in the cache. */
			mtx_enter(&ipo_tdb_mtx);
			if (ipo->ipo_tdb == tdbin) {
				error = ipsp_spd_inp(m, inp, ipo, tdbout);
				mtx_leave(&ipo_tdb_mtx);
				return error;
			}
			mtx_leave(&ipo_tdb_mtx);

			if (memcmp(dignore ? &ssrc : &ipo->ipo_dst,
			    &tdbin->tdb_src, tdbin->tdb_src.sa.sa_len) ||
			    (ipo->ipo_sproto != tdbin->tdb_sproto))
				goto nomatchin;

			/* Match source/dest IDs. */
			if (ipo->ipo_ids)
				if (tdbin->tdb_ids == NULL ||
				    !ipsp_ids_match(ipo->ipo_ids,
				    tdbin->tdb_ids))
					goto nomatchin;

			/* Add it to the cache. */
			mtx_enter(&ipo_tdb_mtx);
			if (ipo->ipo_tdb != NULL) {
				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
				    ipo, ipo_tdb_next);
				tdb_unref(ipo->ipo_tdb);
			}
			ipo->ipo_tdb = tdb_ref(tdbin);
			TAILQ_INSERT_TAIL(&tdbin->tdb_policy_head, ipo,
			    ipo_tdb_next);
			error = ipsp_spd_inp(m, inp, ipo, tdbout);
			mtx_leave(&ipo_tdb_mtx);
			return error;

  nomatchin: /* Nothing needed here, falling through */
	;
		}

		/* Check whether cached entry applies. */
		mtx_enter(&ipo_tdb_mtx);
		if (ipo->ipo_tdb != NULL) {
			/*
			 * We only need to check that the correct
			 * security protocol and security gateway are
			 * set; IDs will be the same since the cached
			 * entry is linked on this policy.
			 */
			if (ipo->ipo_sproto == ipo->ipo_tdb->tdb_sproto &&
			    !memcmp(&ipo->ipo_tdb->tdb_src,
			    dignore ? &ssrc : &ipo->ipo_dst,
			    ipo->ipo_tdb->tdb_src.sa.sa_len))
				goto skipinputsearch;

			/* Not applicable, unlink. */
			TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
			    ipo_tdb_next);
			tdb_unref(ipo->ipo_tdb);
			ipo->ipo_tdb = NULL;
			ipo->ipo_last_searched = 0;
		}

		/* Find whether there exists an appropriate SA. */
		if (ipo->ipo_last_searched <= ipsec_last_added) {
			struct tdb *tdbp_new;

			if (dignore == 0)
				ipo->ipo_last_searched = getuptime();

			/* gettdb() takes tdb_sadb_mtx, preserve lock order */
			mtx_leave(&ipo_tdb_mtx);
			tdbp_new = gettdbbysrc(rdomain,
			    dignore ? &ssrc : &ipo->ipo_dst,
			    ipo->ipo_sproto, ipo->ipo_ids,
			    &ipo->ipo_addr, &ipo->ipo_mask);
			mtx_enter(&ipo_tdb_mtx);
			if ((tdbp_new != NULL) &&
			    (tdbp_new->tdb_flags & TDBF_DELETED)) {
				/*
				 * After tdb_delete() has released ipo_tdb_mtx
				 * in tdb_unlink(), never add a new one.
				 * tdb_cleanspd() has to catch all of them.
				 */
				tdb_unref(tdbp_new);
				tdbp_new = NULL;
			}
			if (ipo->ipo_tdb != NULL) {
				/* Remove cached TDB from parallel thread. */
				TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
				    ipo, ipo_tdb_next);
				tdb_unref(ipo->ipo_tdb);
			}
			ipo->ipo_tdb = tdbp_new;
			if (ipo->ipo_tdb != NULL) {
				/* gettdbbysrc() has already refcounted tdb */
				TAILQ_INSERT_TAIL(
				    &ipo->ipo_tdb->tdb_policy_head,
				    ipo, ipo_tdb_next);
			}
		}
  skipinputsearch:
		mtx_leave(&ipo_tdb_mtx);

		switch (ipo->ipo_type) {
		case IPSP_IPSEC_REQUIRE:
			/* If appropriate SA exists, don't acquire another. */
			if (ipo->ipo_tdb != NULL)
				return -EINVAL;  /* Silently drop packet. */

			/* Acquire SA through key management. */
			if ((error = ipsp_acquire_sa(ipo,
			    dignore ? &ssrc : &ipo->ipo_dst,
			    signore ? NULL : &ipo->ipo_src, ddst, m)) != 0)
				return error;

			/* FALLTHROUGH */
		case IPSP_IPSEC_DONTACQ:
			return -EINVAL;  /* Silently drop packet. */

		case IPSP_IPSEC_ACQUIRE:
			/* If appropriate SA exists, don't acquire another. */
			if (ipo->ipo_tdb != NULL)
				return ipsp_spd_inp(m, inp, ipo, tdbout);

			/* Acquire SA through key management. */
			ipsp_acquire_sa(ipo, dignore ? &ssrc : &ipo->ipo_dst,
			    signore ? NULL : &ipo->ipo_src, ddst, NULL);

			/* FALLTHROUGH */
		case IPSP_IPSEC_USE:
			return ipsp_spd_inp(m, inp, ipo, tdbout);
		}
	}

	/* Shouldn't ever get this far. */
	return EINVAL;
}

/*
 * Delete a policy from the SPD.
 */
int
ipsec_delete_policy(struct ipsec_policy *ipo)
{
	struct ipsec_acquire *ipa;
	struct radix_node_head *rnh;
	struct radix_node *rn = (struct radix_node *)ipo;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	if (refcnt_rele(&ipo->ipo_refcnt) == 0)
		return 0;

	/* Delete from SPD. */
	if ((rnh = spd_table_get(ipo->ipo_rdomain)) == NULL ||
	    rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL)
		return (ESRCH);

	mtx_enter(&ipo_tdb_mtx);
	if (ipo->ipo_tdb != NULL) {
		TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
		    ipo_tdb_next);
		tdb_unref(ipo->ipo_tdb);
		ipo->ipo_tdb = NULL;
	}
	mtx_leave(&ipo_tdb_mtx);

	mtx_enter(&ipsec_acquire_mtx);
	while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL)
		ipsp_delete_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);

	TAILQ_REMOVE(&ipsec_policy_head, ipo, ipo_list);

	if (ipo->ipo_ids)
		ipsp_ids_free(ipo->ipo_ids);

	ipsec_in_use--;

	pool_put(&ipsec_policy_pool, ipo);

	return 0;
}
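
/*
 * Note (added for exposition): refcnt_rele() returns zero while other
 * references remain, so ipsec_delete_policy() above only tears the
 * policy down once the caller holds the last reference; earlier callers
 * just return 0.
 */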

void
ipsp_delete_acquire_timer(void *v)
{
	struct ipsec_acquire *ipa = v;

	mtx_enter(&ipsec_acquire_mtx);
	refcnt_rele(&ipa->ipa_refcnt);
	ipsp_delete_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);
}

/*
 * Delete a pending IPsec acquire record.
 */
void
ipsp_delete_acquire(struct ipsec_acquire *ipa)
{
	mtx_enter(&ipsec_acquire_mtx);
	ipsp_delete_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);
}

void
ipsp_delete_acquire_locked(struct ipsec_acquire *ipa)
{
	if (timeout_del(&ipa->ipa_timeout) == 1)
		refcnt_rele(&ipa->ipa_refcnt);
	ipsp_unref_acquire_locked(ipa);
}

void
ipsec_unref_acquire(struct ipsec_acquire *ipa)
{
	mtx_enter(&ipsec_acquire_mtx);
	ipsp_unref_acquire_locked(ipa);
	mtx_leave(&ipsec_acquire_mtx);
}

void
ipsp_unref_acquire_locked(struct ipsec_acquire *ipa)
{
	MUTEX_ASSERT_LOCKED(&ipsec_acquire_mtx);

	if (refcnt_rele(&ipa->ipa_refcnt) == 0)
		return;
	TAILQ_REMOVE(&ipsec_acquire_head, ipa, ipa_next);
	TAILQ_REMOVE(&ipa->ipa_policy->ipo_acquires, ipa, ipa_ipo_next);
	ipa->ipa_policy = NULL;

	pool_put(&ipsec_acquire_pool, ipa);
}
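
/*
 * Reference lifecycle sketch (summary of the functions above, added for
 * exposition): refcnt_init() in ipsp_acquire_sa() accounts for the list
 * linkage; timeout_add_sec() takes an extra reference for the running
 * timer, which either the expiring timer itself or a successful
 * timeout_del() gives back; ipsec_get_acquire() takes one on behalf of
 * its caller.  The thread that drops the last reference in
 * ipsp_unref_acquire_locked() unlinks the record and frees it.
 */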

/*
 * Find out if there's an ACQUIRE pending.
 * XXX Need a better structure.
 */
int
ipsp_pending_acquire(struct ipsec_policy *ipo, union sockaddr_union *gw)
{
	struct ipsec_acquire *ipa;

	NET_ASSERT_LOCKED();

	mtx_enter(&ipsec_acquire_mtx);
	TAILQ_FOREACH(ipa, &ipo->ipo_acquires, ipa_ipo_next) {
		if (!memcmp(gw, &ipa->ipa_addr, gw->sa.sa_len))
			break;
	}
	mtx_leave(&ipsec_acquire_mtx);

	return (ipa != NULL);
}

/*
 * Signal key management that we need an SA.
 * XXX For outgoing policies, we could try to hold on to the mbuf.
 */
int
ipsp_acquire_sa(struct ipsec_policy *ipo, union sockaddr_union *gw,
    union sockaddr_union *laddr, struct sockaddr_encap *ddst, struct mbuf *m)
{
	struct ipsec_acquire *ipa;

	NET_ASSERT_LOCKED();

	/* Check whether request has been made already. */
	if (ipsp_pending_acquire(ipo, gw))
		return 0;

	/* Add request in cache and proceed. */
	ipa = pool_get(&ipsec_acquire_pool, PR_NOWAIT|PR_ZERO);
	if (ipa == NULL)
		return ENOMEM;

	ipa->ipa_addr = *gw;

	refcnt_init(&ipa->ipa_refcnt);
	timeout_set(&ipa->ipa_timeout, ipsp_delete_acquire_timer, ipa);

	ipa->ipa_info.sen_len = ipa->ipa_mask.sen_len = SENT_LEN;
	ipa->ipa_info.sen_family = ipa->ipa_mask.sen_family = PF_KEY;

	/* Just copy the right information. */
	switch (ipo->ipo_addr.sen_type) {
	case SENT_IP4:
		ipa->ipa_info.sen_type = ipa->ipa_mask.sen_type = SENT_IP4;
		ipa->ipa_info.sen_direction = ipo->ipo_addr.sen_direction;
		ipa->ipa_mask.sen_direction = ipo->ipo_mask.sen_direction;

		if (ipsp_is_unspecified(ipo->ipo_dst)) {
			ipa->ipa_info.sen_ip_src = ddst->sen_ip_src;
			ipa->ipa_mask.sen_ip_src.s_addr = INADDR_BROADCAST;

			ipa->ipa_info.sen_ip_dst = ddst->sen_ip_dst;
			ipa->ipa_mask.sen_ip_dst.s_addr = INADDR_BROADCAST;
		} else {
			ipa->ipa_info.sen_ip_src = ipo->ipo_addr.sen_ip_src;
			ipa->ipa_mask.sen_ip_src = ipo->ipo_mask.sen_ip_src;

			ipa->ipa_info.sen_ip_dst = ipo->ipo_addr.sen_ip_dst;
			ipa->ipa_mask.sen_ip_dst = ipo->ipo_mask.sen_ip_dst;
		}

		ipa->ipa_info.sen_proto = ipo->ipo_addr.sen_proto;
		ipa->ipa_mask.sen_proto = ipo->ipo_mask.sen_proto;

		if (ipo->ipo_addr.sen_proto) {
			ipa->ipa_info.sen_sport = ipo->ipo_addr.sen_sport;
			ipa->ipa_mask.sen_sport = ipo->ipo_mask.sen_sport;

			ipa->ipa_info.sen_dport = ipo->ipo_addr.sen_dport;
			ipa->ipa_mask.sen_dport = ipo->ipo_mask.sen_dport;
		}
		break;

#ifdef INET6
	case SENT_IP6:
		ipa->ipa_info.sen_type = ipa->ipa_mask.sen_type = SENT_IP6;
		ipa->ipa_info.sen_ip6_direction =
		    ipo->ipo_addr.sen_ip6_direction;
		ipa->ipa_mask.sen_ip6_direction =
		    ipo->ipo_mask.sen_ip6_direction;

		if (ipsp_is_unspecified(ipo->ipo_dst)) {
			ipa->ipa_info.sen_ip6_src = ddst->sen_ip6_src;
			ipa->ipa_mask.sen_ip6_src = in6mask128;

			ipa->ipa_info.sen_ip6_dst = ddst->sen_ip6_dst;
			ipa->ipa_mask.sen_ip6_dst = in6mask128;
		} else {
			ipa->ipa_info.sen_ip6_src = ipo->ipo_addr.sen_ip6_src;
			ipa->ipa_mask.sen_ip6_src = ipo->ipo_mask.sen_ip6_src;

			ipa->ipa_info.sen_ip6_dst = ipo->ipo_addr.sen_ip6_dst;
			ipa->ipa_mask.sen_ip6_dst = ipo->ipo_mask.sen_ip6_dst;
		}

		ipa->ipa_info.sen_ip6_proto = ipo->ipo_addr.sen_ip6_proto;
		ipa->ipa_mask.sen_ip6_proto = ipo->ipo_mask.sen_ip6_proto;

		if (ipo->ipo_mask.sen_ip6_proto) {
			ipa->ipa_info.sen_ip6_sport =
			    ipo->ipo_addr.sen_ip6_sport;
			ipa->ipa_mask.sen_ip6_sport =
			    ipo->ipo_mask.sen_ip6_sport;
			ipa->ipa_info.sen_ip6_dport =
			    ipo->ipo_addr.sen_ip6_dport;
			ipa->ipa_mask.sen_ip6_dport =
			    ipo->ipo_mask.sen_ip6_dport;
		}
		break;
#endif /* INET6 */

	default:
		pool_put(&ipsec_acquire_pool, ipa);
		return 0;
	}

	mtx_enter(&ipsec_acquire_mtx);
#ifdef IPSEC
	if (timeout_add_sec(&ipa->ipa_timeout, ipsec_expire_acquire) == 1)
		refcnt_take(&ipa->ipa_refcnt);
#endif
	TAILQ_INSERT_TAIL(&ipsec_acquire_head, ipa, ipa_next);
	TAILQ_INSERT_TAIL(&ipo->ipo_acquires, ipa, ipa_ipo_next);
	ipa->ipa_policy = ipo;
	mtx_leave(&ipsec_acquire_mtx);

	/* PF_KEYv2 notification message. */
	return pfkeyv2_acquire(ipo, gw, laddr, &ipa->ipa_seq, ddst);
}
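
/*
 * Behavior sketch (restating the function above): ipsp_acquire_sa() is
 * idempotent per gateway -- while an ACQUIRE for 'gw' is already pending
 * on this policy it returns 0 without notifying key management again,
 * so a burst of packets matching the same flow produces a single
 * PF_KEYv2 ACQUIRE rather than one per packet.
 */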

/*
 * Deal with PCB security requirements.
 */
int
ipsp_spd_inp(struct mbuf *m, struct inpcb *inp, struct ipsec_policy *ipo,
    struct tdb **tdbout)
{
	/* Sanity check. */
	if (inp == NULL)
		goto justreturn;

	/* We only support IPSEC_LEVEL_BYPASS or IPSEC_LEVEL_AVAIL */

	if (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS &&
	    inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS &&
	    inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS)
		goto justreturn;

	if (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_AVAIL &&
	    inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_AVAIL &&
	    inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_AVAIL)
		goto justreturn;

	return -EINVAL;  /* Silently drop packet. */

 justreturn:
	if (tdbout != NULL) {
		if (ipo != NULL)
			*tdbout = tdb_ref(ipo->ipo_tdb);
		else
			*tdbout = NULL;
	}
	return 0;
}
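
/*
 * Outcome sketch (restating the checks above): a PCB with all three
 * security levels at BYPASS, or all three at AVAIL, falls through to
 * "justreturn" and hands back a reference to the policy's cached TDB
 * (or NULL); any other combination silently drops the packet via
 * -EINVAL.
 */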

/*
 * Find a pending ACQUIRE record based on its sequence number.
 * XXX Need to use a better data structure.
 */
struct ipsec_acquire *
ipsec_get_acquire(u_int32_t seq)
{
	struct ipsec_acquire *ipa;

	NET_ASSERT_LOCKED();

	mtx_enter(&ipsec_acquire_mtx);
	TAILQ_FOREACH(ipa, &ipsec_acquire_head, ipa_next) {
		if (ipa->ipa_seq == seq) {
			refcnt_take(&ipa->ipa_refcnt);
			break;
		}
	}
	mtx_leave(&ipsec_acquire_mtx);

	return ipa;
}
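
/*
 * Caller sketch (hypothetical, for illustration): ipsec_get_acquire()
 * returns a referenced record, so the caller must drop the reference
 * when done,
 *
 *	if ((ipa = ipsec_get_acquire(seq)) != NULL) {
 *		... use ipa ...
 *		ipsec_unref_acquire(ipa);
 *	}
 */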