xref: /openbsd/sys/netinet/ipsec_input.c (revision 4cfece93)
1 /*	$OpenBSD: ipsec_input.c,v 1.171 2020/06/24 22:03:43 cheloha Exp $	*/
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * This code was written by John Ioannidis for BSD/OS in Athens, Greece,
8  * in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 #include "pf.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/protosw.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/kernel.h>
47 #include <sys/timeout.h>
48 
49 #include <net/if.h>
50 #include <net/if_var.h>
51 #include <net/netisr.h>
52 #include <net/bpf.h>
53 #include <net/route.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #if NPF > 0
63 #include <net/pfvar.h>
64 #endif
65 
66 #ifdef INET6
67 #include <netinet6/in6_var.h>
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #include <netinet6/ip6protosw.h>
71 #endif /* INET6 */
72 
73 #include <netinet/ip_ipsp.h>
74 #include <netinet/ip_esp.h>
75 #include <netinet/ip_ah.h>
76 #include <netinet/ip_ipcomp.h>
77 
78 #include <net/if_enc.h>
79 
80 #include <crypto/cryptodev.h>
81 #include <crypto/xform.h>
82 
83 #include "bpfilter.h"
84 
85 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
86 
/*
 * Debug printf.  Only produces output in kernels built with ENCDEBUG, and
 * then only while the encdebug variable is non-zero.  The argument is a
 * complete, parenthesised printf argument list:
 *
 *	DPRINTF(("%s: something happened\n", __func__));
 *
 * Both variants expand to exactly one statement, so the macro can be used
 * as the body of an unbraced if/else without capturing the else.
 */
#ifdef ENCDEBUG
#define DPRINTF(x)	do { if (encdebug) printf x; } while (0)
#else
#define DPRINTF(x)	do { } while (0)
#endif
92 
93 /* sysctl variables */
94 int encdebug = 0;
95 int ipsec_keep_invalid = IPSEC_DEFAULT_EMBRYONIC_SA_TIMEOUT;
96 int ipsec_require_pfs = IPSEC_DEFAULT_PFS;
97 int ipsec_soft_allocations = IPSEC_DEFAULT_SOFT_ALLOCATIONS;
98 int ipsec_exp_allocations = IPSEC_DEFAULT_EXP_ALLOCATIONS;
99 int ipsec_soft_bytes = IPSEC_DEFAULT_SOFT_BYTES;
100 int ipsec_exp_bytes = IPSEC_DEFAULT_EXP_BYTES;
101 int ipsec_soft_timeout = IPSEC_DEFAULT_SOFT_TIMEOUT;
102 int ipsec_exp_timeout = IPSEC_DEFAULT_EXP_TIMEOUT;
103 int ipsec_soft_first_use = IPSEC_DEFAULT_SOFT_FIRST_USE;
104 int ipsec_exp_first_use = IPSEC_DEFAULT_EXP_FIRST_USE;
105 int ipsec_expire_acquire = IPSEC_DEFAULT_EXPIRE_ACQUIRE;
106 
107 int esp_enable = 1;
108 int ah_enable = 1;
109 int ipcomp_enable = 0;
110 
111 int *espctl_vars[ESPCTL_MAXID] = ESPCTL_VARS;
112 int *ahctl_vars[AHCTL_MAXID] = AHCTL_VARS;
113 int *ipcompctl_vars[IPCOMPCTL_MAXID] = IPCOMPCTL_VARS;
114 
115 struct cpumem *espcounters;
116 struct cpumem *ahcounters;
117 struct cpumem *ipcompcounters;
118 struct cpumem *ipseccounters;
119 
120 char ipsec_def_enc[20];
121 char ipsec_def_auth[20];
122 char ipsec_def_comp[20];
123 
124 int *ipsecctl_vars[IPSEC_MAXID] = IPSECCTL_VARS;
125 
126 int esp_sysctl_espstat(void *, size_t *, void *);
127 int ah_sysctl_ahstat(void *, size_t *, void *);
128 int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
129 int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
130 
131 void
132 ipsec_init(void)
133 {
134 	espcounters = counters_alloc(esps_ncounters);
135 	ahcounters = counters_alloc(ahs_ncounters);
136 	ipcompcounters = counters_alloc(ipcomps_ncounters);
137 	ipseccounters = counters_alloc(ipsec_ncounters);
138 
139 	strlcpy(ipsec_def_enc, IPSEC_DEFAULT_DEF_ENC, sizeof(ipsec_def_enc));
140 	strlcpy(ipsec_def_auth, IPSEC_DEFAULT_DEF_AUTH, sizeof(ipsec_def_auth));
141 	strlcpy(ipsec_def_comp, IPSEC_DEFAULT_DEF_COMP, sizeof(ipsec_def_comp));
142 
143 }
144 
145 /*
146  * ipsec_common_input() gets called when we receive an IPsec-protected packet
147  * in IPv4 or IPv6. All it does is find the right TDB and call the appropriate
148  * transform. The callback takes care of further processing (like ingress
149  * filtering).
150  */
151 int
152 ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto,
153     int udpencap)
154 {
155 #define IPSEC_ISTAT(x,y,z) do {			\
156 	if (sproto == IPPROTO_ESP)		\
157 		espstat_inc(x);			\
158 	else if (sproto == IPPROTO_AH)		\
159 		ahstat_inc(y);			\
160 	else					\
161 		ipcompstat_inc(z);		\
162 } while (0)
163 
164 	union sockaddr_union dst_address;
165 	struct tdb *tdbp = NULL;
166 	struct ifnet *encif;
167 	u_int32_t spi;
168 	u_int16_t cpi;
169 	int error;
170 #ifdef ENCDEBUG
171 	char buf[INET6_ADDRSTRLEN];
172 #endif
173 
174 	NET_ASSERT_LOCKED();
175 
176 	ipsecstat_inc(ipsec_ipackets);
177 	ipsecstat_add(ipsec_ibytes, m->m_pkthdr.len);
178 	IPSEC_ISTAT(esps_input, ahs_input, ipcomps_input);
179 
180 	if (m == NULL) {
181 		DPRINTF(("%s: NULL packet received\n", __func__));
182 		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
183 		return EINVAL;
184 	}
185 
186 	if ((sproto == IPPROTO_IPCOMP) && (m->m_flags & M_COMP)) {
187 		DPRINTF(("%s: repeated decompression\n", __func__));
188 		ipcompstat_inc(ipcomps_pdrops);
189 		error = EINVAL;
190 		goto drop;
191 	}
192 
193 	if (m->m_pkthdr.len - skip < 2 * sizeof(u_int32_t)) {
194 		DPRINTF(("%s: packet too small\n", __func__));
195 		IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
196 		error = EINVAL;
197 		goto drop;
198 	}
199 
200 	/* Retrieve the SPI from the relevant IPsec header */
201 	switch (sproto) {
202 	case IPPROTO_ESP:
203 		m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi);
204 		break;
205 	case IPPROTO_AH:
206 		m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t),
207 		    (caddr_t) &spi);
208 		break;
209 	case IPPROTO_IPCOMP:
210 		m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t),
211 		    (caddr_t) &cpi);
212 		spi = ntohl(htons(cpi));
213 		break;
214 	default:
215 		panic("%s: unknown/unsupported security protocol %d",
216 		    __func__, sproto);
217 	}
218 
219 	/*
220 	 * Find tunnel control block and (indirectly) call the appropriate
221 	 * kernel crypto routine. The resulting mbuf chain is a valid
222 	 * IP packet ready to go through input processing.
223 	 */
224 
225 	memset(&dst_address, 0, sizeof(dst_address));
226 	dst_address.sa.sa_family = af;
227 
228 	switch (af) {
229 	case AF_INET:
230 		dst_address.sin.sin_len = sizeof(struct sockaddr_in);
231 		m_copydata(m, offsetof(struct ip, ip_dst),
232 		    sizeof(struct in_addr),
233 		    (caddr_t) &(dst_address.sin.sin_addr));
234 		break;
235 
236 #ifdef INET6
237 	case AF_INET6:
238 		dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6);
239 		m_copydata(m, offsetof(struct ip6_hdr, ip6_dst),
240 		    sizeof(struct in6_addr),
241 		    (caddr_t) &(dst_address.sin6.sin6_addr));
242 		in6_recoverscope(&dst_address.sin6,
243 		    &dst_address.sin6.sin6_addr);
244 		break;
245 #endif /* INET6 */
246 
247 	default:
248 		DPRINTF(("%s: unsupported protocol family %d\n", __func__, af));
249 		IPSEC_ISTAT(esps_nopf, ahs_nopf, ipcomps_nopf);
250 		error = EPFNOSUPPORT;
251 		goto drop;
252 	}
253 
254 	tdbp = gettdb(rtable_l2(m->m_pkthdr.ph_rtableid),
255 	    spi, &dst_address, sproto);
256 	if (tdbp == NULL) {
257 		DPRINTF(("%s: could not find SA for packet to %s, spi %08x\n",
258 		    __func__,
259 		    ipsp_address(&dst_address, buf, sizeof(buf)), ntohl(spi)));
260 		IPSEC_ISTAT(esps_notdb, ahs_notdb, ipcomps_notdb);
261 		error = ENOENT;
262 		goto drop;
263 	}
264 
265 	if (tdbp->tdb_flags & TDBF_INVALID) {
266 		DPRINTF(("%s: attempted to use invalid SA %s/%08x/%u\n",
267 		    __func__, ipsp_address(&dst_address, buf,
268 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
269 		IPSEC_ISTAT(esps_invalid, ahs_invalid, ipcomps_invalid);
270 		error = EINVAL;
271 		goto drop;
272 	}
273 
274 	if (udpencap && !(tdbp->tdb_flags & TDBF_UDPENCAP)) {
275 		DPRINTF(("%s: attempted to use non-udpencap SA %s/%08x/%u\n",
276 		    __func__, ipsp_address(&dst_address, buf,
277 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
278 		espstat_inc(esps_udpinval);
279 		error = EINVAL;
280 		goto drop;
281 	}
282 
283 	if (!udpencap && (tdbp->tdb_flags & TDBF_UDPENCAP)) {
284 		DPRINTF(("%s: attempted to use udpencap SA %s/%08x/%u\n",
285 		    __func__, ipsp_address(&dst_address, buf,
286 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
287 		espstat_inc(esps_udpneeded);
288 		error = EINVAL;
289 		goto drop;
290 	}
291 
292 	if (tdbp->tdb_xform == NULL) {
293 		DPRINTF(("%s: attempted to use uninitialized SA %s/%08x/%u\n",
294 		    __func__, ipsp_address(&dst_address, buf,
295 		    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
296 		IPSEC_ISTAT(esps_noxform, ahs_noxform, ipcomps_noxform);
297 		error = ENXIO;
298 		goto drop;
299 	}
300 
301 	if (sproto != IPPROTO_IPCOMP) {
302 		if ((encif = enc_getif(tdbp->tdb_rdomain_post,
303 		    tdbp->tdb_tap)) == NULL) {
304 			DPRINTF(("%s: no enc%u interface for SA %s/%08x/%u\n",
305 			    __func__,
306 			    tdbp->tdb_tap, ipsp_address(&dst_address, buf,
307 			    sizeof(buf)), ntohl(spi), tdbp->tdb_sproto));
308 			IPSEC_ISTAT(esps_pdrops, ahs_pdrops, ipcomps_pdrops);
309 			error = EACCES;
310 			goto drop;
311 		}
312 
313 		/* XXX This conflicts with the scoped nature of IPv6 */
314 		m->m_pkthdr.ph_ifidx = encif->if_index;
315 	}
316 
317 	/* Register first use, setup expiration timer. */
318 	if (tdbp->tdb_first_use == 0) {
319 		tdbp->tdb_first_use = gettime();
320 		if (tdbp->tdb_flags & TDBF_FIRSTUSE)
321 			timeout_add_sec(&tdbp->tdb_first_tmo,
322 			    tdbp->tdb_exp_first_use);
323 		if (tdbp->tdb_flags & TDBF_SOFT_FIRSTUSE)
324 			timeout_add_sec(&tdbp->tdb_sfirst_tmo,
325 			    tdbp->tdb_soft_first_use);
326 	}
327 
328 	tdbp->tdb_ipackets++;
329 	tdbp->tdb_ibytes += m->m_pkthdr.len;
330 
331 	/*
332 	 * Call appropriate transform and return -- callback takes care of
333 	 * everything else.
334 	 */
335 	error = (*(tdbp->tdb_xform->xf_input))(m, tdbp, skip, protoff);
336 	if (error) {
337 		ipsecstat_inc(ipsec_idrops);
338 		tdbp->tdb_idrops++;
339 	}
340 	return error;
341 
342  drop:
343 	ipsecstat_inc(ipsec_idrops);
344 	if (tdbp != NULL)
345 		tdbp->tdb_idrops++;
346 	m_freem(m);
347 	return error;
348 }
349 
350 void
351 ipsec_input_cb(struct cryptop *crp)
352 {
353 	struct tdb_crypto *tc = (struct tdb_crypto *) crp->crp_opaque;
354 	struct mbuf *m = (struct mbuf *) crp->crp_buf;
355 	struct tdb *tdb = NULL;
356 	int clen, error;
357 
358 	if (m == NULL) {
359 		DPRINTF(("%s: bogus returned buffer from crypto\n", __func__));
360 		ipsecstat_inc(ipsec_crypto);
361 		goto droponly;
362 	}
363 
364 
365 	NET_LOCK();
366 	tdb = gettdb(tc->tc_rdomain, tc->tc_spi, &tc->tc_dst, tc->tc_proto);
367 	if (tdb == NULL) {
368 		DPRINTF(("%s: TDB is expired while in crypto", __func__));
369 		ipsecstat_inc(ipsec_notdb);
370 		goto baddone;
371 	}
372 
373 	/* Check for crypto errors */
374 	if (crp->crp_etype) {
375 		if (crp->crp_etype == EAGAIN) {
376 			/* Reset the session ID */
377 			if (tdb->tdb_cryptoid != 0)
378 				tdb->tdb_cryptoid = crp->crp_sid;
379 			NET_UNLOCK();
380 			crypto_dispatch(crp);
381 			return;
382 		}
383 		DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype));
384 		ipsecstat_inc(ipsec_noxform);
385 		goto baddone;
386 	}
387 
388 	/* Length of data after processing */
389 	clen = crp->crp_olen;
390 
391 	/* Release the crypto descriptors */
392 	crypto_freereq(crp);
393 
394 	switch (tdb->tdb_sproto) {
395 	case IPPROTO_ESP:
396 		error = esp_input_cb(tdb, tc, m, clen);
397 		break;
398 	case IPPROTO_AH:
399 		error = ah_input_cb(tdb, tc, m, clen);
400 		break;
401 	case IPPROTO_IPCOMP:
402 		error = ipcomp_input_cb(tdb, tc, m, clen);
403 		break;
404 	default:
405 		panic("%s: unknown/unsupported security protocol %d",
406 		    __func__, tdb->tdb_sproto);
407 	}
408 
409 	NET_UNLOCK();
410 	if (error) {
411 		ipsecstat_inc(ipsec_idrops);
412 		tdb->tdb_idrops++;
413 	}
414 	return;
415 
416  baddone:
417 	NET_UNLOCK();
418  droponly:
419 	ipsecstat_inc(ipsec_idrops);
420 	if (tdb != NULL)
421 		tdb->tdb_idrops++;
422 	free(tc, M_XDATA, 0);
423 	m_freem(m);
424 	crypto_freereq(crp);
425 }
426 
427 /*
428  * IPsec input callback, called by the transform callback. Takes care of
429  * filtering and other sanity checks on the processed packet.
430  */
431 int
432 ipsec_common_input_cb(struct mbuf *m, struct tdb *tdbp, int skip, int protoff)
433 {
434 	int af, sproto;
435 	u_int8_t prot;
436 
437 #if NBPFILTER > 0
438 	struct ifnet *encif;
439 #endif
440 
441 	struct ip *ip, ipn;
442 
443 #ifdef INET6
444 	struct ip6_hdr *ip6, ip6n;
445 #endif /* INET6 */
446 	struct m_tag *mtag;
447 	struct tdb_ident *tdbi;
448 
449 #ifdef ENCDEBUG
450 	char buf[INET6_ADDRSTRLEN];
451 #endif
452 
453 	af = tdbp->tdb_dst.sa.sa_family;
454 	sproto = tdbp->tdb_sproto;
455 
456 	tdbp->tdb_last_used = gettime();
457 
458 	/* Sanity check */
459 	if (m == NULL) {
460 		/* The called routine will print a message if necessary */
461 		IPSEC_ISTAT(esps_badkcr, ahs_badkcr, ipcomps_badkcr);
462 		return -1;
463 	}
464 
465 	/* Fix IPv4 header */
466 	if (af == AF_INET) {
467 		if ((m->m_len < skip) && ((m = m_pullup(m, skip)) == NULL)) {
468 			DPRINTF(("%s: processing failed for SA %s/%08x\n",
469 			    __func__, ipsp_address(&tdbp->tdb_dst,
470 			    buf, sizeof(buf)), ntohl(tdbp->tdb_spi)));
471 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
472 			return -1;
473 		}
474 
475 		ip = mtod(m, struct ip *);
476 		ip->ip_len = htons(m->m_pkthdr.len);
477 		ip->ip_sum = 0;
478 		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
479 		prot = ip->ip_p;
480 
481 		/* IP-in-IP encapsulation */
482 		if (prot == IPPROTO_IPIP) {
483 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
484 				m_freem(m);
485 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
486 				    ipcomps_hdrops);
487 				return -1;
488 			}
489 			/* ipn will now contain the inner IPv4 header */
490 			m_copydata(m, skip, sizeof(struct ip),
491 			    (caddr_t) &ipn);
492 		}
493 
494 #ifdef INET6
495 		/* IPv6-in-IP encapsulation. */
496 		if (prot == IPPROTO_IPV6) {
497 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
498 				m_freem(m);
499 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
500 				    ipcomps_hdrops);
501 				return -1;
502 			}
503 			/* ip6n will now contain the inner IPv6 header. */
504 			m_copydata(m, skip, sizeof(struct ip6_hdr),
505 			    (caddr_t) &ip6n);
506 		}
507 #endif /* INET6 */
508 	}
509 
510 #ifdef INET6
511 	/* Fix IPv6 header */
512 	if (af == AF_INET6)
513 	{
514 		if (m->m_len < sizeof(struct ip6_hdr) &&
515 		    (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
516 
517 			DPRINTF(("%s: processing failed for SA %s/%08x\n",
518 			    __func__, ipsp_address(&tdbp->tdb_dst,
519 			    buf, sizeof(buf)), ntohl(tdbp->tdb_spi)));
520 
521 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
522 			return -1;
523 		}
524 
525 		ip6 = mtod(m, struct ip6_hdr *);
526 		ip6->ip6_plen = htons(m->m_pkthdr.len - skip);
527 
528 		/* Save protocol */
529 		m_copydata(m, protoff, 1, (caddr_t) &prot);
530 
531 		/* IP-in-IP encapsulation */
532 		if (prot == IPPROTO_IPIP) {
533 			if (m->m_pkthdr.len - skip < sizeof(struct ip)) {
534 				m_freem(m);
535 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
536 				    ipcomps_hdrops);
537 				return -1;
538 			}
539 			/* ipn will now contain the inner IPv4 header */
540 			m_copydata(m, skip, sizeof(struct ip), (caddr_t) &ipn);
541 		}
542 
543 		/* IPv6-in-IP encapsulation */
544 		if (prot == IPPROTO_IPV6) {
545 			if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) {
546 				m_freem(m);
547 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
548 				    ipcomps_hdrops);
549 				return -1;
550 			}
551 			/* ip6n will now contain the inner IPv6 header. */
552 			m_copydata(m, skip, sizeof(struct ip6_hdr),
553 			    (caddr_t) &ip6n);
554 		}
555 	}
556 #endif /* INET6 */
557 
558 	/*
559 	 * Fix TCP/UDP checksum of UDP encapsulated transport mode ESP packet.
560 	 * (RFC3948 3.1.2)
561 	 */
562 	if ((af == AF_INET || af == AF_INET6) &&
563 	    (tdbp->tdb_flags & TDBF_UDPENCAP) &&
564 	    (tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
565 		u_int16_t cksum;
566 
567 		switch (prot) {
568 		case IPPROTO_UDP:
569 			if (m->m_pkthdr.len < skip + sizeof(struct udphdr)) {
570 				m_freem(m);
571 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
572 				    ipcomps_hdrops);
573 				return -1;
574 			}
575 			cksum = 0;
576 			m_copyback(m, skip + offsetof(struct udphdr, uh_sum),
577 			    sizeof(cksum), &cksum, M_NOWAIT);
578 #ifdef INET6
579 			if (af == AF_INET6) {
580 				cksum = in6_cksum(m, IPPROTO_UDP, skip,
581 				    m->m_pkthdr.len - skip);
582 				m_copyback(m, skip + offsetof(struct udphdr,
583 				    uh_sum), sizeof(cksum), &cksum, M_NOWAIT);
584 			}
585 #endif
586 			break;
587 		case IPPROTO_TCP:
588 			if (m->m_pkthdr.len < skip + sizeof(struct tcphdr)) {
589 				m_freem(m);
590 				IPSEC_ISTAT(esps_hdrops, ahs_hdrops,
591 				    ipcomps_hdrops);
592 				return -1;
593 			}
594 			cksum = 0;
595 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
596 			    sizeof(cksum), &cksum, M_NOWAIT);
597 			if (af == AF_INET)
598 				cksum = in4_cksum(m, IPPROTO_TCP, skip,
599 				    m->m_pkthdr.len - skip);
600 #ifdef INET6
601 			else if (af == AF_INET6)
602 				cksum = in6_cksum(m, IPPROTO_TCP, skip,
603 				    m->m_pkthdr.len - skip);
604 #endif
605 			m_copyback(m, skip + offsetof(struct tcphdr, th_sum),
606 			    sizeof(cksum), &cksum, M_NOWAIT);
607 			break;
608 		}
609 	}
610 
611 	/*
612 	 * Record what we've done to the packet (under what SA it was
613 	 * processed).
614 	 */
615 	if (tdbp->tdb_sproto != IPPROTO_IPCOMP) {
616 		mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE,
617 		    sizeof(struct tdb_ident), M_NOWAIT);
618 		if (mtag == NULL) {
619 			m_freem(m);
620 			DPRINTF(("%s: failed to get tag\n", __func__));
621 			IPSEC_ISTAT(esps_hdrops, ahs_hdrops, ipcomps_hdrops);
622 			return -1;
623 		}
624 
625 		tdbi = (struct tdb_ident *)(mtag + 1);
626 		tdbi->dst = tdbp->tdb_dst;
627 		tdbi->proto = tdbp->tdb_sproto;
628 		tdbi->spi = tdbp->tdb_spi;
629 		tdbi->rdomain = tdbp->tdb_rdomain;
630 
631 		m_tag_prepend(m, mtag);
632 	}
633 
634 	switch (sproto) {
635 	case IPPROTO_ESP:
636 		/* Packet is confidential ? */
637 		if (tdbp->tdb_encalgxform)
638 			m->m_flags |= M_CONF;
639 
640 		/* Check if we had authenticated ESP. */
641 		if (tdbp->tdb_authalgxform)
642 			m->m_flags |= M_AUTH;
643 		break;
644 	case IPPROTO_AH:
645 		m->m_flags |= M_AUTH;
646 		break;
647 	case IPPROTO_IPCOMP:
648 		m->m_flags |= M_COMP;
649 		break;
650 	default:
651 		panic("%s: unknown/unsupported security protocol %d",
652 		    __func__, sproto);
653 	}
654 
655 #if NPF > 0
656 	/* Add pf tag if requested. */
657 	pf_tag_packet(m, tdbp->tdb_tag, -1);
658 	pf_pkt_addr_changed(m);
659 #endif
660 	if (tdbp->tdb_rdomain != tdbp->tdb_rdomain_post)
661 		m->m_pkthdr.ph_rtableid = tdbp->tdb_rdomain_post;
662 
663 	if (tdbp->tdb_flags & TDBF_TUNNELING)
664 		m->m_flags |= M_TUNNEL;
665 
666 	ipsecstat_add(ipsec_idecompbytes, m->m_pkthdr.len);
667 	tdbp->tdb_idecompbytes += m->m_pkthdr.len;
668 
669 #if NBPFILTER > 0
670 	if ((encif = enc_getif(tdbp->tdb_rdomain_post, tdbp->tdb_tap)) != NULL) {
671 		encif->if_ipackets++;
672 		encif->if_ibytes += m->m_pkthdr.len;
673 
674 		if (encif->if_bpf) {
675 			struct enchdr hdr;
676 
677 			hdr.af = af;
678 			hdr.spi = tdbp->tdb_spi;
679 			hdr.flags = m->m_flags & (M_AUTH|M_CONF);
680 
681 			bpf_mtap_hdr(encif->if_bpf, (char *)&hdr,
682 			    ENC_HDRLEN, m, BPF_DIRECTION_IN);
683 		}
684 	}
685 #endif
686 
687 #if NPF > 0
688 	/*
689 	 * The ip_deliver() shortcut avoids running through ip_input() with the
690 	 * same IP header twice.  Packets in transport mode have to be be
691 	 * passed to pf explicitly.  In tunnel mode the inner IP header will
692 	 * run through ip_input() and pf anyway.
693 	 */
694 	if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0) {
695 		struct ifnet *ifp;
696 
697 		/* This is the enc0 interface unless for ipcomp. */
698 		if ((ifp = if_get(m->m_pkthdr.ph_ifidx)) == NULL) {
699 			m_freem(m);
700 			return -1;
701 		}
702 		if (pf_test(af, PF_IN, ifp, &m) != PF_PASS) {
703 			if_put(ifp);
704 			m_freem(m);
705 			return -1;
706 		}
707 		if_put(ifp);
708 		if (m == NULL)
709 			return -1;
710 	}
711 #endif
712 	/* Call the appropriate IPsec transform callback. */
713 	ip_deliver(&m, &skip, prot, af);
714 	return 0;
715 #undef IPSEC_ISTAT
716 }
717 
718 int
719 ipsec_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
720     size_t newlen)
721 {
722 	int error;
723 
724 	switch (name[0]) {
725 	case IPCTL_IPSEC_ENC_ALGORITHM:
726 		NET_LOCK();
727 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
728 		    ipsec_def_enc, sizeof(ipsec_def_enc));
729 		NET_UNLOCK();
730 		return (error);
731 	case IPCTL_IPSEC_AUTH_ALGORITHM:
732 		NET_LOCK();
733 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
734 		    ipsec_def_auth, sizeof(ipsec_def_auth));
735 		NET_UNLOCK();
736 		return (error);
737 	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
738 		NET_LOCK();
739 		error = sysctl_tstring(oldp, oldlenp, newp, newlen,
740 		    ipsec_def_comp, sizeof(ipsec_def_comp));
741 		NET_UNLOCK();
742 		return (error);
743 	case IPCTL_IPSEC_STATS:
744 		return (ipsec_sysctl_ipsecstat(oldp, oldlenp, newp));
745 	default:
746 		if (name[0] < IPSEC_MAXID) {
747 			NET_LOCK();
748 			error = sysctl_int_arr(ipsecctl_vars, name, namelen,
749 			    oldp, oldlenp, newp, newlen);
750 			NET_UNLOCK();
751 			return (error);
752 		}
753 		return (EOPNOTSUPP);
754 	}
755 }
756 
757 int
758 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
759     size_t newlen)
760 {
761 	int error;
762 
763 	/* All sysctl names at this level are terminal. */
764 	if (namelen != 1)
765 		return (ENOTDIR);
766 
767 	switch (name[0]) {
768 	case ESPCTL_STATS:
769 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
770 	default:
771 		if (name[0] < ESPCTL_MAXID) {
772 			NET_LOCK();
773 			error = sysctl_int_arr(espctl_vars, name, namelen,
774 			    oldp, oldlenp, newp, newlen);
775 			NET_UNLOCK();
776 			return (error);
777 		}
778 		return (ENOPROTOOPT);
779 	}
780 }
781 
782 int
783 esp_sysctl_espstat(void *oldp, size_t *oldlenp, void *newp)
784 {
785 	struct espstat espstat;
786 
787 	CTASSERT(sizeof(espstat) == (esps_ncounters * sizeof(uint64_t)));
788 	memset(&espstat, 0, sizeof espstat);
789 	counters_read(espcounters, (uint64_t *)&espstat, esps_ncounters);
790 	return (sysctl_rdstruct(oldp, oldlenp, newp, &espstat,
791 	    sizeof(espstat)));
792 }
793 
794 int
795 ah_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
796     size_t newlen)
797 {
798 	int error;
799 
800 	/* All sysctl names at this level are terminal. */
801 	if (namelen != 1)
802 		return (ENOTDIR);
803 
804 	switch (name[0]) {
805 	case AHCTL_STATS:
806 		return ah_sysctl_ahstat(oldp, oldlenp, newp);
807 	default:
808 		if (name[0] < AHCTL_MAXID) {
809 			NET_LOCK();
810 			error = sysctl_int_arr(ahctl_vars, name, namelen,
811 			    oldp, oldlenp, newp, newlen);
812 			NET_UNLOCK();
813 			return (error);
814 		}
815 		return (ENOPROTOOPT);
816 	}
817 }
818 
819 int
820 ah_sysctl_ahstat(void *oldp, size_t *oldlenp, void *newp)
821 {
822 	struct ahstat ahstat;
823 
824 	CTASSERT(sizeof(ahstat) == (ahs_ncounters * sizeof(uint64_t)));
825 	memset(&ahstat, 0, sizeof ahstat);
826 	counters_read(ahcounters, (uint64_t *)&ahstat, ahs_ncounters);
827 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ahstat, sizeof(ahstat)));
828 }
829 
830 int
831 ipcomp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
832     size_t newlen)
833 {
834 	int error;
835 
836 	/* All sysctl names at this level are terminal. */
837 	if (namelen != 1)
838 		return (ENOTDIR);
839 
840 	switch (name[0]) {
841 	case IPCOMPCTL_STATS:
842 		return ipcomp_sysctl_ipcompstat(oldp, oldlenp, newp);
843 	default:
844 		if (name[0] < IPCOMPCTL_MAXID) {
845 			NET_LOCK();
846 			error = sysctl_int_arr(ipcompctl_vars, name, namelen,
847 			    oldp, oldlenp, newp, newlen);
848 			NET_UNLOCK();
849 			return (error);
850 		}
851 		return (ENOPROTOOPT);
852 	}
853 }
854 
855 int
856 ipcomp_sysctl_ipcompstat(void *oldp, size_t *oldlenp, void *newp)
857 {
858 	struct ipcompstat ipcompstat;
859 
860 	CTASSERT(sizeof(ipcompstat) == (ipcomps_ncounters * sizeof(uint64_t)));
861 	memset(&ipcompstat, 0, sizeof ipcompstat);
862 	counters_read(ipcompcounters, (uint64_t *)&ipcompstat,
863 	    ipcomps_ncounters);
864 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipcompstat,
865 	    sizeof(ipcompstat)));
866 }
867 
868 int
869 ipsec_sysctl_ipsecstat(void *oldp, size_t *oldlenp, void *newp)
870 {
871 	struct ipsecstat ipsecstat;
872 
873 	CTASSERT(sizeof(ipsecstat) == (ipsec_ncounters * sizeof(uint64_t)));
874 	memset(&ipsecstat, 0, sizeof ipsecstat);
875 	counters_read(ipseccounters, (uint64_t *)&ipsecstat, ipsec_ncounters);
876 	return (sysctl_rdstruct(oldp, oldlenp, newp, &ipsecstat,
877 	    sizeof(ipsecstat)));
878 }
879 
880 /* IPv4 AH wrapper. */
881 int
882 ah4_input(struct mbuf **mp, int *offp, int proto, int af)
883 {
884 	if (
885 #if NPF > 0
886 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
887 #endif
888 	    !ah_enable)
889 		return rip_input(mp, offp, proto, af);
890 
891 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
892 	    proto, 0);
893 	return IPPROTO_DONE;
894 }
895 
896 void
897 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
898 {
899 	if (sa->sa_family != AF_INET ||
900 	    sa->sa_len != sizeof(struct sockaddr_in))
901 		return;
902 
903 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_AH);
904 }
905 
906 /* IPv4 ESP wrapper. */
907 int
908 esp4_input(struct mbuf **mp, int *offp, int proto, int af)
909 {
910 	if (
911 #if NPF > 0
912 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
913 #endif
914 	    !esp_enable)
915 		return rip_input(mp, offp, proto, af);
916 
917 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
918 	    proto, 0);
919 	return IPPROTO_DONE;
920 }
921 
922 /* IPv4 IPCOMP wrapper */
923 int
924 ipcomp4_input(struct mbuf **mp, int *offp, int proto, int af)
925 {
926 	if (
927 #if NPF > 0
928 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
929 #endif
930 	    !ipcomp_enable)
931 		return rip_input(mp, offp, proto, af);
932 
933 	ipsec_common_input(*mp, *offp, offsetof(struct ip, ip_p), AF_INET,
934 	    proto, 0);
935 	return IPPROTO_DONE;
936 }
937 
938 void
939 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
940     void *v, int proto)
941 {
942 	struct ip *ip = v;
943 
944 	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
945 		struct tdb *tdbp;
946 		struct sockaddr_in dst;
947 		struct icmp *icp;
948 		int hlen = ip->ip_hl << 2;
949 		u_int32_t spi, mtu;
950 		ssize_t adjust;
951 
952 		/* Find the right MTU. */
953 		icp = (struct icmp *)((caddr_t) ip -
954 		    offsetof(struct icmp, icmp_ip));
955 		mtu = ntohs(icp->icmp_nextmtu);
956 
957 		/*
958 		 * Ignore the packet, if we do not receive a MTU
959 		 * or the MTU is too small to be acceptable.
960 		 */
961 		if (mtu < 296)
962 			return;
963 
964 		memset(&dst, 0, sizeof(struct sockaddr_in));
965 		dst.sin_family = AF_INET;
966 		dst.sin_len = sizeof(struct sockaddr_in);
967 		dst.sin_addr.s_addr = ip->ip_dst.s_addr;
968 
969 		memcpy(&spi, (caddr_t)ip + hlen, sizeof(u_int32_t));
970 
971 		tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *)&dst,
972 		    proto);
973 		if (tdbp == NULL || tdbp->tdb_flags & TDBF_INVALID)
974 			return;
975 
976 		/* Walk the chain backwards to the first tdb */
977 		NET_ASSERT_LOCKED();
978 		for (; tdbp; tdbp = tdbp->tdb_inext) {
979 			if (tdbp->tdb_flags & TDBF_INVALID ||
980 			    (adjust = ipsec_hdrsz(tdbp)) == -1)
981 				return;
982 
983 			mtu -= adjust;
984 
985 			/* Store adjusted MTU in tdb */
986 			tdbp->tdb_mtu = mtu;
987 			tdbp->tdb_mtutimeout = gettime() +
988 			    ip_mtudisc_timeout;
989 			DPRINTF(("%s: spi %08x mtu %d adjust %ld\n", __func__,
990 			    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu,
991 			    adjust));
992 		}
993 	}
994 }
995 
996 void
997 udpencap_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
998 {
999 	struct ip *ip = v;
1000 	struct tdb *tdbp;
1001 	struct icmp *icp;
1002 	u_int32_t mtu;
1003 	ssize_t adjust;
1004 	struct sockaddr_in dst, src;
1005 	union sockaddr_union *su_dst, *su_src;
1006 
1007 	NET_ASSERT_LOCKED();
1008 
1009 	icp = (struct icmp *)((caddr_t) ip - offsetof(struct icmp, icmp_ip));
1010 	mtu = ntohs(icp->icmp_nextmtu);
1011 
1012 	/*
1013 	 * Ignore the packet, if we do not receive a MTU
1014 	 * or the MTU is too small to be acceptable.
1015 	 */
1016 	if (mtu < 296)
1017 		return;
1018 
1019 	memset(&dst, 0, sizeof(dst));
1020 	dst.sin_family = AF_INET;
1021 	dst.sin_len = sizeof(struct sockaddr_in);
1022 	dst.sin_addr.s_addr = ip->ip_dst.s_addr;
1023 	su_dst = (union sockaddr_union *)&dst;
1024 	memset(&src, 0, sizeof(src));
1025 	src.sin_family = AF_INET;
1026 	src.sin_len = sizeof(struct sockaddr_in);
1027 	src.sin_addr.s_addr = ip->ip_src.s_addr;
1028 	su_src = (union sockaddr_union *)&src;
1029 
1030 	tdbp = gettdbbysrcdst_rev(rdomain, 0, su_src, su_dst,
1031 	    IPPROTO_ESP);
1032 
1033 	for (; tdbp != NULL; tdbp = tdbp->tdb_snext) {
1034 		if (tdbp->tdb_sproto == IPPROTO_ESP &&
1035 		    ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_UDPENCAP)) ==
1036 		    TDBF_UDPENCAP) &&
1037 		    !memcmp(&tdbp->tdb_dst, &dst, su_dst->sa.sa_len) &&
1038 		    !memcmp(&tdbp->tdb_src, &src, su_src->sa.sa_len)) {
1039 			if ((adjust = ipsec_hdrsz(tdbp)) != -1) {
1040 				/* Store adjusted MTU in tdb */
1041 				tdbp->tdb_mtu = mtu - adjust;
1042 				tdbp->tdb_mtutimeout = gettime() +
1043 				    ip_mtudisc_timeout;
1044 				DPRINTF(("%s: spi %08x mtu %d adjust %ld\n",
1045 				    __func__,
1046 				    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu,
1047 				    adjust));
1048 			}
1049 		}
1050 	}
1051 }
1052 
1053 void
1054 esp4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
1055 {
1056 	if (sa->sa_family != AF_INET ||
1057 	    sa->sa_len != sizeof(struct sockaddr_in))
1058 		return;
1059 
1060 	ipsec_common_ctlinput(rdomain, cmd, sa, v, IPPROTO_ESP);
1061 }
1062 
1063 #ifdef INET6
1064 /* IPv6 AH wrapper. */
1065 int
1066 ah6_input(struct mbuf **mp, int *offp, int proto, int af)
1067 {
1068 	int l = 0;
1069 	int protoff, nxt;
1070 	struct ip6_ext ip6e;
1071 
1072 	if (
1073 #if NPF > 0
1074 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
1075 #endif
1076 	    !ah_enable)
1077 		return rip6_input(mp, offp, proto, af);
1078 
1079 	if (*offp < sizeof(struct ip6_hdr)) {
1080 		DPRINTF(("%s: bad offset\n", __func__));
1081 		ahstat_inc(ahs_hdrops);
1082 		m_freemp(mp);
1083 		return IPPROTO_DONE;
1084 	} else if (*offp == sizeof(struct ip6_hdr)) {
1085 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
1086 	} else {
1087 		/* Chase down the header chain... */
1088 		protoff = sizeof(struct ip6_hdr);
1089 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
1090 
1091 		do {
1092 			protoff += l;
1093 			m_copydata(*mp, protoff, sizeof(ip6e),
1094 			    (caddr_t) &ip6e);
1095 
1096 			if (nxt == IPPROTO_AH)
1097 				l = (ip6e.ip6e_len + 2) << 2;
1098 			else
1099 				l = (ip6e.ip6e_len + 1) << 3;
1100 #ifdef DIAGNOSTIC
1101 			if (l <= 0)
1102 				panic("ah6_input: l went zero or negative");
1103 #endif
1104 
1105 			nxt = ip6e.ip6e_nxt;
1106 		} while (protoff + l < *offp);
1107 
1108 		/* Malformed packet check */
1109 		if (protoff + l != *offp) {
1110 			DPRINTF(("%s: bad packet header chain\n", __func__));
1111 			ahstat_inc(ahs_hdrops);
1112 			m_freemp(mp);
1113 			return IPPROTO_DONE;
1114 		}
1115 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
1116 	}
1117 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
1118 	return IPPROTO_DONE;
1119 }
1120 
1121 /* IPv6 ESP wrapper. */
1122 int
1123 esp6_input(struct mbuf **mp, int *offp, int proto, int af)
1124 {
1125 	int l = 0;
1126 	int protoff, nxt;
1127 	struct ip6_ext ip6e;
1128 
1129 	if (
1130 #if NPF > 0
1131 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
1132 #endif
1133 	    !esp_enable)
1134 		return rip6_input(mp, offp, proto, af);
1135 
1136 	if (*offp < sizeof(struct ip6_hdr)) {
1137 		DPRINTF(("%s: bad offset\n", __func__));
1138 		espstat_inc(esps_hdrops);
1139 		m_freemp(mp);
1140 		return IPPROTO_DONE;
1141 	} else if (*offp == sizeof(struct ip6_hdr)) {
1142 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
1143 	} else {
1144 		/* Chase down the header chain... */
1145 		protoff = sizeof(struct ip6_hdr);
1146 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
1147 
1148 		do {
1149 			protoff += l;
1150 			m_copydata(*mp, protoff, sizeof(ip6e),
1151 			    (caddr_t) &ip6e);
1152 
1153 			if (nxt == IPPROTO_AH)
1154 				l = (ip6e.ip6e_len + 2) << 2;
1155 			else
1156 				l = (ip6e.ip6e_len + 1) << 3;
1157 #ifdef DIAGNOSTIC
1158 			if (l <= 0)
1159 				panic("esp6_input: l went zero or negative");
1160 #endif
1161 
1162 			nxt = ip6e.ip6e_nxt;
1163 		} while (protoff + l < *offp);
1164 
1165 		/* Malformed packet check */
1166 		if (protoff + l != *offp) {
1167 			DPRINTF(("%s: bad packet header chain\n", __func__));
1168 			espstat_inc(esps_hdrops);
1169 			m_freemp(mp);
1170 			return IPPROTO_DONE;
1171 		}
1172 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
1173 	}
1174 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
1175 	return IPPROTO_DONE;
1176 
1177 }
1178 
1179 /* IPv6 IPcomp wrapper */
1180 int
1181 ipcomp6_input(struct mbuf **mp, int *offp, int proto, int af)
1182 {
1183 	int l = 0;
1184 	int protoff, nxt;
1185 	struct ip6_ext ip6e;
1186 
1187 	if (
1188 #if NPF > 0
1189 	    ((*mp)->m_pkthdr.pf.flags & PF_TAG_DIVERTED) ||
1190 #endif
1191 	    !ipcomp_enable)
1192 		return rip6_input(mp, offp, proto, af);
1193 
1194 	if (*offp < sizeof(struct ip6_hdr)) {
1195 		DPRINTF(("%s: bad offset\n", __func__));
1196 		ipcompstat_inc(ipcomps_hdrops);
1197 		m_freemp(mp);
1198 		return IPPROTO_DONE;
1199 	} else if (*offp == sizeof(struct ip6_hdr)) {
1200 		protoff = offsetof(struct ip6_hdr, ip6_nxt);
1201 	} else {
1202 		/* Chase down the header chain... */
1203 		protoff = sizeof(struct ip6_hdr);
1204 		nxt = (mtod(*mp, struct ip6_hdr *))->ip6_nxt;
1205 
1206 		do {
1207 			protoff += l;
1208 			m_copydata(*mp, protoff, sizeof(ip6e),
1209 			    (caddr_t) &ip6e);
1210 			if (nxt == IPPROTO_AH)
1211 				l = (ip6e.ip6e_len + 2) << 2;
1212 			else
1213 				l = (ip6e.ip6e_len + 1) << 3;
1214 #ifdef DIAGNOSTIC
1215 			if (l <= 0)
1216 				panic("l went zero or negative");
1217 #endif
1218 
1219 			nxt = ip6e.ip6e_nxt;
1220 		} while (protoff + l < *offp);
1221 
1222 		/* Malformed packet check */
1223 		if (protoff + l != *offp) {
1224 			DPRINTF(("%s: bad packet header chain\n", __func__));
1225 			ipcompstat_inc(ipcomps_hdrops);
1226 			m_freemp(mp);
1227 			return IPPROTO_DONE;
1228 		}
1229 
1230 		protoff += offsetof(struct ip6_ext, ip6e_nxt);
1231 	}
1232 	ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto, 0);
1233 	return IPPROTO_DONE;
1234 }
1235 #endif /* INET6 */
1236 
1237 int
1238 ipsec_forward_check(struct mbuf *m, int hlen, int af)
1239 {
1240 	struct tdb *tdb;
1241 	struct tdb_ident *tdbi;
1242 	struct m_tag *mtag;
1243 	int error = 0;
1244 
1245 	/*
1246 	 * IPsec policy check for forwarded packets. Look at
1247 	 * inner-most IPsec SA used.
1248 	 */
1249 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1250 	if (mtag != NULL) {
1251 		tdbi = (struct tdb_ident *)(mtag + 1);
1252 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto);
1253 	} else
1254 		tdb = NULL;
1255 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN, tdb, NULL, 0);
1256 
1257 	return error;
1258 }
1259 
1260 int
1261 ipsec_local_check(struct mbuf *m, int hlen, int proto, int af)
1262 {
1263 	struct tdb *tdb;
1264 	struct tdb_ident *tdbi;
1265 	struct m_tag *mtag;
1266 	int error = 0;
1267 
1268 	/*
1269 	 * If it's a protected packet for us, skip the policy check.
1270 	 * That's because we really only care about the properties of
1271 	 * the protected packet, and not the intermediate versions.
1272 	 * While this is not the most paranoid setting, it allows
1273 	 * some flexibility in handling nested tunnels (in setting up
1274 	 * the policies).
1275 	 */
1276 	if ((proto == IPPROTO_ESP) || (proto == IPPROTO_AH) ||
1277 	    (proto == IPPROTO_IPCOMP))
1278 		return 0;
1279 
1280 	/*
1281 	 * If the protected packet was tunneled, then we need to
1282 	 * verify the protected packet's information, not the
1283 	 * external headers. Thus, skip the policy lookup for the
1284 	 * external packet, and keep the IPsec information linked on
1285 	 * the packet header (the encapsulation routines know how
1286 	 * to deal with that).
1287 	 */
1288 	if ((proto == IPPROTO_IPV4) || (proto == IPPROTO_IPV6))
1289 		return 0;
1290 
1291 	/*
1292 	 * When processing IPv6 header chains, do not look at the
1293 	 * outer header.  The inner protocol is relevant and will
1294 	 * be checked by the local delivery loop later.
1295 	 */
1296 	if ((af == AF_INET6) && ((proto == IPPROTO_DSTOPTS) ||
1297 	    (proto == IPPROTO_ROUTING) || (proto == IPPROTO_FRAGMENT)))
1298 		return 0;
1299 
1300 	/*
1301 	 * If the protected packet is TCP or UDP, we'll do the
1302 	 * policy check in the respective input routine, so we can
1303 	 * check for bypass sockets.
1304 	 */
1305 	if ((proto == IPPROTO_TCP) || (proto == IPPROTO_UDP))
1306 		return 0;
1307 
1308 	/*
1309 	 * IPsec policy check for local-delivery packets. Look at the
1310 	 * inner-most SA that protected the packet. This is in fact
1311 	 * a bit too restrictive (it could end up causing packets to
1312 	 * be dropped that semantically follow the policy, e.g., in
1313 	 * certain SA-bundle configurations); but the alternative is
1314 	 * very complicated (and requires keeping track of what
1315 	 * kinds of tunneling headers have been seen in-between the
1316 	 * IPsec headers), and I don't think we lose much functionality
1317 	 * that's needed in the real world (who uses bundles anyway ?).
1318 	 */
1319 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
1320 	if (mtag) {
1321 		tdbi = (struct tdb_ident *)(mtag + 1);
1322 		tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst,
1323 		    tdbi->proto);
1324 	} else
1325 		tdb = NULL;
1326 	ipsp_spd_lookup(m, af, hlen, &error, IPSP_DIRECTION_IN,
1327 	    tdb, NULL, 0);
1328 
1329 	return error;
1330 }
1331