xref: /openbsd/sys/netinet/ip_ipip.c (revision 3d8817e4)
1 /*	$OpenBSD: ip_ipip.c,v 1.47 2010/05/11 09:36:07 claudio Exp $ */
2 /*
3  * The authors of this code are John Ioannidis (ji@tla.org),
4  * Angelos D. Keromytis (kermit@csd.uch.gr) and
5  * Niels Provos (provos@physnet.uni-hamburg.de).
6  *
7  * The original version of this code was written by John Ioannidis
8  * for BSD/OS in Athens, Greece, in November 1995.
9  *
10  * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996,
11  * by Angelos D. Keromytis.
12  *
13  * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis
14  * and Niels Provos.
15  *
16  * Additional features in 1999 by Angelos D. Keromytis.
17  *
18  * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis,
19  * Angelos D. Keromytis and Niels Provos.
20  * Copyright (c) 2001, Angelos D. Keromytis.
21  *
22  * Permission to use, copy, and modify this software with or without fee
23  * is hereby granted, provided that this entire notice is included in
24  * all copies of any software which is or includes a copy or
25  * modification of this software.
26  * You may use this code under the GNU public license if you so wish. Please
27  * contribute changes back to the authors under this freer than GPL license
28  * so that we may further the use of strong encryption without limitations to
29  * all.
30  *
31  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
32  * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY
33  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
34  * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR
35  * PURPOSE.
36  */
37 
38 /*
39  * IP-inside-IP processing
40  */
41 
42 #include "pf.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/proc.h>
49 #include <sys/sysctl.h>
50 
51 #include <net/if.h>
52 #include <net/route.h>
53 #include <net/netisr.h>
54 #include <net/bpf.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #include <netinet/in_pcb.h>
60 #include <netinet/in_var.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/ip_ecn.h>
63 
64 #ifdef MROUTING
65 #include <netinet/ip_mroute.h>
66 #endif
67 
68 #include <netinet/ip_ipsp.h>
69 #include <netinet/ip_ipip.h>
70 
71 #include "bpfilter.h"
72 
73 #if NPF > 0
74 #include <net/pfvar.h>
75 #endif
76 
/*
 * Debug printing.  The argument is a complete, parenthesised printf()
 * argument list: DPRINTF(("fmt %d\n", val));
 *
 * Both variants expand to a single do/while(0) statement so the macro can
 * be used as the body of an unbraced if/else without capturing the else.
 */
#ifdef ENCDEBUG
#define DPRINTF(x)	do { if (encdebug) printf x; } while (0)
#else
#define DPRINTF(x)	do { } while (0)
#endif
82 
83 /*
84  * We can control the acceptance of IP4 packets by altering the sysctl
85  * net.inet.ipip.allow value.  Zero means drop them, all else is acceptance.
86  */
87 int ipip_allow = 0;
88 
89 struct ipipstat ipipstat;
90 
91 #ifdef INET6
92 /*
93  * Really only a wrapper for ipip_input(), for use with IPv6.
94  */
95 int
96 ip4_input6(struct mbuf **m, int *offp, int proto)
97 {
98 	/* If we do not accept IP-in-IP explicitly, drop.  */
99 	if (!ipip_allow && ((*m)->m_flags & (M_AUTH|M_CONF)) == 0) {
100 		DPRINTF(("ip4_input6(): dropped due to policy\n"));
101 		ipipstat.ipips_pdrops++;
102 		m_freem(*m);
103 		return IPPROTO_DONE;
104 	}
105 
106 	ipip_input(*m, *offp, NULL, proto);
107 	return IPPROTO_DONE;
108 }
109 #endif /* INET6 */
110 
111 #ifdef INET
112 /*
113  * Really only a wrapper for ipip_input(), for use with IPv4.
114  */
115 void
116 ip4_input(struct mbuf *m, ...)
117 {
118 	struct ip *ip;
119 	va_list ap;
120 	int iphlen;
121 
122 	/* If we do not accept IP-in-IP explicitly, drop.  */
123 	if (!ipip_allow && (m->m_flags & (M_AUTH|M_CONF)) == 0) {
124 		DPRINTF(("ip4_input(): dropped due to policy\n"));
125 		ipipstat.ipips_pdrops++;
126 		m_freem(m);
127 		return;
128 	}
129 
130 	va_start(ap, m);
131 	iphlen = va_arg(ap, int);
132 	va_end(ap);
133 
134 	ip = mtod(m, struct ip *);
135 
136 	ipip_input(m, iphlen, NULL, ip->ip_p);
137 }
138 #endif /* INET */
139 
140 /*
141  * ipip_input gets called when we receive an IP{46} encapsulated packet,
142  * either because we got it at a real interface, or because AH or ESP
143  * were being used in tunnel mode (in which case the rcvif element will
144  * contain the address of the encX interface associated with the tunnel.
145  */
146 
147 void
148 ipip_input(struct mbuf *m, int iphlen, struct ifnet *gifp, int proto)
149 {
150 	struct sockaddr_in *sin;
151 	struct ifnet *ifp;
152 	struct ifaddr *ifa;
153 	struct ifqueue *ifq = NULL;
154 	struct ip *ipo;
155 	u_int rdomain;
156 #ifdef INET6
157 	struct sockaddr_in6 *sin6;
158 	struct ip6_hdr *ip6;
159 	u_int8_t itos;
160 #endif
161 	int isr;
162 	int hlen, s;
163 	u_int8_t otos;
164 	u_int8_t v;
165 	sa_family_t af;
166 
167 	ipipstat.ipips_ipackets++;
168 
169 	m_copydata(m, 0, 1, &v);
170 
171 	switch (v >> 4) {
172 #ifdef INET
173 	case 4:
174 		hlen = sizeof(struct ip);
175 		break;
176 #endif /* INET */
177 #ifdef INET6
178 	case 6:
179 		hlen = sizeof(struct ip6_hdr);
180 		break;
181 #endif
182 	default:
183 		ipipstat.ipips_family++;
184 		m_freem(m);
185 		return /* EAFNOSUPPORT */;
186 	}
187 
188 	/* Bring the IP header in the first mbuf, if not there already */
189 	if (m->m_len < hlen) {
190 		if ((m = m_pullup(m, hlen)) == NULL) {
191 			DPRINTF(("ipip_input(): m_pullup() failed\n"));
192 			ipipstat.ipips_hdrops++;
193 			return;
194 		}
195 	}
196 
197 
198 	/* Keep outer ecn field. */
199 	switch (v >> 4) {
200 #ifdef INET
201 	case 4:
202 		ipo = mtod(m, struct ip *);
203 		otos = ipo->ip_tos;
204 		break;
205 #endif /* INET */
206 #ifdef INET6
207 	case 6:
208 		ip6 = mtod(m, struct ip6_hdr *);
209 		otos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
210 		break;
211 #endif
212 	default:
213 		panic("ipip_input: should never reach here");
214 	}
215 
216 	/* Remove outer IP header */
217 	m_adj(m, iphlen);
218 
219 	/* Sanity check */
220 	if (m->m_pkthdr.len < sizeof(struct ip)) {
221 		ipipstat.ipips_hdrops++;
222 		m_freem(m);
223 		return;
224 	}
225 
226 	switch (proto) {
227 #ifdef INET
228 	case IPPROTO_IPV4:
229 		hlen = sizeof(struct ip);
230 		break;
231 #endif /* INET */
232 
233 #ifdef INET6
234 	case IPPROTO_IPV6:
235 		hlen = sizeof(struct ip6_hdr);
236 		break;
237 #endif
238 	default:
239 		ipipstat.ipips_family++;
240 		m_freem(m);
241 		return; /* EAFNOSUPPORT */
242 	}
243 
244 	/*
245 	 * Bring the inner header into the first mbuf, if not there already.
246 	 */
247 	if (m->m_len < hlen) {
248 		if ((m = m_pullup(m, hlen)) == NULL) {
249 			DPRINTF(("ipip_input(): m_pullup() failed\n"));
250 			ipipstat.ipips_hdrops++;
251 			return;
252 		}
253 	}
254 
255 	/*
256 	 * RFC 1853 specifies that the inner TTL should not be touched on
257 	 * decapsulation. There's no reason this comment should be here, but
258 	 * this is as good as any a position.
259 	 */
260 
261 	/* Some sanity checks in the inner IP header */
262 	switch (proto) {
263 #ifdef INET
264     	case IPPROTO_IPV4:
265 		ipo = mtod(m, struct ip *);
266 #ifdef INET6
267 		ip6 = NULL;
268 #endif
269 		if (!ip_ecn_egress(ECN_ALLOWED, &otos, &ipo->ip_tos)) {
270 			m_freem(m);
271 			return;
272 		}
273 		break;
274 #endif /* INET */
275 #ifdef INET6
276     	case IPPROTO_IPV6:
277 #ifdef INET
278 		ipo = NULL;
279 #endif
280 		ip6 = mtod(m, struct ip6_hdr *);
281 		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
282 		if (!ip_ecn_egress(ECN_ALLOWED, &otos, &itos)) {
283 			m_freem(m);
284 			return;
285 		}
286 		ip6->ip6_flow &= ~htonl(0xff << 20);
287 		ip6->ip6_flow |= htonl((u_int32_t) itos << 20);
288 		break;
289 #endif
290 	default:
291 #ifdef INET
292 		ipo = NULL;
293 #endif
294 #ifdef INET6
295 		ip6 = NULL;
296 #endif
297 	}
298 
299 	/* Check for local address spoofing. */
300 	if ((m->m_pkthdr.rcvif == NULL ||
301 	    !(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK)) &&
302 	    ipip_allow != 2) {
303 		rdomain = rtable_l2(m->m_pkthdr.rdomain);
304 		TAILQ_FOREACH(ifp, &ifnet, if_list) {
305 			if (ifp->if_rdomain != rdomain)
306 				continue;
307 			TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
308 #ifdef INET
309 				if (ipo) {
310 					if (ifa->ifa_addr->sa_family !=
311 					    AF_INET)
312 						continue;
313 
314 					sin = (struct sockaddr_in *)
315 					    ifa->ifa_addr;
316 					if (sin->sin_addr.s_addr ==
317 					    ipo->ip_src.s_addr)	{
318 						ipipstat.ipips_spoof++;
319 						m_freem(m);
320 						return;
321 					}
322 				}
323 #endif /* INET */
324 #ifdef INET6
325 				if (ip6) {
326 					if (ifa->ifa_addr->sa_family !=
327 					    AF_INET6)
328 						continue;
329 
330 					sin6 = (struct sockaddr_in6 *)
331 					    ifa->ifa_addr;
332 					if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
333 					    &ip6->ip6_src)) {
334 						ipipstat.ipips_spoof++;
335 						m_freem(m);
336 						return;
337 					}
338 
339 				}
340 #endif /* INET6 */
341 			}
342 		}
343 	}
344 
345 	/* Statistics */
346 	ipipstat.ipips_ibytes += m->m_pkthdr.len - iphlen;
347 
348 	/*
349 	 * Interface pointer stays the same; if no IPsec processing has
350 	 * been done (or will be done), this will point to a normal
351 	 * interface. Otherwise, it'll point to an enc interface, which
352 	 * will allow a packet filter to distinguish between secure and
353 	 * untrusted packets.
354 	 */
355 
356 	switch (proto) {
357 #ifdef INET
358 	case IPPROTO_IPV4:
359 		ifq = &ipintrq;
360 		isr = NETISR_IP;
361 		af = AF_INET;
362 		break;
363 #endif
364 #ifdef INET6
365 	case IPPROTO_IPV6:
366 		ifq = &ip6intrq;
367 		isr = NETISR_IPV6;
368 		af = AF_INET6;
369 		break;
370 #endif
371 	default:
372 		panic("ipip_input: should never reach here");
373 	}
374 
375 #if NBPFILTER > 0
376 	if (gifp && gifp->if_bpf)
377 		bpf_mtap_af(gifp->if_bpf, af, m, BPF_DIRECTION_IN);
378 #endif
379 #if NPF > 0
380 	pf_pkt_addr_changed(m);
381 #endif
382 
383 	s = splnet();			/* isn't it already? */
384 	if (IF_QFULL(ifq)) {
385 		IF_DROP(ifq);
386 		m_freem(m);
387 		ipipstat.ipips_qfull++;
388 
389 		splx(s);
390 
391 		DPRINTF(("ipip_input(): packet dropped because of full "
392 		    "queue\n"));
393 		return;
394 	}
395 
396 	IF_ENQUEUE(ifq, m);
397 	schednetisr(isr);
398 	splx(s);
399 	return;
400 }
401 
402 int
403 ipip_output(struct mbuf *m, struct tdb *tdb, struct mbuf **mp, int dummy,
404     int dummy2)
405 {
406 	u_int8_t tp, otos;
407 
408 #ifdef INET
409 	u_int8_t itos;
410 	struct ip *ipo;
411 #endif /* INET */
412 
413 #ifdef INET6
414 	struct ip6_hdr *ip6, *ip6o;
415 #endif /* INET6 */
416 
417 	/* XXX Deal with empty TDB source/destination addresses. */
418 
419 	m_copydata(m, 0, 1, &tp);
420 	tp = (tp >> 4) & 0xff;  /* Get the IP version number. */
421 
422 	switch (tdb->tdb_dst.sa.sa_family) {
423 #ifdef INET
424 	case AF_INET:
425 		if (tdb->tdb_src.sa.sa_family != AF_INET ||
426 		    tdb->tdb_src.sin.sin_addr.s_addr == INADDR_ANY ||
427 		    tdb->tdb_dst.sin.sin_addr.s_addr == INADDR_ANY) {
428 
429 			DPRINTF(("ipip_output(): unspecified tunnel endpoind "
430 			    "address in SA %s/%08x\n",
431 			    ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi)));
432 
433 			ipipstat.ipips_unspec++;
434 			m_freem(m);
435 			*mp = NULL;
436 			return EINVAL;
437 		}
438 
439 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
440 		if (m == 0) {
441 			DPRINTF(("ipip_output(): M_PREPEND failed\n"));
442 			ipipstat.ipips_hdrops++;
443 			*mp = NULL;
444 			return ENOBUFS;
445 		}
446 
447 		ipo = mtod(m, struct ip *);
448 
449 		ipo->ip_v = IPVERSION;
450 		ipo->ip_hl = 5;
451 		ipo->ip_len = htons(m->m_pkthdr.len);
452 		ipo->ip_ttl = ip_defttl;
453 		ipo->ip_sum = 0;
454 		ipo->ip_src = tdb->tdb_src.sin.sin_addr;
455 		ipo->ip_dst = tdb->tdb_dst.sin.sin_addr;
456 
457 		/*
458 		 * We do the htons() to prevent snoopers from determining our
459 		 * endianness.
460 		 */
461 		ipo->ip_id = htons(ip_randomid());
462 
463 		/* If the inner protocol is IP... */
464 		if (tp == IPVERSION) {
465 			/* Save ECN notification */
466 			m_copydata(m, sizeof(struct ip) +
467 			    offsetof(struct ip, ip_tos),
468 			    sizeof(u_int8_t), (caddr_t) &itos);
469 
470 			ipo->ip_p = IPPROTO_IPIP;
471 
472 			/*
473 			 * We should be keeping tunnel soft-state and
474 			 * send back ICMPs if needed.
475 			 */
476 			m_copydata(m, sizeof(struct ip) +
477 			    offsetof(struct ip, ip_off),
478 			    sizeof(u_int16_t), (caddr_t) &ipo->ip_off);
479 			NTOHS(ipo->ip_off);
480 			ipo->ip_off &= ~(IP_DF | IP_MF | IP_OFFMASK);
481 			HTONS(ipo->ip_off);
482 		}
483 #ifdef INET6
484 		else if (tp == (IPV6_VERSION >> 4)) {
485 			u_int32_t itos32;
486 
487 			/* Save ECN notification. */
488 			m_copydata(m, sizeof(struct ip) +
489 			    offsetof(struct ip6_hdr, ip6_flow),
490 			    sizeof(u_int32_t), (caddr_t) &itos32);
491 			itos = ntohl(itos32) >> 20;
492 			ipo->ip_p = IPPROTO_IPV6;
493 			ipo->ip_off = 0;
494 		}
495 #endif /* INET6 */
496 		else {
497 			m_freem(m);
498 			*mp = NULL;
499 			ipipstat.ipips_family++;
500 			return EAFNOSUPPORT;
501 		}
502 
503 		otos = 0;
504 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
505 		ipo->ip_tos = otos;
506 		break;
507 #endif /* INET */
508 
509 #ifdef INET6
510 	case AF_INET6:
511 		if (IN6_IS_ADDR_UNSPECIFIED(&tdb->tdb_dst.sin6.sin6_addr) ||
512 		    tdb->tdb_src.sa.sa_family != AF_INET6 ||
513 		    IN6_IS_ADDR_UNSPECIFIED(&tdb->tdb_src.sin6.sin6_addr)) {
514 
515 			DPRINTF(("ipip_output(): unspecified tunnel endpoind "
516 			    "address in SA %s/%08x\n",
517 			    ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi)));
518 
519 			ipipstat.ipips_unspec++;
520 			m_freem(m);
521 			*mp = NULL;
522 			return ENOBUFS;
523 		}
524 
525 		/* If the inner protocol is IPv6, clear link local scope */
526 		if (tp == (IPV6_VERSION >> 4)) {
527 			/* scoped address handling */
528 			ip6 = mtod(m, struct ip6_hdr *);
529 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
530 				ip6->ip6_src.s6_addr16[1] = 0;
531 			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
532 				ip6->ip6_dst.s6_addr16[1] = 0;
533 		}
534 
535 		M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
536 		if (m == 0) {
537 			DPRINTF(("ipip_output(): M_PREPEND failed\n"));
538 			ipipstat.ipips_hdrops++;
539 			*mp = NULL;
540 			return ENOBUFS;
541 		}
542 
543 		/* Initialize IPv6 header */
544 		ip6o = mtod(m, struct ip6_hdr *);
545 		ip6o->ip6_flow = 0;
546 		ip6o->ip6_vfc &= ~IPV6_VERSION_MASK;
547 		ip6o->ip6_vfc |= IPV6_VERSION;
548 		ip6o->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6o));
549 		ip6o->ip6_hlim = ip_defttl;
550 		in6_embedscope(&ip6o->ip6_src, &tdb->tdb_src.sin6, NULL, NULL);
551 		in6_embedscope(&ip6o->ip6_dst, &tdb->tdb_dst.sin6, NULL, NULL);
552 
553 #ifdef INET
554 		if (tp == IPVERSION) {
555 			/* Save ECN notification */
556 			m_copydata(m, sizeof(struct ip6_hdr) +
557 			    offsetof(struct ip, ip_tos), sizeof(u_int8_t),
558 			    (caddr_t) &itos);
559 
560 			/* This is really IPVERSION. */
561 			ip6o->ip6_nxt = IPPROTO_IPIP;
562 		}
563 		else
564 #endif /* INET */
565 			if (tp == (IPV6_VERSION >> 4)) {
566 				u_int32_t itos32;
567 
568 				/* Save ECN notification. */
569 				m_copydata(m, sizeof(struct ip6_hdr) +
570 				    offsetof(struct ip6_hdr, ip6_flow),
571 				    sizeof(u_int32_t), (caddr_t) &itos32);
572 				itos = ntohl(itos32) >> 20;
573 
574 				ip6o->ip6_nxt = IPPROTO_IPV6;
575 			} else {
576 				m_freem(m);
577 				*mp = NULL;
578 				ipipstat.ipips_family++;
579 				return EAFNOSUPPORT;
580 			}
581 
582 		otos = 0;
583 		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
584 		ip6o->ip6_flow |= htonl((u_int32_t) otos << 20);
585 		break;
586 #endif /* INET6 */
587 
588 	default:
589 		DPRINTF(("ipip_output(): unsupported protocol family %d\n",
590 		    tdb->tdb_dst.sa.sa_family));
591 		m_freem(m);
592 		*mp = NULL;
593 		ipipstat.ipips_family++;
594 		return EAFNOSUPPORT;
595 	}
596 
597 	ipipstat.ipips_opackets++;
598 	*mp = m;
599 
600 #ifdef INET
601 	if (tdb->tdb_dst.sa.sa_family == AF_INET) {
602 		if (tdb->tdb_xform->xf_type == XF_IP4)
603 			tdb->tdb_cur_bytes +=
604 			    m->m_pkthdr.len - sizeof(struct ip);
605 
606 		ipipstat.ipips_obytes += m->m_pkthdr.len - sizeof(struct ip);
607 	}
608 #endif /* INET */
609 
610 #ifdef INET6
611 	if (tdb->tdb_dst.sa.sa_family == AF_INET6) {
612 		if (tdb->tdb_xform->xf_type == XF_IP4)
613 			tdb->tdb_cur_bytes +=
614 			    m->m_pkthdr.len - sizeof(struct ip6_hdr);
615 
616 		ipipstat.ipips_obytes +=
617 		    m->m_pkthdr.len - sizeof(struct ip6_hdr);
618 	}
619 #endif /* INET6 */
620 
621 	return 0;
622 }
623 
624 #ifdef IPSEC
/*
 * Attach hook for the IP-in-IP transform.  There is nothing to set up,
 * so it always returns 0.
 */
int
ipe4_attach(void)
{
	return 0;
}
630 
631 int
632 ipe4_init(struct tdb *tdbp, struct xformsw *xsp, struct ipsecinit *ii)
633 {
634 	tdbp->tdb_xform = xsp;
635 	return 0;
636 }
637 
/*
 * Zeroize hook for the IP-in-IP transform.  Nothing in the TDB is touched
 * here; always returns 0.
 */
int
ipe4_zeroize(struct tdb *tdbp)
{
	return (0);
}
643 
/*
 * Input hook for the IP-in-IP transform.  It is not expected to be
 * called: it complains unconditionally and discards the packet, if any.
 */
void
ipe4_input(struct mbuf *m, ...)
{
	/* This is a rather serious mistake, so no conditional printing. */
	printf("ipe4_input(): should never be called\n");

	if (m == NULL)
		return;
	m_freem(m);
}
652 #endif	/* IPSEC */
653 
654 int
655 ipip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
656     size_t newlen)
657 {
658 	/* All sysctl names at this level are terminal. */
659 	if (namelen != 1)
660 		return (ENOTDIR);
661 
662 	switch (name[0]) {
663 	case IPIPCTL_ALLOW:
664 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipip_allow));
665 	case IPIPCTL_STATS:
666 		if (newp != NULL)
667 			return (EPERM);
668 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
669 		    &ipipstat, sizeof(ipipstat)));
670 	default:
671 		return (ENOPROTOOPT);
672 	}
673 	/* NOTREACHED */
674 }
675