xref: /openbsd/sys/net/if_gre.c (revision cca36db2)
1 /*      $OpenBSD: if_gre.c,v 1.58 2012/04/14 09:39:47 yasuoka Exp $ */
2 /*	$NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
3 
4 /*
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Heiko W.Rupp <hwr@pilhuhn.de>
10  *
11  * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * Encapsulate L3 protocols into IP, per RFC 1701 and 1702.
37  * See gre(4) for more details.
38  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
39  */
40 
41 #include "gre.h"
42 #if NGRE > 0
43 
44 #include "bpfilter.h"
45 #include "pf.h"
46 
47 #include <sys/param.h>
48 #include <sys/proc.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/sockio.h>
52 #include <sys/kernel.h>
53 #include <sys/systm.h>
54 
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/netisr.h>
58 #include <net/route.h>
59 
60 #ifdef INET
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/if_ether.h>
67 #else
68 #error "if_gre used without inet"
69 #endif
70 
71 #if NBPFILTER > 0
72 #include <net/bpf.h>
73 #endif
74 
75 #if NPF > 0
76 #include <net/pfvar.h>
77 #endif
78 
79 #include <net/if_gre.h>
80 
81 #ifndef GRE_RECURSION_LIMIT
82 #define GRE_RECURSION_LIMIT	3   /* How many levels of recursion allowed */
83 #endif /* GRE_RECURSION_LIMIT */
84 
85 /*
86  * It is not easy to calculate the right value for a GRE MTU.
87  * We leave this task to the admin and use the same default that
88  * other vendors use.
89  */
90 #define GREMTU 1476
91 
92 int	gre_clone_create(struct if_clone *, int);
93 int	gre_clone_destroy(struct ifnet *);
94 
95 struct gre_softc_head gre_softc_list;
96 struct if_clone gre_cloner =
97     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
98 
99 /*
100  * We can control the acceptance of GRE and MobileIP packets by
101  * altering the sysctl net.inet.gre.allow and net.inet.mobileip.allow values
102  * respectively. Zero means drop them, all else is acceptance.  We can also
103  * control acceptance of WCCPv1-style GRE packets through the
104  * net.inet.gre.wccp value, but be aware it depends upon normal GRE being
105  * allowed as well.
106  *
107  */
108 int gre_allow = 0;
109 int gre_wccp = 0;
110 int ip_mobile_allow = 0;
111 
112 void gre_keepalive(void *);
113 void gre_send_keepalive(void *);
114 void gre_link_state(struct gre_softc *);
115 
116 void
117 greattach(int n)
118 {
119 	LIST_INIT(&gre_softc_list);
120 	if_clone_attach(&gre_cloner);
121 }
122 
123 int
124 gre_clone_create(struct if_clone *ifc, int unit)
125 {
126 	struct gre_softc *sc;
127 	int s;
128 
129 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
130 	if (!sc)
131 		return (ENOMEM);
132 	snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
133 	    ifc->ifc_name, unit);
134 	sc->sc_if.if_softc = sc;
135 	sc->sc_if.if_type = IFT_TUNNEL;
136 	sc->sc_if.if_addrlen = 0;
137 	sc->sc_if.if_hdrlen = 24; /* IP + GRE */
138 	sc->sc_if.if_mtu = GREMTU;
139 	sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
140 	sc->sc_if.if_output = gre_output;
141 	sc->sc_if.if_ioctl = gre_ioctl;
142 	sc->sc_if.if_collisions = 0;
143 	sc->sc_if.if_ierrors = 0;
144 	sc->sc_if.if_oerrors = 0;
145 	sc->sc_if.if_ipackets = 0;
146 	sc->sc_if.if_opackets = 0;
147 	sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
148 	sc->g_proto = IPPROTO_GRE;
149 	sc->sc_if.if_flags |= IFF_LINK0;
150 	sc->sc_ka_state = GRE_STATE_UKNWN;
151 
152 	timeout_set(&sc->sc_ka_hold, gre_keepalive, sc);
153 	timeout_set(&sc->sc_ka_snd, gre_send_keepalive, sc);
154 
155 	if_attach(&sc->sc_if);
156 	if_alloc_sadl(&sc->sc_if);
157 
158 #if NBPFILTER > 0
159 	bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(u_int32_t));
160 #endif
161 	s = splnet();
162 	LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
163 	splx(s);
164 
165 	return (0);
166 }
167 
168 int
169 gre_clone_destroy(struct ifnet *ifp)
170 {
171 	struct gre_softc *sc = ifp->if_softc;
172 	int s;
173 
174 	s = splnet();
175 	timeout_del(&sc->sc_ka_snd);
176 	timeout_del(&sc->sc_ka_hold);
177 	LIST_REMOVE(sc, sc_list);
178 	splx(s);
179 
180 	if_detach(ifp);
181 
182 	free(sc, M_DEVBUF);
183 	return (0);
184 }
185 
186 /*
187  * The output routine. Takes a packet and encapsulates it in the protocol
188  * given by sc->g_proto. See also RFC 1701 and RFC 2004.
189  */
190 
191 int
192 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
193 	   struct rtentry *rt)
194 {
195 	int error = 0;
196 	struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc);
197 	struct greip *gh = NULL;
198 	struct ip *inp = NULL;
199 	u_int8_t ip_tos = 0;
200 	u_int16_t etype = 0;
201 	struct mobile_h mob_h;
202 	struct m_tag *mtag;
203 
204 	if ((ifp->if_flags & IFF_UP) == 0 ||
205 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
206 		m_freem(m);
207 		error = ENETDOWN;
208 		goto end;
209 	}
210 
211 #ifdef DIAGNOSTIC
212 	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.rdomain)) {
213 		printf("%s: trying to send packet on wrong domain. "
214 		    "if %d vs. mbuf %d, AF %d\n", ifp->if_xname,
215 		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.rdomain),
216 		    dst->sa_family);
217 	}
218 #endif
219 
220 	/* Try to limit infinite recursion through misconfiguration. */
221 	for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
222 	     mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
223 		if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) {
224 			IF_DROP(&ifp->if_snd);
225 			m_freem(m);
226 			error = EIO;
227 			goto end;
228 		}
229 	}
230 
231 	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT);
232 	if (mtag == NULL) {
233 		IF_DROP(&ifp->if_snd);
234 		m_freem(m);
235 		error = ENOBUFS;
236 		goto end;
237 	}
238 	bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
239 	m_tag_prepend(m, mtag);
240 
241 	m->m_flags &= ~(M_BCAST|M_MCAST);
242 
243 #if NBPFILTER > 0
244 	if (ifp->if_bpf)
245 		bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT);
246 #endif
247 
248 	if (sc->g_proto == IPPROTO_MOBILE) {
249 		if (ip_mobile_allow == 0) {
250 			IF_DROP(&ifp->if_snd);
251 			m_freem(m);
252 			error = EACCES;
253 			goto end;
254 		}
255 
256 		if (dst->sa_family == AF_INET) {
257 			struct mbuf *m0;
258 			int msiz;
259 
260 			/*
261 			 * Make sure the complete IP header (with options)
262 			 * is in the first mbuf.
263 			 */
264 			if (m->m_len < sizeof(struct ip)) {
265 				m = m_pullup(m, sizeof(struct ip));
266 				if (m == NULL) {
267 					IF_DROP(&ifp->if_snd);
268 					error = ENOBUFS;
269 					goto end;
270 				} else
271 					inp = mtod(m, struct ip *);
272 
273 				if (m->m_len < inp->ip_hl << 2) {
274 					m = m_pullup(m, inp->ip_hl << 2);
275 					if (m == NULL) {
276 						IF_DROP(&ifp->if_snd);
277 						error = ENOBUFS;
278 						goto end;
279 					}
280 				}
281 			}
282 
283 			inp = mtod(m, struct ip *);
284 
285 			bzero(&mob_h, MOB_H_SIZ_L);
286 			mob_h.proto = (inp->ip_p) << 8;
287 			mob_h.odst = inp->ip_dst.s_addr;
288 			inp->ip_dst.s_addr = sc->g_dst.s_addr;
289 
290 			/*
291 			 * If the packet comes from our host, we only change
292 			 * the destination address in the IP header.
293 			 * Otherwise we need to save and change the source.
294 			 */
295 			if (inp->ip_src.s_addr == sc->g_src.s_addr) {
296 				msiz = MOB_H_SIZ_S;
297 			} else {
298 				mob_h.proto |= MOB_H_SBIT;
299 				mob_h.osrc = inp->ip_src.s_addr;
300 				inp->ip_src.s_addr = sc->g_src.s_addr;
301 				msiz = MOB_H_SIZ_L;
302 			}
303 
304 			HTONS(mob_h.proto);
305 			mob_h.hcrc = gre_in_cksum((u_int16_t *) &mob_h, msiz);
306 
307 			/* Squeeze in the mobility header */
308 			if ((m->m_data - msiz) < m->m_pktdat) {
309 				/* Need new mbuf */
310 				MGETHDR(m0, M_DONTWAIT, MT_HEADER);
311 				if (m0 == NULL) {
312 					IF_DROP(&ifp->if_snd);
313 					m_freem(m);
314 					error = ENOBUFS;
315 					goto end;
316 				}
317 				M_MOVE_HDR(m0, m);
318 
319 				m0->m_len = msiz + (inp->ip_hl << 2);
320 				m0->m_data += max_linkhdr;
321 				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
322 				m->m_data += inp->ip_hl << 2;
323 				m->m_len -= inp->ip_hl << 2;
324 
325 				bcopy((caddr_t) inp, mtod(m0, caddr_t),
326 				    sizeof(struct ip));
327 
328 				m0->m_next = m;
329 				m = m0;
330 			} else {  /* we have some space left in the old one */
331 				m->m_data -= msiz;
332 				m->m_len += msiz;
333 				m->m_pkthdr.len += msiz;
334 				bcopy(inp, mtod(m, caddr_t),
335 				    inp->ip_hl << 2);
336 			}
337 
338 			/* Copy Mobility header */
339 			inp = mtod(m, struct ip *);
340 			bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz);
341 			inp->ip_len = htons(ntohs(inp->ip_len) + msiz);
342 		} else {  /* AF_INET */
343 			IF_DROP(&ifp->if_snd);
344 			m_freem(m);
345 			error = EINVAL;
346 			goto end;
347 		}
348 	} else if (sc->g_proto == IPPROTO_GRE) {
349 		if (gre_allow == 0) {
350 			IF_DROP(&ifp->if_snd);
351 			m_freem(m);
352 			error = EACCES;
353 			goto end;
354 		}
355 
356 		switch(dst->sa_family) {
357 		case AF_INET:
358 			if (m->m_len < sizeof(struct ip)) {
359 				m = m_pullup(m, sizeof(struct ip));
360 				if (m == NULL) {
361 					IF_DROP(&ifp->if_snd);
362 					error = ENOBUFS;
363 					goto end;
364 				}
365 			}
366 
367 			inp = mtod(m, struct ip *);
368 			ip_tos = inp->ip_tos;
369 			etype = ETHERTYPE_IP;
370 			break;
371 #ifdef INET6
372 		case AF_INET6:
373 			etype = ETHERTYPE_IPV6;
374 			break;
375 #endif
376 #ifdef MPLS
377 		case AF_MPLS:
378 			if (m->m_flags & (M_BCAST | M_MCAST))
379 				etype = ETHERTYPE_MPLS_MCAST;
380 			else
381 				etype = ETHERTYPE_MPLS;
382 			break;
383 #endif
384 		default:
385 			IF_DROP(&ifp->if_snd);
386 			m_freem(m);
387 			error = EAFNOSUPPORT;
388 			goto end;
389 		}
390 
391 		M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
392 	} else {
393 		IF_DROP(&ifp->if_snd);
394 		m_freem(m);
395 		error = EINVAL;
396 		goto end;
397 	}
398 
399 	if (m == NULL) {
400 		IF_DROP(&ifp->if_snd);
401 		error = ENOBUFS;
402 		goto end;
403 	}
404 
405 	gh = mtod(m, struct greip *);
406 	if (sc->g_proto == IPPROTO_GRE) {
407 		/* We don't support any GRE flags for now */
408 
409 		bzero((void *) &gh->gi_g, sizeof(struct gre_h));
410 		gh->gi_ptype = htons(etype);
411 	}
412 
413 	gh->gi_pr = sc->g_proto;
414 	if (sc->g_proto != IPPROTO_MOBILE) {
415 		gh->gi_src = sc->g_src;
416 		gh->gi_dst = sc->g_dst;
417 		((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2;
418 		((struct ip *) gh)->ip_ttl = ip_defttl;
419 		((struct ip *) gh)->ip_tos = ip_tos;
420 		gh->gi_len = htons(m->m_pkthdr.len);
421 	}
422 
423 	ifp->if_opackets++;
424 	ifp->if_obytes += m->m_pkthdr.len;
425 
426 
427 	m->m_pkthdr.rdomain = sc->g_rtableid;
428 
429 #if NPF > 0
430 	pf_pkt_addr_changed(m);
431 #endif
432 
433 	/* Send it off */
434 	error = ip_output(m, (void *)NULL, &sc->route, 0, (void *)NULL, (void *)NULL);
435   end:
436 	if (error)
437 		ifp->if_oerrors++;
438 	return (error);
439 }
440 
441 int
442 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
443 {
444 
445 	struct ifreq *ifr = (struct ifreq *) data;
446 	struct if_laddrreq *lifr = (struct if_laddrreq *)data;
447 	struct ifkalivereq *ikar = (struct ifkalivereq *)data;
448 	struct gre_softc *sc = ifp->if_softc;
449 	int s;
450 	struct sockaddr_in si;
451 	struct sockaddr *sa = NULL;
452 	int error = 0;
453 	struct proc *prc = curproc;		/* XXX */
454 
455 	s = splnet();
456 	switch(cmd) {
457 	case SIOCSIFADDR:
458 		ifp->if_flags |= IFF_UP;
459 		break;
460 	case SIOCSIFDSTADDR:
461 		break;
462 	case SIOCSIFFLAGS:
463 		if ((ifr->ifr_flags & IFF_LINK0) != 0)
464 			sc->g_proto = IPPROTO_GRE;
465 		else
466 			sc->g_proto = IPPROTO_MOBILE;
467 		break;
468 	case SIOCSIFMTU:
469 		if (ifr->ifr_mtu < 576) {
470 			error = EINVAL;
471 			break;
472 		}
473 		ifp->if_mtu = ifr->ifr_mtu;
474 		break;
475 	case SIOCGIFMTU:
476 		ifr->ifr_mtu = sc->sc_if.if_mtu;
477 		break;
478 	case SIOCADDMULTI:
479 	case SIOCDELMULTI:
480 		if (ifr == 0) {
481 			error = EAFNOSUPPORT;
482 			break;
483 		}
484 		switch (ifr->ifr_addr.sa_family) {
485 #ifdef INET
486 		case AF_INET:
487 			break;
488 #endif
489 #ifdef INET6
490 		case AF_INET6:
491 			break;
492 #endif
493 		default:
494 			error = EAFNOSUPPORT;
495 			break;
496 		}
497 		break;
498 	case GRESPROTO:
499 		/* Check for superuser */
500 		if ((error = suser(prc, 0)) != 0)
501 			break;
502 
503 		sc->g_proto = ifr->ifr_flags;
504 		switch (sc->g_proto) {
505 		case IPPROTO_GRE:
506 			ifp->if_flags |= IFF_LINK0;
507 			break;
508 		case IPPROTO_MOBILE:
509 			ifp->if_flags &= ~IFF_LINK0;
510 			break;
511 		default:
512 			error = EPROTONOSUPPORT;
513 			break;
514 		}
515 		break;
516 	case GREGPROTO:
517 		ifr->ifr_flags = sc->g_proto;
518 		break;
519 	case GRESADDRS:
520 	case GRESADDRD:
521 		/* Check for superuser */
522 		if ((error = suser(prc, 0)) != 0)
523 			break;
524 
525 		/*
526 		 * set tunnel endpoints and mark if as up
527 		 */
528 		sa = &ifr->ifr_addr;
529 		if (cmd == GRESADDRS )
530 			sc->g_src = (satosin(sa))->sin_addr;
531 		if (cmd == GRESADDRD )
532 			sc->g_dst = (satosin(sa))->sin_addr;
533 recompute:
534 		if ((sc->g_src.s_addr != INADDR_ANY) &&
535 		    (sc->g_dst.s_addr != INADDR_ANY)) {
536 			if (sc->route.ro_rt != 0)
537 				RTFREE(sc->route.ro_rt);
538 			/* ip_output() will do the lookup */
539 			bzero(&sc->route, sizeof(sc->route));
540 			ifp->if_flags |= IFF_UP;
541 		}
542 		break;
543 	case GREGADDRS:
544 		bzero(&si, sizeof(si));
545 		si.sin_family = AF_INET;
546 		si.sin_len = sizeof(struct sockaddr_in);
547 		si.sin_addr.s_addr = sc->g_src.s_addr;
548 		sa = sintosa(&si);
549 		ifr->ifr_addr = *sa;
550 		break;
551 	case GREGADDRD:
552 		bzero(&si, sizeof(si));
553 		si.sin_family = AF_INET;
554 		si.sin_len = sizeof(struct sockaddr_in);
555 		si.sin_addr.s_addr = sc->g_dst.s_addr;
556 		sa = sintosa(&si);
557 		ifr->ifr_addr = *sa;
558 		break;
559 	case SIOCSETKALIVE:
560 		if ((error = suser(prc, 0)) != 0)
561 			break;
562 		if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
563 		    ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) {
564 			error = EINVAL;
565 			break;
566 		}
567 		sc->sc_ka_timout = ikar->ikar_timeo;
568 		sc->sc_ka_cnt = ikar->ikar_cnt;
569 		if (sc->sc_ka_timout == 0 || sc->sc_ka_cnt == 0) {
570 			sc->sc_ka_timout = 0;
571 			sc->sc_ka_cnt = 0;
572 			sc->sc_ka_state = GRE_STATE_UKNWN;
573 			gre_link_state(sc);
574 			break;
575 		}
576 		if (!timeout_pending(&sc->sc_ka_snd)) {
577 			sc->sc_ka_holdmax = sc->sc_ka_cnt;
578 			timeout_add(&sc->sc_ka_snd, 1);
579 			timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout *
580 			    sc->sc_ka_cnt);
581 		}
582 		break;
583 	case SIOCGETKALIVE:
584 		ikar->ikar_timeo = sc->sc_ka_timout;
585 		ikar->ikar_cnt = sc->sc_ka_cnt;
586 		break;
587 	case SIOCSLIFPHYADDR:
588 		if ((error = suser(prc, 0)) != 0)
589 			break;
590 		if (lifr->addr.ss_family != AF_INET ||
591 		    lifr->dstaddr.ss_family != AF_INET) {
592 			error = EAFNOSUPPORT;
593 			break;
594 		}
595 		if (lifr->addr.ss_len != sizeof(si) ||
596 		    lifr->dstaddr.ss_len != sizeof(si)) {
597 			error = EINVAL;
598 			break;
599 		}
600 		sc->g_src = (satosin((struct sockadrr *)&lifr->addr))->sin_addr;
601 		sc->g_dst =
602 		    (satosin((struct sockadrr *)&lifr->dstaddr))->sin_addr;
603 		goto recompute;
604 	case SIOCDIFPHYADDR:
605 		if ((error = suser(prc, 0)) != 0)
606 			break;
607 		sc->g_src.s_addr = INADDR_ANY;
608 		sc->g_dst.s_addr = INADDR_ANY;
609 		break;
610 	case SIOCGLIFPHYADDR:
611 		if (sc->g_src.s_addr == INADDR_ANY ||
612 		    sc->g_dst.s_addr == INADDR_ANY) {
613 			error = EADDRNOTAVAIL;
614 			break;
615 		}
616 		bzero(&si, sizeof(si));
617 		si.sin_family = AF_INET;
618 		si.sin_len = sizeof(struct sockaddr_in);
619 		si.sin_addr.s_addr = sc->g_src.s_addr;
620 		memcpy(&lifr->addr, &si, sizeof(si));
621 		si.sin_addr.s_addr = sc->g_dst.s_addr;
622 		memcpy(&lifr->dstaddr, &si, sizeof(si));
623 		break;
624 	case SIOCSLIFPHYRTABLE:
625 		if ((error = suser(prc, 0)) != 0)
626 			break;
627 		if (ifr->ifr_rdomainid < 0 ||
628 		    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
629 		    !rtable_exists(ifr->ifr_rdomainid)) {
630 			error = EINVAL;
631 			break;
632 		}
633 		sc->g_rtableid = ifr->ifr_rdomainid;
634 		goto recompute;
635 	case SIOCGLIFPHYRTABLE:
636 		ifr->ifr_rdomainid = sc->g_rtableid;
637 		break;
638 	default:
639 		error = ENOTTY;
640 	}
641 
642 	splx(s);
643 	return (error);
644 }
645 
/*
 * Compute the 16-bit one's-complement checksum of a flat buffer, much
 * like in_cksum does for mbuf chains.
 *
 *   p    start of the buffer, read as 16-bit words in host order
 *   len  buffer length in bytes; a trailing odd byte is summed as a
 *        word whose second byte is zero
 *
 * Returns the complemented, folded sum.
 */
u_int16_t
gre_in_cksum(u_int16_t *p, u_int len)
{
	u_int32_t acc = 0;
	u_int words = len / 2;
	u_int i;

	for (i = 0; i < words; i++)
		acc += p[i];

	if ((len % 2) != 0) {
		union {
			u_short w;
			u_char c[2];
		} tail;

		/* Pad the last byte out to a full word. */
		tail.c[0] = *(u_char *)(p + words);
		tail.c[1] = 0;
		acc += tail.w;
	}

	/* Fold the carries back into the low 16 bits (end-around carry). */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	return (~acc);
}
674 
675 void
676 gre_keepalive(void *arg)
677 {
678 	struct gre_softc *sc = arg;
679 
680 	if (!sc->sc_ka_timout)
681 		return;
682 
683 	sc->sc_ka_state = GRE_STATE_DOWN;
684 	gre_link_state(sc);
685 }
686 
687 void
688 gre_send_keepalive(void *arg)
689 {
690 	struct gre_softc *sc = arg;
691 	struct mbuf *m;
692 	struct ip *ip;
693 	struct gre_h *gh;
694 	struct sockaddr dst;
695 	int s;
696 
697 	if (sc->sc_ka_timout)
698 		timeout_add_sec(&sc->sc_ka_snd, sc->sc_ka_timout);
699 
700 	if (sc->g_proto != IPPROTO_GRE)
701 		return;
702 	if ((sc->sc_if.if_flags & IFF_UP) == 0 ||
703 	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY)
704 		return;
705 
706 	MGETHDR(m, M_DONTWAIT, MT_DATA);
707 	if (m == NULL) {
708 		sc->sc_if.if_oerrors++;
709 		return;
710 	}
711 
712 	m->m_len = m->m_pkthdr.len = sizeof(*ip) + sizeof(*gh);
713 	MH_ALIGN(m, m->m_len);
714 
715 	/* build the ip header */
716 	ip = mtod(m, struct ip *);
717 
718 	ip->ip_v = IPVERSION;
719 	ip->ip_hl = sizeof(*ip) >> 2;
720 	ip->ip_tos = IPTOS_LOWDELAY;
721 	ip->ip_len = htons(m->m_pkthdr.len);
722 	ip->ip_id = htons(ip_randomid());
723 	ip->ip_off = htons(IP_DF);
724 	ip->ip_ttl = ip_defttl;
725 	ip->ip_p = IPPROTO_GRE;
726 	ip->ip_src.s_addr = sc->g_dst.s_addr;
727 	ip->ip_dst.s_addr = sc->g_src.s_addr;
728 	ip->ip_sum = 0;
729 	ip->ip_sum = in_cksum(m, sizeof(*ip));
730 
731 	gh = (struct gre_h *)(ip + 1);
732 	/* We don't support any GRE flags for now */
733 	bzero(gh, sizeof(*gh));
734 
735 	bzero(&dst, sizeof(dst));
736 	dst.sa_family = AF_INET;
737 
738 	s = splsoftnet();
739 	/* should we care about the error? */
740 	gre_output(&sc->sc_if, m, &dst, NULL);
741 	splx(s);
742 }
743 
744 void
745 gre_recv_keepalive(struct gre_softc *sc)
746 {
747 	if (!sc->sc_ka_timout)
748 		return;
749 
750 	/* link state flap dampening */
751 	switch (sc->sc_ka_state) {
752 	case GRE_STATE_UKNWN:
753 	case GRE_STATE_DOWN:
754 		sc->sc_ka_state = GRE_STATE_HOLD;
755 		sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
756 		sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
757 		    16 * sc->sc_ka_cnt);
758 		break;
759 	case GRE_STATE_HOLD:
760 		if (--sc->sc_ka_holdcnt < 1) {
761 			sc->sc_ka_state = GRE_STATE_UP;
762 			gre_link_state(sc);
763 		}
764 		break;
765 	case GRE_STATE_UP:
766 		sc->sc_ka_holdmax--;
767 		sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_cnt);
768 		break;
769 	}
770 
771 	/* rescedule hold timer */
772 	timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout * sc->sc_ka_cnt);
773 }
774 
775 void
776 gre_link_state(struct gre_softc *sc)
777 {
778 	struct ifnet *ifp = &sc->sc_if;
779 	int link_state = LINK_STATE_UNKNOWN;
780 
781 	if (sc->sc_ka_state == GRE_STATE_UP)
782 		link_state = LINK_STATE_UP;
783 	else if (sc->sc_ka_state != GRE_STATE_UKNWN)
784 		link_state = LINK_STATE_KALIVE_DOWN;
785 
786 	if (ifp->if_link_state != link_state) {
787 		ifp->if_link_state = link_state;
788 		if_link_state_change(ifp);
789 	}
790 }
791 #endif
792