xref: /openbsd/usr.sbin/ldpd/kroute.c (revision 898184e3)
1 /*	$OpenBSD: kroute.c,v 1.25 2011/06/26 19:19:23 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
5  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
6  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/param.h>
22 #include <sys/types.h>
23 #include <sys/socket.h>
24 #include <sys/sysctl.h>
25 #include <sys/tree.h>
26 #include <sys/uio.h>
27 #include <netinet/in.h>
28 #include <arpa/inet.h>
29 #include <net/if.h>
30 #include <net/if_dl.h>
31 #include <net/if_types.h>
32 #include <net/route.h>
33 #include <netmpls/mpls.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 
42 #include "ldpd.h"
43 #include "log.h"
44 
/*
 * State shared by the routing-socket code in this file.
 */
struct {
	u_int32_t		rtseq;		/* sequence number for outgoing routing messages */
	pid_t			pid;		/* our pid, recorded by kr_init() */
	int			fib_sync;	/* 0 = decoupled: send_rtmsg() sends nothing */
	int			fd;		/* AF_ROUTE socket opened by kr_init() */
	struct event		ev;		/* read event registered on fd by kr_init() */
} kr_state;
52 
/*
 * A kernel route.  Nodes are keyed in the krt tree by prefix, prefix length
 * and nexthop (see kroute_compare()).  Routes that share a key are chained
 * through "next" behind the node that sits in the tree; kroute_insert()
 * keeps that chain sorted by ascending priority value.
 */
struct kroute_node {
	RB_ENTRY(kroute_node)	 entry;		/* linkage in the krt tree */
	struct kroute		 r;		/* the route itself */
	struct kroute_node	*next;		/* next route with the same key, or NULL */
};
58 
/*
 * A kernel interface, keyed in the kit tree by interface index
 * (see kif_compare()).
 */
struct kif_node {
	RB_ENTRY(kif_node)	 entry;		/* linkage in the kit tree */
	TAILQ_HEAD(, kif_addr)	 addrs;		/* addresses added by if_newaddr() */
	struct kif		 k;		/* interface data, filled by kif_update() */
};
64 
/* redistribution of kernel routes and tree comparators */
void	kr_redist_remove(struct kroute *);
int	kr_redist_eval(struct kroute *);
void	kr_redistribute(struct kroute_node *);
int	kroute_compare(struct kroute_node *, struct kroute_node *);
int	kif_compare(struct kif_node *, struct kif_node *);

/* route tree lookup and maintenance */
struct kroute_node	*kroute_find_fec(in_addr_t, u_int8_t, struct in_addr);
struct kroute_node	*kroute_find_any(in_addr_t, u_int8_t);
struct kroute_node	*kroute_matchprio(struct kroute_node *, u_int8_t);
int			 kroute_insert(struct kroute_node *);
int			 kroute_remove(struct kroute_node *);
void			 kroute_clear(void);

/* interface tree lookup and maintenance */
struct kif_node		*kif_find(u_short);
struct kif_node		*kif_insert(u_short);
int			 kif_remove(struct kif_node *);
void			 kif_clear(void);
struct kif		*kif_update(u_short, int, struct if_data *,
			    struct sockaddr_dl *);

struct kroute_node	*kroute_match(in_addr_t);

/* helpers and handlers for interface-related routing messages */
int		protect_lo(void);
u_int8_t	prefixlen_classful(in_addr_t);
void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
void		if_change(u_short, int, struct if_data *, struct sockaddr_dl *);
void		if_newaddr(u_short, struct sockaddr_in *, struct sockaddr_in *,
		    struct sockaddr_in *);
void		if_deladdr(u_short, struct sockaddr_in *, struct sockaddr_in *,
		    struct sockaddr_in *);
void		if_announce(void *);

/* routing socket and sysctl access */
int		send_rtmsg(int, int, struct kroute *, u_int32_t);
int		dispatch_rtmsg(void);
int		fetchtable(void);
int		fetchifs(u_short);
int		rtmsg_process(char *, int);

/* tree of kernel routes, ordered by kroute_compare() */
RB_HEAD(kroute_tree, kroute_node)	krt;
RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare)

/* tree of kernel interfaces, ordered by kif_compare() */
RB_HEAD(kif_tree, kif_node)		kit;
RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
RB_GENERATE(kif_tree, kif_node, entry, kif_compare)

/* NOTE(review): not referenced anywhere else in this file */
int		flag_implicit_null = 0;
112 
113 int
114 kif_init(void)
115 {
116 	RB_INIT(&kit);
117 	/* init also krt tree so that we can call kr_shutdown() */
118 	RB_INIT(&krt);
119 	kr_state.fib_sync = 0;	/* decoupled */
120 
121 	if (fetchifs(0) == -1)
122 		return (-1);
123 
124 	return (0);
125 }
126 
127 int
128 kr_init(int fs)
129 {
130 	int		opt = 0, rcvbuf, default_rcvbuf;
131 	socklen_t	optlen;
132 
133 	kr_state.fib_sync = fs;
134 
135 	if ((kr_state.fd = socket(AF_ROUTE, SOCK_RAW, 0)) == -1) {
136 		log_warn("kr_init: socket");
137 		return (-1);
138 	}
139 
140 	/* not interested in my own messages */
141 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
142 	    &opt, sizeof(opt)) == -1)
143 		log_warn("kr_init: setsockopt");	/* not fatal */
144 
145 	/* grow receive buffer, don't wanna miss messages */
146 	optlen = sizeof(default_rcvbuf);
147 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
148 	    &default_rcvbuf, &optlen) == -1)
149 		log_warn("kr_init getsockopt SOL_SOCKET SO_RCVBUF");
150 	else
151 		for (rcvbuf = MAX_RTSOCK_BUF;
152 		    rcvbuf > default_rcvbuf &&
153 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
154 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
155 		    rcvbuf /= 2)
156 			;	/* nothing */
157 
158 	kr_state.pid = getpid();
159 	kr_state.rtseq = 1;
160 
161 	if (fetchtable() == -1)
162 		return (-1);
163 
164 	if (protect_lo() == -1)
165 		return (-1);
166 
167 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
168 	    kr_dispatch_msg, NULL);
169 	event_add(&kr_state.ev, NULL);
170 
171 	return (0);
172 }
173 
174 int
175 kr_change(struct kroute *kroute)
176 {
177 	struct kroute_node	*kr;
178 	int			 action = RTM_ADD;
179 
180 	kr = kroute_find_fec(kroute->prefix.s_addr, kroute->prefixlen,
181 	    kroute->nexthop);
182 
183 	if (kr == NULL) {
184 		log_warnx("kr_change: lost FEC %s/%d",
185 		    inet_ntoa(kroute->prefix), kroute->prefixlen);
186 		return (-1);
187 	}
188 
189 	if (kr->r.flags & F_LDPD_INSERTED)
190 		action = RTM_CHANGE;
191 
192 	kr->r.local_label = kroute->local_label;
193 	kr->r.remote_label = kroute->remote_label;
194 	kr->r.flags = kr->r.flags | F_LDPD_INSERTED;
195 
196 	/* send update */
197 	if (send_rtmsg(kr_state.fd, action, &kr->r, AF_MPLS) == -1)
198 		return (-1);
199 
200 	if (kr->r.nexthop.s_addr != INADDR_ANY &&
201 	    kr->r.remote_label != NO_LABEL) {
202 		if (send_rtmsg(kr_state.fd, RTM_CHANGE, &kr->r, AF_INET) == -1)
203 			return (-1);
204 	}
205 
206 	return  (0);
207 }
208 
209 int
210 kr_delete(struct kroute *kroute)
211 {
212 	struct kroute_node	*kr;
213 	int			 update = 0;
214 
215 	kr = kroute_find_fec(kroute->prefix.s_addr, kroute->prefixlen,
216 	    kroute->nexthop);
217 	if (kr == NULL)
218 		return (0);
219 
220 	if (!(kr->r.flags & F_LDPD_INSERTED))
221 		return (0);
222 	if (kr->r.nexthop.s_addr != INADDR_ANY &&
223 	    kr->r.remote_label != NO_LABEL)
224 		update = 1;
225 
226 	/* kill MPLS LSP */
227 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r, AF_MPLS) == -1)
228 		return (-1);
229 
230 	kr->r.flags &= ~F_LDPD_INSERTED;
231 	kr->r.local_label = NO_LABEL;
232 	kr->r.remote_label = NO_LABEL;
233 
234 	if (update &&
235 	    send_rtmsg(kr_state.fd, RTM_CHANGE, &kr->r, AF_INET) == -1)
236 		return (-1);
237 
238 	return (0);
239 }
240 
/*
 * Decouple from the FIB, then drop every cached route and interface.
 */
void
kr_shutdown(void)
{
	/* must come first: once decoupled, send_rtmsg() sends nothing */
	kr_fib_decouple();

	kroute_clear();
	kif_clear();
}
249 
250 void
251 kr_fib_couple(void)
252 {
253 	struct kroute_node	*kr;
254 
255 	if (kr_state.fib_sync == 1)	/* already coupled */
256 		return;
257 
258 	kr_state.fib_sync = 1;
259 
260 	RB_FOREACH(kr, kroute_tree, &krt)
261 		if (kr->r.flags & F_LDPD_INSERTED) {
262 			send_rtmsg(kr_state.fd, RTM_ADD, &kr->r, AF_MPLS);
263 
264 			if (kr->r.nexthop.s_addr != INADDR_ANY &&
265 			    kr->r.remote_label != NO_LABEL) {
266 				send_rtmsg(kr_state.fd, RTM_CHANGE,
267 				    &kr->r, AF_INET);
268 			}
269 		}
270 
271 	log_info("kernel routing table coupled");
272 }
273 
274 void
275 kr_fib_decouple(void)
276 {
277 	struct kroute_node	*kr;
278 	u_int32_t		 rl;
279 
280 	if (kr_state.fib_sync == 0)	/* already decoupled */
281 		return;
282 
283 	RB_FOREACH(kr, kroute_tree, &krt) {
284 		if (kr->r.flags & F_LDPD_INSERTED) {
285 			send_rtmsg(kr_state.fd, RTM_DELETE,
286 			    &kr->r, AF_MPLS);
287 
288 			if (kr->r.nexthop.s_addr != INADDR_ANY &&
289 			    kr->r.remote_label != NO_LABEL) {
290 				rl = kr->r.remote_label;
291 				kr->r.remote_label = NO_LABEL;
292 				send_rtmsg(kr_state.fd, RTM_CHANGE,
293 				    &kr->r, AF_INET);
294 				kr->r.remote_label = rl;
295 			}
296 		}
297 	}
298 
299 	kr_state.fib_sync = 0;
300 
301 	log_info("kernel routing table decoupled");
302 }
303 
/*
 * Event callback registered by kr_init() for the routing socket.  None of
 * the arguments are used and the result of dispatch_rtmsg() is dropped.
 */
/* ARGSUSED */
void
kr_dispatch_msg(int fd, short event, void *bula)
{
	(void)dispatch_rtmsg();
}
310 
311 void
312 kr_show_route(struct imsg *imsg)
313 {
314 	struct kroute_node	*kr, *kn;
315 	int			 flags;
316 	struct in_addr		 addr;
317 
318 	switch (imsg->hdr.type) {
319 	case IMSG_CTL_KROUTE:
320 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
321 			log_warnx("kr_show_route: wrong imsg len");
322 			return;
323 		}
324 		memcpy(&flags, imsg->data, sizeof(flags));
325 		RB_FOREACH(kr, kroute_tree, &krt)
326 			if (!flags || kr->r.flags & flags) {
327 				kn = kr;
328 				do {
329 					main_imsg_compose_ldpe(IMSG_CTL_KROUTE,
330 					    imsg->hdr.pid,
331 					    &kn->r, sizeof(kn->r));
332 				} while ((kn = kn->next) != NULL);
333 			}
334 		break;
335 	case IMSG_CTL_KROUTE_ADDR:
336 		if (imsg->hdr.len != IMSG_HEADER_SIZE +
337 		    sizeof(struct in_addr)) {
338 			log_warnx("kr_show_route: wrong imsg len");
339 			return;
340 		}
341 		memcpy(&addr, imsg->data, sizeof(addr));
342 		kr = NULL;
343 		kr = kroute_match(addr.s_addr);
344 		if (kr != NULL)
345 			main_imsg_compose_ldpe(IMSG_CTL_KROUTE, imsg->hdr.pid,
346 			    &kr->r, sizeof(kr->r));
347 		break;
348 	default:
349 		log_debug("kr_show_route: error handling imsg");
350 		break;
351 	}
352 
353 	main_imsg_compose_ldpe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
354 }
355 
356 void
357 kr_ifinfo(char *ifname, pid_t pid)
358 {
359 	struct kif_node	*kif;
360 
361 	RB_FOREACH(kif, kif_tree, &kit)
362 		if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) {
363 			main_imsg_compose_ldpe(IMSG_CTL_IFINFO,
364 			    pid, &kif->k, sizeof(kif->k));
365 		}
366 
367 	main_imsg_compose_ldpe(IMSG_CTL_END, pid, NULL, 0);
368 }
369 
370 void
371 kr_redist_remove(struct kroute *kr)
372 {
373 	/* was the route redistributed? */
374 	if ((kr->flags & F_REDISTRIBUTED) == 0)
375 		return;
376 
377 	/* remove redistributed flag */
378 	kr->flags &= ~F_REDISTRIBUTED;
379 	main_imsg_compose_lde(IMSG_NETWORK_DEL, 0, kr,
380 	    sizeof(struct kroute));
381 }
382 
383 int
384 kr_redist_eval(struct kroute *kr)
385 {
386 	u_int32_t	 a;
387 
388 	/* Dynamic routes are not redistributable. */
389 	if (kr->flags & F_DYNAMIC)
390 		goto dont_redistribute;
391 
392 	/*
393 	 * We consider the loopback net, multicast and experimental addresses
394 	 * as not redistributable.
395 	 */
396 	a = ntohl(kr->prefix.s_addr);
397 	if (IN_MULTICAST(a) || IN_BADCLASS(a) ||
398 	    (a >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
399 		goto dont_redistribute;
400 	/*
401 	 * Consider networks with nexthop loopback as not redistributable
402 	 * unless it is a reject or blackhole route.
403 	 */
404 	if (kr->nexthop.s_addr == htonl(INADDR_LOOPBACK) &&
405 	    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
406 		goto dont_redistribute;
407 
408 	/* prefix should be redistributed */
409 	kr->flags |= F_REDISTRIBUTED;
410 	main_imsg_compose_lde(IMSG_NETWORK_ADD, 0, kr, sizeof(struct kroute));
411 	return (1);
412 
413 dont_redistribute:
414 	kr_redist_remove(kr);
415 	return (1);
416 }
417 
418 void
419 kr_redistribute(struct kroute_node *kh)
420 {
421 	struct kroute_node	*kn;
422 
423 	/* only the highest prio route can be redistributed */
424 	if (kroute_find_fec(kh->r.prefix.s_addr, kh->r.prefixlen,
425 	    kh->r.nexthop) != kh)
426 		return;
427 
428 	for (kn = kh; kn; kn = kn->next)
429 		kr_redist_eval(&kn->r);
430 }
431 
432 void
433 kr_reload(void)
434 {
435 	struct kroute_node	*kr;
436 
437 	/* XXX this does not make sense in ldpd */
438 	RB_FOREACH(kr, kroute_tree, &krt) {
439 		if (kr->r.flags & F_REDISTRIBUTED)
440 			kr_redistribute(kr);
441 	}
442 }
443 
444 /* rb-tree compare */
445 int
446 kroute_compare(struct kroute_node *a, struct kroute_node *b)
447 {
448 	if (ntohl(a->r.prefix.s_addr) < ntohl(b->r.prefix.s_addr))
449 		return (-1);
450 	if (ntohl(a->r.prefix.s_addr) > ntohl(b->r.prefix.s_addr))
451 		return (1);
452 	if (a->r.prefixlen < b->r.prefixlen)
453 		return (-1);
454 	if (a->r.prefixlen > b->r.prefixlen)
455 		return (1);
456 
457 	if (ntohl(a->r.nexthop.s_addr) < ntohl(b->r.nexthop.s_addr))
458 		return (-1);
459 	if (ntohl(a->r.nexthop.s_addr) > ntohl(b->r.nexthop.s_addr))
460 		return (1);
461 	return (0);
462 }
463 
464 int
465 kif_compare(struct kif_node *a, struct kif_node *b)
466 {
467 	return (b->k.ifindex - a->k.ifindex);
468 }
469 
470 /* tree management */
471 struct kroute_node *
472 kroute_find_fec(in_addr_t prefix, u_int8_t prefixlen, struct in_addr nexthop)
473 {
474 	struct kroute_node	s;
475 
476 	s.r.prefix.s_addr = prefix;
477 	s.r.prefixlen = prefixlen;
478 	s.r.nexthop.s_addr = nexthop.s_addr;
479 
480 	return (RB_FIND(kroute_tree, &krt, &s));
481 }
482 
483 struct kroute_node *
484 kroute_find_any(in_addr_t prefix, u_int8_t prefixlen)
485 {
486 	struct kroute_node	s;
487 	struct kroute_node	*kn, *best = NULL;
488 
489 	s.r.prefix.s_addr = prefix;
490 	s.r.prefixlen = prefixlen;
491 	s.r.nexthop.s_addr = 0;
492 
493 	kn = RB_NFIND(kroute_tree, &krt, &s);
494 	while (kn) {
495 		if (!best || best->r.priority > kn->r.priority)
496 			best = kn;
497 		kn = RB_NEXT(kroute_tree, &krt, kn);
498 		if (kn == NULL || kn->r.prefix.s_addr != prefix ||
499 		    kn->r.prefixlen != prefixlen)
500 			break;
501 	}
502 	return (best);
503 }
504 
505 struct kroute_node *
506 kroute_matchprio(struct kroute_node *kr, u_int8_t prio)
507 {
508 	while (kr) {
509 		if (kr->r.priority == prio)
510 			return (kr);
511 		kr = kr->next;
512 	}
513 
514 	return (NULL);
515 }
516 
517 int
518 kroute_insert(struct kroute_node *kr)
519 {
520 	struct kroute_node	*krm, *krh;
521 
522 	if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
523 		/*
524 		 * Multiple FEC, add to ordered list
525 		 */
526 		if (kr->r.priority < krh->r.priority) {
527 			/* head element */
528 			if (RB_REMOVE(kroute_tree, &krt, krh) == NULL) {
529 				log_warnx("kroute_insert failed to del %s/%u",
530 				    inet_ntoa(krh->r.prefix), krh->r.prefixlen);
531 				return (-1);
532 			}
533 			if (RB_INSERT(kroute_tree, &krt, kr) != NULL) {
534 				log_warnx("kroute_insert failed to add %s/%u",
535 				    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
536 				return (-1);
537 			}
538 			kr->next = krh;
539 			krh = kr;
540 		} else {
541 			for (krm = krh; krm->next != NULL &&
542 			    krm->next->r.priority < kr->r.priority;
543 			    krm = krm->next)
544 				;
545 			kr->next = krm->next;
546 			krm->next = kr;
547 		}
548 	} else
549 		krh = kr;
550 
551 	kr_redistribute(krh);
552 	return (0);
553 }
554 
555 int
556 kroute_remove(struct kroute_node *kr)
557 {
558 	struct kroute_node	*krm;
559 
560 	if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) {
561 		log_warnx("kroute_remove failed to find %s/%u",
562 		    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
563 		return (-1);
564 	}
565 
566 	if (krm == kr) {
567 		/* head element */
568 		if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) {
569 			log_warnx("kroute_remove failed for %s/%u",
570 			    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
571 			return (-1);
572 		}
573 		if (kr->next != NULL) {
574 			if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) {
575 				log_warnx("kroute_remove failed to add %s/%u",
576 				    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
577 				return (-1);
578 			}
579 		}
580 	} else {
581 		/* somewhere in the list */
582 		while (krm->next != kr && krm->next != NULL)
583 			krm = krm->next;
584 		if (krm->next == NULL) {
585 			log_warnx("kroute_remove multipath list corrupted "
586 			    "for %s/%u", inet_ntoa(kr->r.prefix),
587 			    kr->r.prefixlen);
588 			return (-1);
589 		}
590 		krm->next = kr->next;
591 	}
592 
593 	kr_redist_remove(&kr->r);
594 
595 	/* kill MPLS LSP if one was installed */
596 	if (kr->r.flags & F_LDPD_INSERTED)
597 		if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r, AF_MPLS) ==
598 		    -1) {
599 			free(kr);
600 			return (-1);
601 		}
602 
603 	free(kr);
604 	return (0);
605 }
606 
607 void
608 kroute_clear(void)
609 {
610 	struct kroute_node	*kr;
611 
612 	while ((kr = RB_MIN(kroute_tree, &krt)) != NULL)
613 		kroute_remove(kr);
614 }
615 
616 struct kif_node *
617 kif_find(u_short ifindex)
618 {
619 	struct kif_node	s;
620 
621 	bzero(&s, sizeof(s));
622 	s.k.ifindex = ifindex;
623 
624 	return (RB_FIND(kif_tree, &kit, &s));
625 }
626 
627 struct kif *
628 kif_findname(char *ifname, struct in_addr addr, struct kif_addr **kap)
629 {
630 	struct kif_node	*kif;
631 	struct kif_addr	*ka;
632 
633 	RB_FOREACH(kif, kif_tree, &kit)
634 		if (!strcmp(ifname, kif->k.ifname)) {
635 			ka = TAILQ_FIRST(&kif->addrs);
636 			if (addr.s_addr != 0) {
637 				TAILQ_FOREACH(ka, &kif->addrs, entry) {
638 					if (addr.s_addr == ka->addr.s_addr)
639 						break;
640 				}
641 			}
642 			if (kap != NULL)
643 				*kap = ka;
644 			return (&kif->k);
645 		}
646 
647 	return (NULL);
648 }
649 
650 struct kif_node *
651 kif_insert(u_short ifindex)
652 {
653 	struct kif_node	*kif;
654 
655 	if ((kif = calloc(1, sizeof(struct kif_node))) == NULL)
656 		return (NULL);
657 
658 	kif->k.ifindex = ifindex;
659 	TAILQ_INIT(&kif->addrs);
660 
661 	if (RB_INSERT(kif_tree, &kit, kif) != NULL)
662 		fatalx("kif_insert: RB_INSERT");
663 
664 	return (kif);
665 }
666 
667 int
668 kif_remove(struct kif_node *kif)
669 {
670 	struct kif_addr	*ka;
671 
672 	if (RB_REMOVE(kif_tree, &kit, kif) == NULL) {
673 		log_warnx("RB_REMOVE(kif_tree, &kit, kif)");
674 		return (-1);
675 	}
676 
677 	while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) {
678 		TAILQ_REMOVE(&kif->addrs, ka, entry);
679 		free(ka);
680 	}
681 	free(kif);
682 	return (0);
683 }
684 
685 void
686 kif_clear(void)
687 {
688 	struct kif_node	*kif;
689 
690 	while ((kif = RB_MIN(kif_tree, &kit)) != NULL)
691 		kif_remove(kif);
692 }
693 
694 struct kif *
695 kif_update(u_short ifindex, int flags, struct if_data *ifd,
696     struct sockaddr_dl *sdl)
697 {
698 	struct kif_node		*kif;
699 
700 	if ((kif = kif_find(ifindex)) == NULL) {
701 		if ((kif = kif_insert(ifindex)) == NULL)
702 			return (NULL);
703 	}
704 
705 	kif->k.flags = flags;
706 	kif->k.link_state = ifd->ifi_link_state;
707 	kif->k.media_type = ifd->ifi_type;
708 	kif->k.baudrate = ifd->ifi_baudrate;
709 	kif->k.mtu = ifd->ifi_mtu;
710 
711 	if (sdl && sdl->sdl_family == AF_LINK) {
712 		if (sdl->sdl_nlen >= sizeof(kif->k.ifname))
713 			memcpy(kif->k.ifname, sdl->sdl_data,
714 			    sizeof(kif->k.ifname) - 1);
715 		else if (sdl->sdl_nlen > 0)
716 			memcpy(kif->k.ifname, sdl->sdl_data,
717 			    sdl->sdl_nlen);
718 		/* string already terminated via calloc() */
719 	}
720 
721 	return (&kif->k);
722 }
723 
724 struct kroute_node *
725 kroute_match(in_addr_t key)
726 {
727 	int			 i;
728 	struct kroute_node	*kr;
729 
730 	/* we will never match the default route */
731 	for (i = 32; i > 0; i--)
732 		if ((kr = kroute_find_any(key & prefixlen2mask(i), i)) != NULL)
733 			return (kr);
734 
735 	/* if we don't have a match yet, try to find a default route */
736 	if ((kr = kroute_find_any(0, 0)) != NULL)
737 			return (kr);
738 	return (NULL);
739 }
740 
741 /* misc */
742 int
743 protect_lo(void)
744 {
745 	struct kroute_node	*kr;
746 
747 	/* special protection for 127/8 */
748 	if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) {
749 		log_warn("protect_lo");
750 		return (-1);
751 	}
752 	kr->r.prefix.s_addr = htonl(INADDR_LOOPBACK & IN_CLASSA_NET);
753 	kr->r.prefixlen = 8;
754 	kr->r.flags = F_CONNECTED;
755 	kr->r.local_label = NO_LABEL;
756 	kr->r.remote_label = NO_LABEL;
757 
758 	if (RB_INSERT(kroute_tree, &krt, kr) != NULL)
759 		free(kr);	/* kernel route already there, no problem */
760 
761 	return (0);
762 }
763 
/*
 * Return the classful prefix length of an IPv4 address.
 *
 * ina is expected in network byte order, which is what the caller in this
 * file passes (the s_addr of a route destination).  It is converted to host
 * byte order before it is compared against the class boundaries; without
 * the conversion the result is wrong on little-endian machines.
 *
 * Returns 8, 16 or 24 for class A, B or C, 4 for class D and 32 for
 * class E.
 */
u_int8_t
prefixlen_classful(in_addr_t ina)
{
	u_int32_t	 host = ntohl(ina);

	/* it hurt to write this. */

	if (host >= 0xf0000000U)	/* class E */
		return (32);
	else if (host >= 0xe0000000U)	/* class D */
		return (4);
	else if (host >= 0xc0000000U)	/* class C */
		return (24);
	else if (host >= 0x80000000U)	/* class B */
		return (16);
	else				/* class A */
		return (8);
}
780 
/*
 * Convert a netmask in network byte order into a prefix length.
 *
 * The result is 32 minus the number of zero bits below the lowest set bit
 * of the mask in host byte order; an all-zero mask yields 0.
 */
u_int8_t
mask2prefixlen(in_addr_t ina)
{
	u_int32_t	 hmask;
	u_int8_t	 len;

	if (ina == 0)
		return (0);

	/* hmask is not 0 here, so the loop ends at the lowest set bit */
	hmask = ntohl(ina);
	for (len = 32; (hmask & 1) == 0; hmask >>= 1)
		len--;

	return (len);
}
789 
/*
 * Convert a prefix length into a netmask in network byte order.
 *
 * A prefix length of 0 yields 0.  Values above 32 are treated as 32;
 * shifting by a negative count would be undefined behaviour.
 */
in_addr_t
prefixlen2mask(u_int8_t prefixlen)
{
	if (prefixlen == 0)
		return (0);
	if (prefixlen > 32)
		prefixlen = 32;

	return (htonl(0xffffffffU << (32 - prefixlen)));
}
798 
799 #define	ROUNDUP(a)	\
800     (((a) & (sizeof(long) - 1)) ? (1 + ((a) | (sizeof(long) - 1))) : (a))
801 
802 void
803 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
804 {
805 	int	i;
806 
807 	for (i = 0; i < RTAX_MAX; i++) {
808 		if (addrs & (1 << i)) {
809 			rti_info[i] = sa;
810 			sa = (struct sockaddr *)((char *)(sa) +
811 			    ROUNDUP(sa->sa_len));
812 		} else
813 			rti_info[i] = NULL;
814 	}
815 }
816 
817 void
818 if_change(u_short ifindex, int flags, struct if_data *ifd,
819     struct sockaddr_dl *sdl)
820 {
821 	struct kif		*kif;
822 
823 	if ((kif = kif_update(ifindex, flags, ifd, sdl)) == NULL) {
824 		log_warn("if_change:  kif_update(%u)", ifindex);
825 		return;
826 	}
827 
828 	/* notify ldpe about interface link state */
829 	main_imsg_compose_ldpe(IMSG_IFINFO, 0, kif, sizeof(struct kif));
830 }
831 
832 void
833 if_newaddr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
834     struct sockaddr_in *brd)
835 {
836 	struct kif_node *kif;
837 	struct kif_addr *ka;
838 
839 	if (ifa == NULL || ifa->sin_family != AF_INET)
840 		return;
841 	if ((kif = kif_find(ifindex)) == NULL) {
842 		log_warnx("if_newaddr: corresponding if %i not found", ifindex);
843 		return;
844 	}
845 	if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL)
846 		fatal("if_newaddr");
847 	ka->addr = ifa->sin_addr;
848 	if (mask)
849 		ka->mask = mask->sin_addr;
850 	else
851 		ka->mask.s_addr = INADDR_NONE;
852 	if (brd)
853 		ka->dstbrd = brd->sin_addr;
854 	else
855 		ka->dstbrd.s_addr = INADDR_NONE;
856 
857 	TAILQ_INSERT_TAIL(&kif->addrs, ka, entry);
858 }
859 
860 void
861 if_deladdr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
862     struct sockaddr_in *brd)
863 {
864 	struct kif_node *kif;
865 	struct kif_addr *ka, *nka;
866 
867 	if (ifa == NULL || ifa->sin_family != AF_INET)
868 		return;
869 	if ((kif = kif_find(ifindex)) == NULL) {
870 		log_warnx("if_deladdr: corresponding if %i not found", ifindex);
871 		return;
872 	}
873 
874 	for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) {
875 		nka = TAILQ_NEXT(ka, entry);
876 
877 		if (ka->addr.s_addr == ifa->sin_addr.s_addr) {
878 			TAILQ_REMOVE(&kif->addrs, ka, entry);
879 			/* XXX inform engine about if change? */
880 			free(ka);
881 			return;
882 		}
883 	}
884 }
885 
886 void
887 if_announce(void *msg)
888 {
889 	struct if_announcemsghdr	*ifan;
890 	struct kif_node			*kif;
891 
892 	ifan = msg;
893 
894 	switch (ifan->ifan_what) {
895 	case IFAN_ARRIVAL:
896 		kif = kif_insert(ifan->ifan_index);
897 		strlcpy(kif->k.ifname, ifan->ifan_name, sizeof(kif->k.ifname));
898 		break;
899 	case IFAN_DEPARTURE:
900 		kif = kif_find(ifan->ifan_index);
901 		kif_remove(kif);
902 		break;
903 	}
904 }
905 
906 /* rtsock */
907 int
908 send_rtmsg(int fd, int action, struct kroute *kroute, u_int32_t family)
909 {
910 	struct iovec		iov[5];
911 	struct rt_msghdr	hdr;
912 	struct sockaddr_mpls	label_in, label_out;
913 	struct sockaddr_in	dst, mask, nexthop;
914 	int			iovcnt = 0;
915 
916 	if (kr_state.fib_sync == 0)
917 		return (0);
918 
919 	/* Implicit NULL label should not be added to the FIB */
920 	if (family == AF_MPLS && kroute->local_label == MPLS_LABEL_IMPLNULL)
921 		return (0);
922 
923 	/* initialize header */
924 	bzero(&hdr, sizeof(hdr));
925 	hdr.rtm_version = RTM_VERSION;
926 
927 	hdr.rtm_type = action;
928 	hdr.rtm_flags = RTF_UP;
929 	hdr.rtm_fmask = RTF_MPLS;
930 	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
931 	hdr.rtm_msglen = sizeof(hdr);
932 	hdr.rtm_hdrlen = sizeof(struct rt_msghdr);
933 	hdr.rtm_priority = kroute->priority;
934 	/* adjust iovec */
935 	iov[iovcnt].iov_base = &hdr;
936 	iov[iovcnt++].iov_len = sizeof(hdr);
937 
938 	if (family == AF_MPLS) {
939 		bzero(&label_in, sizeof(label_in));
940 		label_in.smpls_len = sizeof(label_in);
941 		label_in.smpls_family = AF_MPLS;
942 		label_in.smpls_label =
943 		    htonl(kroute->local_label << MPLS_LABEL_OFFSET);
944 		/* adjust header */
945 		hdr.rtm_flags |= RTF_MPLS | RTF_MPATH;
946 		hdr.rtm_addrs |= RTA_DST;
947 		hdr.rtm_msglen += sizeof(label_in);
948 		/* adjust iovec */
949 		iov[iovcnt].iov_base = &label_in;
950 		iov[iovcnt++].iov_len = sizeof(label_in);
951 	} else {
952 		bzero(&dst, sizeof(dst));
953 		dst.sin_len = sizeof(dst);
954 		dst.sin_family = AF_INET;
955 		dst.sin_addr.s_addr = kroute->prefix.s_addr;
956 		/* adjust header */
957 		hdr.rtm_addrs |= RTA_DST;
958 		hdr.rtm_msglen += sizeof(dst);
959 		/* adjust iovec */
960 		iov[iovcnt].iov_base = &dst;
961 		iov[iovcnt++].iov_len = sizeof(dst);
962 	}
963 
964 	bzero(&nexthop, sizeof(nexthop));
965 	nexthop.sin_len = sizeof(nexthop);
966 	nexthop.sin_family = AF_INET;
967 	nexthop.sin_addr.s_addr = kroute->nexthop.s_addr;
968 	/* adjust header */
969 	hdr.rtm_flags |= RTF_GATEWAY;
970 	hdr.rtm_addrs |= RTA_GATEWAY;
971 	hdr.rtm_msglen += sizeof(nexthop);
972 	/* adjust iovec */
973 	iov[iovcnt].iov_base = &nexthop;
974 	iov[iovcnt++].iov_len = sizeof(nexthop);
975 
976 	if (family == AF_INET) {
977 		bzero(&mask, sizeof(mask));
978 		mask.sin_len = sizeof(mask);
979 		mask.sin_family = AF_INET;
980 		mask.sin_addr.s_addr = prefixlen2mask(kroute->prefixlen);
981 		/* adjust header */
982 		hdr.rtm_addrs |= RTA_NETMASK;
983 		hdr.rtm_msglen += sizeof(mask);
984 		/* adjust iovec */
985 		iov[iovcnt].iov_base = &mask;
986 		iov[iovcnt++].iov_len = sizeof(mask);
987 	}
988 
989 	/* If action is RTM_DELETE we have to get rid of MPLS infos */
990 	if (kroute->remote_label != NO_LABEL && action != RTM_DELETE) {
991 		bzero(&label_out, sizeof(label_out));
992 		label_out.smpls_len = sizeof(label_out);
993 		label_out.smpls_family = AF_MPLS;
994 		label_out.smpls_label =
995 		    htonl(kroute->remote_label << MPLS_LABEL_OFFSET);
996 		/* adjust header */
997 		hdr.rtm_addrs |= RTA_SRC;
998 		hdr.rtm_flags |= RTF_MPLS;
999 		hdr.rtm_msglen += sizeof(label_out);
1000 		/* adjust iovec */
1001 		iov[iovcnt].iov_base = &label_out;
1002 		iov[iovcnt++].iov_len = sizeof(label_out);
1003 
1004 		if (kroute->remote_label == MPLS_LABEL_IMPLNULL) {
1005 			if (family == AF_MPLS)
1006 				hdr.rtm_mpls = MPLS_OP_POP;
1007 			else
1008 				return (0);
1009 		} else {
1010 			if (family == AF_MPLS)
1011 				hdr.rtm_mpls = MPLS_OP_SWAP;
1012 			else
1013 				hdr.rtm_mpls = MPLS_OP_PUSH;
1014 		}
1015 	}
1016 
1017 
1018 retry:
1019 	if (writev(fd, iov, iovcnt) == -1) {
1020 		if (errno == ESRCH) {
1021 			if (hdr.rtm_type == RTM_CHANGE && family == AF_MPLS) {
1022 				hdr.rtm_type = RTM_ADD;
1023 				goto retry;
1024 			} else if (hdr.rtm_type == RTM_DELETE) {
1025 				log_info("route %s/%u vanished before delete",
1026 				    inet_ntoa(kroute->prefix),
1027 				    kroute->prefixlen);
1028 				return (0);
1029 			}
1030 		}
1031 		log_warn("send_rtmsg: action %u, AF %d, prefix %s/%u",
1032 		    hdr.rtm_type, family, inet_ntoa(kroute->prefix),
1033 		    kroute->prefixlen);
1034 		return (0);
1035 	}
1036 
1037 	return (0);
1038 }
1039 
1040 int
1041 fetchtable(void)
1042 {
1043 	size_t			 len;
1044 	int			 mib[7];
1045 	char			*buf;
1046 	int			 rv;
1047 
1048 	mib[0] = CTL_NET;
1049 	mib[1] = AF_ROUTE;
1050 	mib[2] = 0;
1051 	mib[3] = AF_INET;
1052 	mib[4] = NET_RT_DUMP;
1053 	mib[5] = 0;
1054 	mib[6] = 0;	/* rtableid */
1055 
1056 	if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) {
1057 		log_warn("sysctl");
1058 		return (-1);
1059 	}
1060 	if ((buf = malloc(len)) == NULL) {
1061 		log_warn("fetchtable");
1062 		return (-1);
1063 	}
1064 	if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) {
1065 		log_warn("sysctl");
1066 		free(buf);
1067 		return (-1);
1068 	}
1069 
1070 	rv = rtmsg_process(buf, len);
1071 	free(buf);
1072 
1073 	return (rv);
1074 }
1075 
1076 int
1077 fetchifs(u_short ifindex)
1078 {
1079 	size_t			 len;
1080 	int			 mib[6];
1081 	char			*buf;
1082 	int			 rv;
1083 
1084 	mib[0] = CTL_NET;
1085 	mib[1] = AF_ROUTE;
1086 	mib[2] = 0;
1087 	mib[3] = AF_INET;
1088 	mib[4] = NET_RT_IFLIST;
1089 	mib[5] = ifindex;
1090 
1091 	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) {
1092 		log_warn("sysctl");
1093 		return (-1);
1094 	}
1095 	if ((buf = malloc(len)) == NULL) {
1096 		log_warn("fetchif");
1097 		return (-1);
1098 	}
1099 	if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) {
1100 		log_warn("sysctl");
1101 		free(buf);
1102 		return (-1);
1103 	}
1104 
1105 	rv = rtmsg_process(buf, len);
1106 	free(buf);
1107 
1108 	return (rv);
1109 }
1110 
1111 int
1112 dispatch_rtmsg(void)
1113 {
1114 	char			 buf[RT_BUF_SIZE];
1115 	ssize_t			 n;
1116 
1117 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1118 		log_warn("dispatch_rtmsg: read error");
1119 		return (-1);
1120 	}
1121 
1122 	if (n == 0) {
1123 		log_warnx("routing socket closed");
1124 		return (-1);
1125 	}
1126 
1127 	return (rtmsg_process(buf, n));
1128 }
1129 
1130 int
1131 rtmsg_process(char *buf, int len)
1132 {
1133 	struct rt_msghdr	*rtm;
1134 	struct if_msghdr	 ifm;
1135 	struct ifa_msghdr	*ifam;
1136 	struct sockaddr		*sa, *rti_info[RTAX_MAX];
1137 	struct sockaddr_in	*sa_in;
1138 	struct kroute_node	*kr, *okr;
1139 	struct in_addr		 prefix, nexthop;
1140 	u_int8_t		 prefixlen, prio;
1141 	int			 flags, mpath;
1142 	u_short			 ifindex = 0;
1143 
1144 	int			 offset;
1145 	char			*next;
1146 
1147 	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
1148 		next = buf + offset;
1149 		rtm = (struct rt_msghdr *)next;
1150 		if (rtm->rtm_version != RTM_VERSION)
1151 			continue;
1152 
1153 		prefix.s_addr = 0;
1154 		prefixlen = 0;
1155 		flags = 0;
1156 		nexthop.s_addr = 0;
1157 		mpath = 0;
1158 		prio = 0;
1159 
1160 		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
1161 		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);
1162 
1163 		switch (rtm->rtm_type) {
1164 		case RTM_ADD:
1165 		case RTM_GET:
1166 		case RTM_CHANGE:
1167 		case RTM_DELETE:
1168 			prefix.s_addr = 0;
1169 			prefixlen = 0;
1170 			nexthop.s_addr = 0;
1171 			mpath = 0;
1172 			prio = 0;
1173 
1174 			if (rtm->rtm_errno)		/* failed attempts... */
1175 				continue;
1176 
1177 			if (rtm->rtm_tableid != 0)
1178 				continue;
1179 
1180 			if ((sa = rti_info[RTAX_DST]) == NULL)
1181 				continue;
1182 
1183 			if (rtm->rtm_flags & RTF_LLINFO)	/* arp cache */
1184 				continue;
1185 
1186 			if (rtm->rtm_flags & RTF_MPATH)
1187 				mpath = 1;
1188 			prio = rtm->rtm_priority;
1189 
1190 			switch (sa->sa_family) {
1191 			case AF_INET:
1192 				prefix.s_addr =
1193 				    ((struct sockaddr_in *)sa)->sin_addr.s_addr;
1194 				sa_in = (struct sockaddr_in *)
1195 				    rti_info[RTAX_NETMASK];
1196 				if (sa_in != NULL) {
1197 					if (sa_in->sin_len != 0)
1198 						prefixlen = mask2prefixlen(
1199 						    sa_in->sin_addr.s_addr);
1200 				} else if (rtm->rtm_flags & RTF_HOST)
1201 					prefixlen = 32;
1202 				else
1203 					prefixlen =
1204 					    prefixlen_classful(prefix.s_addr);
1205 				if (rtm->rtm_flags & RTF_STATIC)
1206 					flags |= F_STATIC;
1207 				if (rtm->rtm_flags & RTF_BLACKHOLE)
1208 					flags |= F_BLACKHOLE;
1209 				if (rtm->rtm_flags & RTF_REJECT)
1210 					flags |= F_REJECT;
1211 				if (rtm->rtm_flags & RTF_DYNAMIC)
1212 					flags |= F_DYNAMIC;
1213 				break;
1214 			default:
1215 				continue;
1216 			}
1217 
1218 			ifindex = rtm->rtm_index;
1219 			if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
1220 				switch (sa->sa_family) {
1221 				case AF_INET:
1222 					nexthop.s_addr = ((struct
1223 					    sockaddr_in *)sa)->sin_addr.s_addr;
1224 					break;
1225 				case AF_LINK:
1226 					flags |= F_CONNECTED;
1227 					break;
1228 				}
1229 			}
1230 		}
1231 
1232 		switch (rtm->rtm_type) {
1233 		case RTM_ADD:
1234 		case RTM_GET:
1235 		case RTM_CHANGE:
1236 			if (nexthop.s_addr == 0 && !(flags & F_CONNECTED)) {
1237 				log_warnx("no nexthop for %s/%u",
1238 				    inet_ntoa(prefix), prefixlen);
1239 				continue;
1240 			}
1241 
1242 			if ((okr = kroute_find_fec(prefix.s_addr, prefixlen,
1243 			    nexthop))
1244 			    != NULL) {
1245 				/* get the correct route */
1246 				kr = okr;
1247 				if ((kr = kroute_matchprio(okr, prio)) ==
1248 				    NULL) {
1249 					log_warnx("mpath route not found");
1250 					/* add routes we missed out earlier */
1251 					goto add;
1252 				}
1253 
1254 				if (kr->r.flags & F_LDPD_INSERTED)
1255 					flags |= F_LDPD_INSERTED;
1256 				kr->r.nexthop.s_addr = nexthop.s_addr;
1257 				kr->r.flags = flags;
1258 				kr->r.ifindex = ifindex;
1259 
1260 				/* just readd, the RDE will care */
1261 				kr_redistribute(okr);
1262 			} else {
1263 add:
1264 				if ((kr = calloc(1,
1265 				    sizeof(struct kroute_node))) == NULL) {
1266 					log_warn("dispatch calloc");
1267 					return (-1);
1268 				}
1269 				kr->r.prefix.s_addr = prefix.s_addr;
1270 				kr->r.prefixlen = prefixlen;
1271 				kr->r.nexthop.s_addr = nexthop.s_addr;
1272 				kr->r.flags = flags;
1273 				kr->r.ifindex = ifindex;
1274 				kr->r.priority = prio;
1275 				kr->r.local_label = NO_LABEL;
1276 				kr->r.remote_label = NO_LABEL;
1277 
1278 				kroute_insert(kr);
1279 			}
1280 			break;
1281 		case RTM_DELETE:
1282 			if ((kr = kroute_find_fec(prefix.s_addr, prefixlen,
1283 			    nexthop)) == NULL)
1284 				continue;
1285 			/* get the correct route */
1286 			okr = kr;
1287 			if ((kr = kroute_matchprio(kr, prio)) == NULL) {
1288 				log_warnx("dispatch_rtmsg mpath route"
1289 				    " not found");
1290 				return (-1);
1291 			}
1292 			if (kroute_remove(kr) == -1)
1293 				return (-1);
1294 			break;
1295 		case RTM_IFINFO:
1296 			memcpy(&ifm, next, sizeof(ifm));
1297 			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
1298 			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
1299 			break;
1300 		case RTM_NEWADDR:
1301 			ifam = (struct ifa_msghdr *)rtm;
1302 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1303 			    RTA_BRD)) == 0)
1304 				break;
1305 
1306 			if_newaddr(ifam->ifam_index,
1307 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1308 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1309 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1310 			break;
1311 		case RTM_DELADDR:
1312 			ifam = (struct ifa_msghdr *)rtm;
1313 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1314 			    RTA_BRD)) == 0)
1315 				break;
1316 
1317 			if_deladdr(ifam->ifam_index,
1318 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1319 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1320 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1321 			break;
1322 		case RTM_IFANNOUNCE:
1323 			if_announce(next);
1324 			break;
1325 		default:
1326 			/* ignore for now */
1327 			break;
1328 		}
1329 	}
1330 
1331 	return (offset);
1332 }
1333