1 /*	$OpenBSD: kroute.c,v 1.113 2019/11/09 15:54:19 denis Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/sysctl.h>
23 #include <sys/tree.h>
24 #include <sys/uio.h>
25 #include <netinet/in.h>
26 #include <arpa/inet.h>
27 #include <net/if.h>
28 #include <net/if_dl.h>
29 #include <net/if_types.h>
30 #include <net/route.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <limits.h>
39 
40 #include "ospfd.h"
41 #include "log.h"
42 
43 struct {
44 	u_int32_t		rtseq;
45 	pid_t			pid;
46 	int			fib_sync;
47 	int			fib_serial;
48 	u_int8_t		fib_prio;
49 	int			fd;
50 	struct event		ev;
51 	struct event		reload;
52 	u_int			rdomain;
53 #define KR_RELOAD_IDLE	0
54 #define KR_RELOAD_FETCH	1
55 #define KR_RELOAD_HOLD	2
56 	int			reload_state;
57 } kr_state;
58 
59 struct kroute_node {
60 	RB_ENTRY(kroute_node)	 entry;
61 	struct kroute_node	*next;
62 	struct kroute		 r;
63 	int			 serial;
64 };
65 
66 struct kif_node {
67 	RB_ENTRY(kif_node)	 entry;
68 	TAILQ_HEAD(, kif_addr)	 addrs;
69 	struct kif		 k;
70 };
71 
72 void	kr_redist_remove(struct kroute_node *, struct kroute_node *);
73 int	kr_redist_eval(struct kroute *, struct kroute *);
74 void	kr_redistribute(struct kroute_node *);
75 int	kroute_compare(struct kroute_node *, struct kroute_node *);
76 int	kif_compare(struct kif_node *, struct kif_node *);
77 int	kr_change_fib(struct kroute_node *, struct kroute *, int, int);
78 int	kr_delete_fib(struct kroute_node *);
79 
80 struct kroute_node	*kroute_find(in_addr_t, u_int8_t, u_int8_t);
81 struct kroute_node	*kroute_matchgw(struct kroute_node *, struct in_addr);
82 int			 kroute_insert(struct kroute_node *);
83 int			 kroute_remove(struct kroute_node *);
84 void			 kroute_clear(void);
85 
86 struct kif_node		*kif_find(u_short);
87 struct kif_node		*kif_insert(u_short);
88 int			 kif_remove(struct kif_node *);
89 struct kif		*kif_update(u_short, int, struct if_data *,
90 			    struct sockaddr_dl *);
91 int			 kif_validate(u_short);
92 
93 struct kroute_node	*kroute_match(in_addr_t);
94 
95 int		protect_lo(void);
96 u_int8_t	prefixlen_classful(in_addr_t);
97 void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
98 void		if_change(u_short, int, struct if_data *, struct sockaddr_dl *);
99 void		if_newaddr(u_short, struct sockaddr_in *, struct sockaddr_in *,
100 		    struct sockaddr_in *);
101 void		if_deladdr(u_short, struct sockaddr_in *, struct sockaddr_in *,
102 		    struct sockaddr_in *);
103 void		if_announce(void *);
104 
105 int		send_rtmsg(int, int, struct kroute *);
106 int		dispatch_rtmsg(void);
107 int		fetchtable(void);
108 int		fetchifs(u_short);
109 int		rtmsg_process(char *, size_t);
110 void		kr_fib_reload_timer(int, short, void *);
111 void		kr_fib_reload_arm_timer(int);
112 
113 RB_HEAD(kroute_tree, kroute_node)	krt = RB_INITIALIZER(&krt);
114 RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
115 RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare)
116 
117 RB_HEAD(kif_tree, kif_node)		kit = RB_INITIALIZER(&kit);
118 RB_PROTOTYPE(kif_tree, kif_node, entry, kif_compare)
119 RB_GENERATE(kif_tree, kif_node, entry, kif_compare)
120 
121 int
122 kif_init(void)
123 {
124 	if (fetchifs(0) == -1)
125 		return (-1);
126 
127 	return (0);
128 }
129 
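/*
 * Set up the kernel interface: open a raw AF_ROUTE socket (non-blocking,
 * close-on-exec), disable loopback of our own messages, install a
 * priority filter (disabled when redistribution by label or prefixlen is
 * configured), grow the receive buffer, load the current routing table
 * and hook the socket into the event loop.
 */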
130 int
131 kr_init(int fs, u_int rdomain, int redis_label_or_prefix, u_int8_t fib_prio)
132 {
133 	int		opt = 0, rcvbuf, default_rcvbuf;
134 	socklen_t	optlen;
135 	int		filter_prio = fib_prio;
136 
137 	kr_state.fib_sync = fs;
138 	kr_state.rdomain = rdomain;
139 	kr_state.fib_prio = fib_prio;
140 
141 	if ((kr_state.fd = socket(AF_ROUTE,
142 	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, AF_INET)) == -1) {
143 		log_warn("kr_init: socket");
144 		return (-1);
145 	}
146 
147 	/* not interested in my own messages */
148 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
149 	    &opt, sizeof(opt)) == -1)
150 		log_warn("kr_init: setsockopt");	/* not fatal */
151 
152 	if (redis_label_or_prefix) {
153 		filter_prio = 0;
154 		log_info("%s: priority filter disabled", __func__);
155 	} else
156 		log_debug("%s: priority filter enabled", __func__);
157 
158 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
159 	    sizeof(filter_prio)) == -1) {
160 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
161 		/* not fatal */
162 	}
163 
164 	/* grow the receive buffer so we don't miss messages */
165 	optlen = sizeof(default_rcvbuf);
166 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
167 	    &default_rcvbuf, &optlen) == -1)
168 		log_warn("kr_init getsockopt SOL_SOCKET SO_RCVBUF");
169 	else
170 		for (rcvbuf = MAX_RTSOCK_BUF;
171 		    rcvbuf > default_rcvbuf &&
172 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
173 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
174 		    rcvbuf /= 2)
175 			;	/* nothing */
176 
177 	kr_state.pid = getpid();
178 	kr_state.rtseq = 1;
179 
180 	if (fetchtable() == -1)
181 		return (-1);
182 
183 	if (protect_lo() == -1)
184 		return (-1);
185 
186 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
187 	    kr_dispatch_msg, NULL);
188 	event_add(&kr_state.ev, NULL);
189 
190 	kr_state.reload_state = KR_RELOAD_IDLE;
191 	evtimer_set(&kr_state.reload, kr_fib_reload_timer, NULL);
192 
193 	return (0);
194 }
195 
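/*
 * Install or update the kernel routes for one prefix.  For RTM_ADD the
 * existing multipath list is first pruned of nexthops that are no longer
 * present, then one route per remaining nexthop is sent to the kernel
 * (nexthops within 127/8 are silently skipped) and tracked in the tree
 * with F_OSPFD_INSERTED.  For RTM_CHANGE only the single existing entry
 * is rewritten in place.
 */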
196 int
197 kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
198     int action)
199 {
200 	int			 i;
201 	struct kroute_node	*kn, *nkn;
202 
203 	if (action == RTM_ADD) {
204 		/*
205 		 * First remove all stale multipath routes.
206 		 * This step must be skipped when the action is RTM_CHANGE
207 		 * because it is already a single path route that will be
208 		 * changed.
209 		 */
210 		for (kn = kr; kn != NULL; kn = nkn) {
211 			for (i = 0; i < krcount; i++) {
212 				if (kn->r.nexthop.s_addr ==
213 				    kroute[i].nexthop.s_addr)
214 					break;
215 			}
216 			nkn = kn->next;
217 			if (i == krcount) {
218 				/* stale route */
219 				if (kr_delete_fib(kn) == -1)
220 					log_warnx("kr_delete_fib failed");
221 				/*
222 				 * if head element was removed we need to adjust
223 				 * the head
224 				 */
225 				if (kr == kn)
226 					kr = nkn;
227 			}
228 		}
229 	}
230 
231 	/*
232 	 * now add or change the route
233 	 */
234 	for (i = 0; i < krcount; i++) {
235 		/* nexthop within 127/8 -> ignore silently */
236 		if ((kroute[i].nexthop.s_addr & htonl(IN_CLASSA_NET)) ==
237 		    htonl(INADDR_LOOPBACK & IN_CLASSA_NET))
238 			continue;
239 
240 		if (action == RTM_ADD && kr) {
241 			for (kn = kr; kn != NULL; kn = kn->next) {
242 				if (kn->r.nexthop.s_addr ==
243 				    kroute[i].nexthop.s_addr)
244 					break;
245 			}
246 
247 			if (kn != NULL)
248 				/* nexthop already present, skip it */
249 				continue;
250 		} else
251 			/* modify first entry */
252 			kn = kr;
253 
254 		/* send update */
255 		if (send_rtmsg(kr_state.fd, action, &kroute[i]) == -1)
256 			return (-1);
257 
258 		/* create new entry unless we are changing the first entry */
259 		if (action == RTM_ADD)
260 			if ((kn = calloc(1, sizeof(*kn))) == NULL)
261 				fatal(NULL);
262 
263 		kn->r.prefix.s_addr = kroute[i].prefix.s_addr;
264 		kn->r.prefixlen = kroute[i].prefixlen;
265 		kn->r.nexthop.s_addr = kroute[i].nexthop.s_addr;
266 		kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
267 		kn->r.priority = kr_state.fib_prio;
268 		kn->r.ext_tag = kroute[i].ext_tag;
269 		rtlabel_unref(kn->r.rtlabel);	/* for RTM_CHANGE */
270 		kn->r.rtlabel = kroute[i].rtlabel;
271 
272 		if (action == RTM_ADD)
273 			if (kroute_insert(kn) == -1) {
274 				log_debug("kr_update_fib: cannot insert %s",
275 				    inet_ntoa(kn->r.nexthop));
276 				free(kn);
277 			}
278 		action = RTM_ADD;
279 	}
280 	return (0);
281 }
282 
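/*
 * Install a (possibly multipath) OSPF route: use RTM_CHANGE when a
 * single path replaces an existing single-path route, otherwise RTM_ADD,
 * and let kr_change_fib() do the rest.
 */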
283 int
284 kr_change(struct kroute *kroute, int krcount)
285 {
286 	struct kroute_node	*kr;
287 	int			 action = RTM_ADD;
288 
289 	kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
290 
291 	kr = kroute_find(kroute->prefix.s_addr, kroute->prefixlen,
292 	    kr_state.fib_prio);
293 	if (kr != NULL && kr->next == NULL && krcount == 1)
294 		/* single path OSPF route */
295 		action = RTM_CHANGE;
296 
297 	return (kr_change_fib(kr, kroute, krcount, action));
298 }
299 
300 int
301 kr_delete_fib(struct kroute_node *kr)
302 {
303 	if (kr->r.priority != kr_state.fib_prio)
304 		log_warn("kr_delete_fib: %s/%d has wrong priority %d",
305 		    inet_ntoa(kr->r.prefix), kr->r.prefixlen, kr->r.priority);
306 
307 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r) == -1)
308 		return (-1);
309 
310 	if (kroute_remove(kr) == -1)
311 		return (-1);
312 
313 	return (0);
314 }
315 
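/*
 * Remove every path of the given prefix that was installed at ospfd's
 * routing priority, both from the kernel (via kr_delete_fib()) and from
 * the local tree.
 */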
316 int
317 kr_delete(struct kroute *kroute)
318 {
319 	struct kroute_node	*kr, *nkr;
320 
321 	if ((kr = kroute_find(kroute->prefix.s_addr, kroute->prefixlen,
322 	    kr_state.fib_prio)) == NULL)
323 		return (0);
324 
325 	while (kr != NULL) {
326 		nkr = kr->next;
327 		if (kr_delete_fib(kr) == -1)
328 			return (-1);
329 		kr = nkr;
330 	}
331 	return (0);
332 }
333 
334 void
335 kr_shutdown(void)
336 {
337 	kr_fib_decouple();
338 	kroute_clear();
339 	kif_clear();
340 }
341 
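/*
 * Couple the kernel FIB: push all routes at ospfd's priority into the
 * kernel.  kr_fib_decouple() below withdraws them again while keeping
 * them in the local tree.
 */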
342 void
343 kr_fib_couple(void)
344 {
345 	struct kroute_node	*kr;
346 	struct kroute_node	*kn;
347 
348 	if (kr_state.fib_sync == 1)	/* already coupled */
349 		return;
350 
351 	kr_state.fib_sync = 1;
352 
353 	RB_FOREACH(kr, kroute_tree, &krt)
354 		if (kr->r.priority == kr_state.fib_prio)
355 			for (kn = kr; kn != NULL; kn = kn->next)
356 				send_rtmsg(kr_state.fd, RTM_ADD, &kn->r);
357 
358 	log_info("kernel routing table coupled");
359 }
360 
361 void
362 kr_fib_decouple(void)
363 {
364 	struct kroute_node	*kr;
365 	struct kroute_node	*kn;
366 
367 	if (kr_state.fib_sync == 0)	/* already decoupled */
368 		return;
369 
370 	RB_FOREACH(kr, kroute_tree, &krt)
371 		if (kr->r.priority == kr_state.fib_prio)
372 			for (kn = kr; kn != NULL; kn = kn->next)
373 				send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r);
374 
375 	kr_state.fib_sync = 0;
376 
377 	log_info("kernel routing table decoupled");
378 }
379 
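/*
 * FIB reload state machine: an RTM_DESYNC arms the timer in
 * KR_RELOAD_FETCH state; when it fires the FIB is reloaded and a hold
 * period (KR_RELOAD_HOLD) starts so back-to-back desyncs do not trigger
 * a reload storm.  If the timer fires while holding, the state returns
 * to KR_RELOAD_IDLE.
 */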
380 void
381 kr_fib_reload_timer(int fd, short event, void *bula)
382 {
383 	if (kr_state.reload_state == KR_RELOAD_FETCH) {
384 		kr_fib_reload();
385 		kr_state.reload_state = KR_RELOAD_HOLD;
386 		kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
387 	} else {
388 		kr_state.reload_state = KR_RELOAD_IDLE;
389 	}
390 }
391 
392 void
393 kr_fib_reload_arm_timer(int delay)
394 {
395 	struct timeval		 tv;
396 
397 	timerclear(&tv);
398 	tv.tv_sec = delay / 1000;
399 	tv.tv_usec = (delay % 1000) * 1000;
400 
401 	if (evtimer_add(&kr_state.reload, &tv) == -1)
402 		fatal("add_reload_timer");
403 }
404 
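/*
 * Re-fetch the interface list and routing table under a new serial
 * number.  Afterwards every tree entry that was not refreshed by the
 * dump is either re-announced to the kernel (our own routes) or dropped
 * as stale.
 */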
405 void
406 kr_fib_reload()
407 {
408 	struct kroute_node	*krn, *kr, *kn;
409 
410 	log_info("reloading interface list and routing table");
411 
412 	kr_state.fib_serial++;
413 
414 	if (fetchifs(0) == -1 || fetchtable() == -1)
415 		return;
416 
417 	for (kr = RB_MIN(kroute_tree, &krt); kr != NULL; kr = krn) {
418 		krn = RB_NEXT(kroute_tree, &krt, kr);
419 
420 		do {
421 			kn = kr->next;
422 
423 			if (kr->serial != kr_state.fib_serial) {
424 				if (kr->r.priority == kr_state.fib_prio) {
425 					kr->serial = kr_state.fib_serial;
426 					if (send_rtmsg(kr_state.fd,
427 					    RTM_ADD, &kr->r) != 0)
428 						break;
429 				} else
430 					kroute_remove(kr);
431 			}
432 
433 		} while ((kr = kn) != NULL);
434 	}
435 }
436 
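/*
 * Retag every route we inserted with a new fib priority, e.g. after the
 * configured fib-priority changed.
 */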
437 void
438 kr_fib_update_prio(u_int8_t fib_prio)
439 {
440 	struct kroute_node      *kr;
441 
442 	RB_FOREACH(kr, kroute_tree, &krt)
443 		if ((kr->r.flags & F_OSPFD_INSERTED))
444 			kr->r.priority = fib_prio;
445 
446 	log_info("fib priority changed from %hhu to %hhu",
447 	    kr_state.fib_prio, fib_prio);
448 
449 	kr_state.fib_prio = fib_prio;
450 }
451 
452 /* ARGSUSED */
453 void
454 kr_dispatch_msg(int fd, short event, void *bula)
455 {
456 	/* XXX exiting the event loop on any rtsock error is rather crude */
457 	if (dispatch_rtmsg() == -1)
458 		event_loopexit(NULL);
459 }
460 
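/*
 * Answer a control request: dump all kroutes matching the given flags
 * (IMSG_CTL_KROUTE) or the best match for one address
 * (IMSG_CTL_KROUTE_ADDR), terminated by IMSG_CTL_END.
 */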
461 void
462 kr_show_route(struct imsg *imsg)
463 {
464 	struct kroute_node	*kr;
465 	struct kroute_node	*kn;
466 	int			 flags;
467 	struct in_addr		 addr;
468 
469 	switch (imsg->hdr.type) {
470 	case IMSG_CTL_KROUTE:
471 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
472 			log_warnx("kr_show_route: wrong imsg len");
473 			return;
474 		}
475 		memcpy(&flags, imsg->data, sizeof(flags));
476 		RB_FOREACH(kr, kroute_tree, &krt)
477 			if (!flags || kr->r.flags & flags) {
478 				kn = kr;
479 				do {
480 					main_imsg_compose_ospfe(IMSG_CTL_KROUTE,
481 					    imsg->hdr.pid,
482 					    &kn->r, sizeof(kn->r));
483 				} while ((kn = kn->next) != NULL);
484 			}
485 		break;
486 	case IMSG_CTL_KROUTE_ADDR:
487 		if (imsg->hdr.len != IMSG_HEADER_SIZE +
488 		    sizeof(struct in_addr)) {
489 			log_warnx("kr_show_route: wrong imsg len");
490 			return;
491 		}
492 		memcpy(&addr, imsg->data, sizeof(addr));
493 		kr = NULL;
494 		kr = kroute_match(addr.s_addr);
495 		if (kr != NULL)
496 			main_imsg_compose_ospfe(IMSG_CTL_KROUTE, imsg->hdr.pid,
497 			    &kr->r, sizeof(kr->r));
498 		break;
499 	default:
500 		log_debug("kr_show_route: error handling imsg");
501 		break;
502 	}
503 
504 	main_imsg_compose_ospfe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
505 }
506 
507 void
508 kr_ifinfo(char *ifname, pid_t pid)
509 {
510 	struct kif_node	*kif;
511 
512 	RB_FOREACH(kif, kif_tree, &kit)
513 		if (ifname == NULL || !strcmp(ifname, kif->k.ifname)) {
514 			main_imsg_compose_ospfe(IMSG_CTL_IFINFO,
515 			    pid, &kif->k, sizeof(kif->k));
516 		}
517 
518 	main_imsg_compose_ospfe(IMSG_CTL_END, pid, NULL, 0);
519 }
520 
521 void
522 kr_redist_remove(struct kroute_node *kh, struct kroute_node *kn)
523 {
524 	struct kroute	*kr;
525 
526 	/* was the route redistributed? */
527 	if ((kn->r.flags & F_REDISTRIBUTED) == 0)
528 		return;
529 
530 	/* remove redistributed flag */
531 	kn->r.flags &= ~F_REDISTRIBUTED;
532 	kr = &kn->r;
533 
534 	/* inform the RDE unless another path is still redistributed */
535 	for (kn = kh; kn; kn = kn->next)
536 		if (kn->r.flags & F_REDISTRIBUTED)
537 			break;
538 
539 	if (kn == NULL)
540 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, kr,
541 		    sizeof(struct kroute));
542 }
543 
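/*
 * Decide whether a single path may be redistributed: only kernel routes
 * that are not dynamic, not down, not in the loopback, multicast or
 * class E ranges and not pointing at a loopback nexthop (unless reject
 * or blackhole) are passed to the configured redistribute rules; among
 * multiple eligible paths the one with the lowest metric is kept in
 * new_kr.  Returns 1 when the caller may need to update the RDE.
 */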
544 int
545 kr_redist_eval(struct kroute *kr, struct kroute *new_kr)
546 {
547 	u_int32_t	 a, metric = 0;
548 
549 	/* Only non-ospfd routes are considered for redistribution. */
550 	if (!(kr->flags & F_KERNEL))
551 		goto dont_redistribute;
552 
553 	/* Dynamic routes are not redistributable. */
554 	if (kr->flags & F_DYNAMIC)
555 		goto dont_redistribute;
556 
557 	/* interface is not up and running so don't announce */
558 	if (kr->flags & F_DOWN)
559 		goto dont_redistribute;
560 
561 	/*
562 	 * We consider the loopback net, multicast and experimental addresses
563 	 * as not redistributable.
564 	 */
565 	a = ntohl(kr->prefix.s_addr);
566 	if (IN_MULTICAST(a) || IN_BADCLASS(a) ||
567 	    (a >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
568 		goto dont_redistribute;
569 	/*
570 	 * Consider networks with nexthop loopback as not redistributable
571 	 * unless it is a reject or blackhole route.
572 	 */
573 	if (kr->nexthop.s_addr == htonl(INADDR_LOOPBACK) &&
574 	    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
575 		goto dont_redistribute;
576 
577 	/* Should we redistribute this route? */
578 	if (!ospf_redistribute(kr, &metric))
579 		goto dont_redistribute;
580 
581 	/* prefix should be redistributed */
582 	kr->flags |= F_REDISTRIBUTED;
583 	/*
584 	 * only one of all multipath routes can be redistributed so
585 	 * redistribute the best one.
586 	 */
587 	if (new_kr->metric > metric) {
588 		*new_kr = *kr;
589 		new_kr->metric = metric;
590 	}
591 
592 	return (1);
593 
594 dont_redistribute:
595 	/* was the route redistributed? */
596 	if ((kr->flags & F_REDISTRIBUTED) == 0)
597 		return (0);
598 
599 	kr->flags &= ~F_REDISTRIBUTED;
600 	return (1);
601 }
602 
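/*
 * Evaluate all paths of a route and tell the RDE to announce the best
 * redistributable one, or to withdraw the prefix when no path qualifies
 * any longer.  Only the preferred instance of the prefix, as returned by
 * kroute_find(..., RTP_ANY), is considered.
 */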
603 void
604 kr_redistribute(struct kroute_node *kh)
605 {
606 	struct kroute_node	*kn;
607 	struct kroute		 kr;
608 	int			 redistribute = 0;
609 
610 	/* only the highest prio route can be redistributed */
611 	if (kroute_find(kh->r.prefix.s_addr, kh->r.prefixlen, RTP_ANY) != kh)
612 		return;
613 
614 	bzero(&kr, sizeof(kr));
615 	kr.metric = UINT_MAX;
616 	for (kn = kh; kn; kn = kn->next)
617 		if (kr_redist_eval(&kn->r, &kr))
618 			redistribute = 1;
619 
620 	if (!redistribute)
621 		return;
622 
623 	if (kr.flags & F_REDISTRIBUTED) {
624 		main_imsg_compose_rde(IMSG_NETWORK_ADD, 0, &kr,
625 		    sizeof(struct kroute));
626 	} else {
627 		kr = kh->r;
628 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, &kr,
629 		    sizeof(struct kroute));
630 	}
631 }
632 
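/*
 * Called after a configuration reload: update the kernel priority filter
 * and re-run the redistribute evaluation over the whole table, since
 * filters or metrics may have changed.
 */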
633 void
634 kr_reload(int redis_label_or_prefix)
635 {
636 	struct kroute_node	*kr, *kn;
637 	u_int32_t		 dummy;
638 	int			 r;
639 	int			 filter_prio = kr_state.fib_prio;
640 
641 	/* update the priority filter */
642 	if (redis_label_or_prefix) {
643 		filter_prio = 0;
644 		log_info("%s: priority filter disabled", __func__);
645 	} else
646 		log_debug("%s: priority filter enabled", __func__);
647 
648 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
649 	    sizeof(filter_prio)) == -1) {
650 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
651 		/* not fatal */
652 	}
653 
654 	/* update redistribute lists */
655 	RB_FOREACH(kr, kroute_tree, &krt) {
656 		for (kn = kr; kn; kn = kn->next) {
657 			r = ospf_redistribute(&kn->r, &dummy);
658 			/*
659 			 * if it is redistributed, redistribute again as the
660 			 * metric may have changed.
661 			 */
662 			if ((kn->r.flags & F_REDISTRIBUTED && !r) || r)
663 				break;
664 		}
665 		if (kn) {
666 			/*
667 			 * kr_redistribute copes with removals and the RDE
668 			 * with duplicates.
669 			 */
670 			kr_redistribute(kr);
671 		}
672 	}
673 }
674 
675 /* rb-tree compare */
676 int
677 kroute_compare(struct kroute_node *a, struct kroute_node *b)
678 {
679 	if (ntohl(a->r.prefix.s_addr) < ntohl(b->r.prefix.s_addr))
680 		return (-1);
681 	if (ntohl(a->r.prefix.s_addr) > ntohl(b->r.prefix.s_addr))
682 		return (1);
683 	if (a->r.prefixlen < b->r.prefixlen)
684 		return (-1);
685 	if (a->r.prefixlen > b->r.prefixlen)
686 		return (1);
687 
688 	/* if the priority is RTP_ANY finish on the first address hit */
689 	if (a->r.priority == RTP_ANY || b->r.priority == RTP_ANY)
690 		return (0);
691 	if (a->r.priority < b->r.priority)
692 		return (-1);
693 	if (a->r.priority > b->r.priority)
694 		return (1);
695 	return (0);
696 }
697 
698 int
699 kif_compare(struct kif_node *a, struct kif_node *b)
700 {
701 	return (b->k.ifindex - a->k.ifindex);
702 }
703 
704 /* tree management */
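/*
 * Look up a prefix in the route tree.  With prio == RTP_ANY the search
 * backs up to the entry with the lowest priority for that prefix, i.e.
 * the route the kernel actually prefers.
 */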
705 struct kroute_node *
706 kroute_find(in_addr_t prefix, u_int8_t prefixlen, u_int8_t prio)
707 {
708 	struct kroute_node	s;
709 	struct kroute_node	*kn, *tmp;
710 
711 	s.r.prefix.s_addr = prefix;
712 	s.r.prefixlen = prefixlen;
713 	s.r.priority = prio;
714 
715 	kn = RB_FIND(kroute_tree, &krt, &s);
716 	if (kn && prio == RTP_ANY) {
717 		tmp = RB_PREV(kroute_tree, &krt, kn);
718 		while (tmp) {
719 			if (kroute_compare(&s, tmp) == 0)
720 				kn = tmp;
721 			else
722 				break;
723 			tmp = RB_PREV(kroute_tree, &krt, kn);
724 		}
725 	}
726 	return (kn);
727 }
728 
729 struct kroute_node *
730 kroute_matchgw(struct kroute_node *kr, struct in_addr nh)
731 {
732 	in_addr_t	nexthop;
733 
734 	nexthop = nh.s_addr;
735 
736 	while (kr) {
737 		if (kr->r.nexthop.s_addr == nexthop)
738 			return (kr);
739 		kr = kr->next;
740 	}
741 
742 	return (NULL);
743 }
744 
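/*
 * Insert a route into the tree.  Routes with the same prefix, length and
 * priority form a multipath chain hanging off the tree node via the
 * ->next pointer; kernel routes are validated against the interface
 * state and considered for redistribution.
 */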
745 int
746 kroute_insert(struct kroute_node *kr)
747 {
748 	struct kroute_node	*krm, *krh;
749 
750 	kr->serial = kr_state.fib_serial;
751 
752 	if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
753 		/*
754 		 * Multipath route, add at end of list.
755 		 */
756 		krm = krh;
757 		while (krm->next != NULL)
758 			krm = krm->next;
759 		krm->next = kr;
760 		kr->next = NULL; /* to be sure */
761 	} else
762 		krh = kr;
763 
764 	if (!(kr->r.flags & F_KERNEL)) {
765 		/* don't validate or redistribute ospf route */
766 		kr->r.flags &= ~F_DOWN;
767 		return (0);
768 	}
769 
770 	if (kif_validate(kr->r.ifindex))
771 		kr->r.flags &= ~F_DOWN;
772 	else
773 		kr->r.flags |= F_DOWN;
774 
775 	kr_redistribute(krh);
776 	return (0);
777 }
778 
779 int
780 kroute_remove(struct kroute_node *kr)
781 {
782 	struct kroute_node	*krm;
783 
784 	if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) {
785 		log_warnx("kroute_remove failed to find %s/%u",
786 		    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
787 		return (-1);
788 	}
789 
790 	if (krm == kr) {
791 		/* head element */
792 		if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) {
793 			log_warnx("kroute_remove failed for %s/%u",
794 			    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
795 			return (-1);
796 		}
797 		if (kr->next != NULL) {
798 			if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) {
799 				log_warnx("kroute_remove failed to add %s/%u",
800 				    inet_ntoa(kr->r.prefix), kr->r.prefixlen);
801 				return (-1);
802 			}
803 		}
804 	} else {
805 		/* somewhere in the list */
806 		while (krm->next != kr && krm->next != NULL)
807 			krm = krm->next;
808 		if (krm->next == NULL) {
809 			log_warnx("kroute_remove multipath list corrupted "
810 			    "for %s/%u", inet_ntoa(kr->r.prefix),
811 			    kr->r.prefixlen);
812 			return (-1);
813 		}
814 		krm->next = kr->next;
815 	}
816 
817 	kr_redist_remove(krm, kr);
818 	rtlabel_unref(kr->r.rtlabel);
819 
820 	free(kr);
821 	return (0);
822 }
823 
824 void
825 kroute_clear(void)
826 {
827 	struct kroute_node	*kr;
828 
829 	while ((kr = RB_MIN(kroute_tree, &krt)) != NULL)
830 		kroute_remove(kr);
831 }
832 
833 struct kif_node *
834 kif_find(u_short ifindex)
835 {
836 	struct kif_node	s;
837 
838 	bzero(&s, sizeof(s));
839 	s.k.ifindex = ifindex;
840 
841 	return (RB_FIND(kif_tree, &kit, &s));
842 }
843 
844 struct kif *
845 kif_findname(char *ifname, struct in_addr addr, struct kif_addr **kap)
846 {
847 	struct kif_node	*kif;
848 	struct kif_addr	*ka;
849 
850 	RB_FOREACH(kif, kif_tree, &kit)
851 		if (!strcmp(ifname, kif->k.ifname)) {
852 			ka = TAILQ_FIRST(&kif->addrs);
853 			if (addr.s_addr != 0) {
854 				TAILQ_FOREACH(ka, &kif->addrs, entry) {
855 					if (addr.s_addr == ka->addr.s_addr)
856 						break;
857 				}
858 			}
859 			if (kap != NULL)
860 				*kap = ka;
861 			return (&kif->k);
862 		}
863 
864 	return (NULL);
865 }
866 
867 struct kif_node *
868 kif_insert(u_short ifindex)
869 {
870 	struct kif_node	*kif;
871 
872 	if ((kif = calloc(1, sizeof(struct kif_node))) == NULL)
873 		return (NULL);
874 
875 	kif->k.ifindex = ifindex;
876 	TAILQ_INIT(&kif->addrs);
877 
878 	if (RB_INSERT(kif_tree, &kit, kif) != NULL)
879 		fatalx("kif_insert: RB_INSERT");
880 
881 	return (kif);
882 }
883 
884 int
885 kif_remove(struct kif_node *kif)
886 {
887 	struct kif_addr	*ka;
888 
889 	if (RB_REMOVE(kif_tree, &kit, kif) == NULL) {
890 		log_warnx("RB_REMOVE(kif_tree, &kit, kif)");
891 		return (-1);
892 	}
893 
894 	while ((ka = TAILQ_FIRST(&kif->addrs)) != NULL) {
895 		TAILQ_REMOVE(&kif->addrs, ka, entry);
896 		free(ka);
897 	}
898 	free(kif);
899 	return (0);
900 }
901 
902 void
903 kif_clear(void)
904 {
905 	struct kif_node	*kif;
906 
907 	while ((kif = RB_MIN(kif_tree, &kit)) != NULL)
908 		kif_remove(kif);
909 }
910 
911 struct kif *
912 kif_update(u_short ifindex, int flags, struct if_data *ifd,
913     struct sockaddr_dl *sdl)
914 {
915 	struct kif_node		*kif;
916 
917 	if ((kif = kif_find(ifindex)) == NULL) {
918 		if ((kif = kif_insert(ifindex)) == NULL)
919 			return (NULL);
920 		kif->k.nh_reachable = (flags & IFF_UP) &&
921 		    LINK_STATE_IS_UP(ifd->ifi_link_state);
922 	}
923 
924 	kif->k.flags = flags;
925 	kif->k.link_state = ifd->ifi_link_state;
926 	kif->k.if_type = ifd->ifi_type;
927 	kif->k.baudrate = ifd->ifi_baudrate;
928 	kif->k.mtu = ifd->ifi_mtu;
929 	kif->k.rdomain = ifd->ifi_rdomain;
930 
931 	if (sdl && sdl->sdl_family == AF_LINK) {
932 		if (sdl->sdl_nlen >= sizeof(kif->k.ifname))
933 			memcpy(kif->k.ifname, sdl->sdl_data,
934 			    sizeof(kif->k.ifname) - 1);
935 		else if (sdl->sdl_nlen > 0)
936 			memcpy(kif->k.ifname, sdl->sdl_data,
937 			    sdl->sdl_nlen);
938 		/* string already terminated via calloc() */
939 	}
940 
941 	return (&kif->k);
942 }
943 
944 int
945 kif_validate(u_short ifindex)
946 {
947 	struct kif_node		*kif;
948 
949 	if ((kif = kif_find(ifindex)) == NULL) {
950 		log_warnx("interface with index %u not found", ifindex);
951 		return (1);
952 	}
953 
954 	return (kif->k.nh_reachable);
955 }
956 
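/*
 * Longest-prefix match: probe from /32 down to /1 and finally fall back
 * to the default route, using the kernel's preferred entry at each
 * prefix length.
 */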
957 struct kroute_node *
958 kroute_match(in_addr_t key)
959 {
960 	int			 i;
961 	struct kroute_node	*kr;
962 
963 	/* we will never match the default route */
964 	for (i = 32; i > 0; i--)
965 		if ((kr = kroute_find(key & prefixlen2mask(i), i,
966 		    RTP_ANY)) != NULL)
967 			return (kr);
968 
969 	/* if we don't have a match yet, try to find a default route */
970 	if ((kr = kroute_find(0, 0, RTP_ANY)) != NULL)
970 		return (kr);
972 
973 	return (NULL);
974 }
975 
976 /* misc */
977 int
978 protect_lo(void)
979 {
980 	struct kroute_node	*kr;
981 
982 	/* special protection for 127/8 */
983 	if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) {
984 		log_warn("protect_lo");
985 		return (-1);
986 	}
987 	kr->r.prefix.s_addr = htonl(INADDR_LOOPBACK & IN_CLASSA_NET);
988 	kr->r.prefixlen = 8;
989 	kr->r.flags = F_KERNEL|F_CONNECTED;
990 
991 	if (RB_INSERT(kroute_tree, &krt, kr) != NULL)
992 		free(kr);	/* kernel route already there, no problem */
993 
994 	return (0);
995 }
996 
997 u_int8_t
998 prefixlen_classful(in_addr_t ina)
999 {
1000 	/* it hurt to write this. */
1001 
1002 	if (ina >= 0xf0000000U)		/* class E */
1003 		return (32);
1004 	else if (ina >= 0xe0000000U)	/* class D */
1005 		return (4);
1006 	else if (ina >= 0xc0000000U)	/* class C */
1007 		return (24);
1008 	else if (ina >= 0x80000000U)	/* class B */
1009 		return (16);
1010 	else				/* class A */
1011 		return (8);
1012 }
1013 
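/*
 * Convert a netmask in network byte order into a prefix length; a
 * contiguous mask is assumed, e.g. mask2prefixlen(htonl(0xffffff00))
 * returns 24.
 */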
1014 u_int8_t
1015 mask2prefixlen(in_addr_t ina)
1016 {
1017 	if (ina == 0)
1018 		return (0);
1019 	else
1020 		return (33 - ffs(ntohl(ina)));
1021 }
1022 
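/* e.g. prefixlen2mask(24) == htonl(0xffffff00), prefixlen2mask(0) == 0 */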
1023 in_addr_t
1024 prefixlen2mask(u_int8_t prefixlen)
1025 {
1026 	if (prefixlen == 0)
1027 		return (0);
1028 
1029 	return (htonl(0xffffffff << (32 - prefixlen)));
1030 }
1031 
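/*
 * Walk the sockaddrs that follow a routing message header: rtm_addrs is
 * a bitmask of RTA_* values and each present sockaddr is padded to a
 * multiple of sizeof(long), which ROUNDUP() accounts for.
 */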
1032 #define ROUNDUP(a) \
1033 	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
1034 
1035 void
1036 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
1037 {
1038 	int	i;
1039 
1040 	for (i = 0; i < RTAX_MAX; i++) {
1041 		if (addrs & (1 << i)) {
1042 			rti_info[i] = sa;
1043 			sa = (struct sockaddr *)((char *)(sa) +
1044 			    ROUNDUP(sa->sa_len));
1045 		} else
1046 			rti_info[i] = NULL;
1047 	}
1048 }
1049 
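/*
 * An RTM_IFINFO message arrived: refresh the cached interface state,
 * notify the ospfe and, if nexthop reachability changed, flip F_DOWN on
 * all routes using the interface and re-run redistribution.
 */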
1050 void
1051 if_change(u_short ifindex, int flags, struct if_data *ifd,
1052     struct sockaddr_dl *sdl)
1053 {
1054 	struct kroute_node	*kr, *tkr;
1055 	struct kif		*kif;
1056 	u_int8_t		 reachable;
1057 
1058 	if ((kif = kif_update(ifindex, flags, ifd, sdl)) == NULL) {
1059 		log_warn("if_change:  kif_update(%u)", ifindex);
1060 		return;
1061 	}
1062 
1063 	/* notify ospfe about interface link state */
1064 	main_imsg_compose_ospfe(IMSG_IFINFO, 0, kif, sizeof(struct kif));
1065 
1066 	reachable = (kif->flags & IFF_UP) &&
1067 	    LINK_STATE_IS_UP(kif->link_state);
1068 
1069 	if (reachable == kif->nh_reachable)
1070 		return;		/* nothing changed wrt nexthop validity */
1071 
1072 	kif->nh_reachable = reachable;
1073 
1074 	/* update redistribute list */
1075 	RB_FOREACH(kr, kroute_tree, &krt) {
1076 		for (tkr = kr; tkr != NULL; tkr = tkr->next) {
1077 			if (tkr->r.ifindex == ifindex) {
1078 				if (reachable)
1079 					tkr->r.flags &= ~F_DOWN;
1080 				else
1081 					tkr->r.flags |= F_DOWN;
1082 
1083 			}
1084 		}
1085 		kr_redistribute(kr);
1086 	}
1087 }
1088 
1089 void
1090 if_newaddr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
1091     struct sockaddr_in *brd)
1092 {
1093 	struct kif_node 	*kif;
1094 	struct kif_addr 	*ka;
1095 	struct ifaddrchange	 ifn;
1096 
1097 	if (ifa == NULL || ifa->sin_family != AF_INET)
1098 		return;
1099 	if ((kif = kif_find(ifindex)) == NULL) {
1100 		log_warnx("if_newaddr: corresponding if %d not found", ifindex);
1101 		return;
1102 	}
1103 	if ((ka = calloc(1, sizeof(struct kif_addr))) == NULL)
1104 		fatal("if_newaddr");
1105 	ka->addr = ifa->sin_addr;
1106 	if (mask)
1107 		ka->mask = mask->sin_addr;
1108 	else
1109 		ka->mask.s_addr = INADDR_NONE;
1110 	if (brd)
1111 		ka->dstbrd = brd->sin_addr;
1112 	else
1113 		ka->dstbrd.s_addr = INADDR_NONE;
1114 
1115 	TAILQ_INSERT_TAIL(&kif->addrs, ka, entry);
1116 
1117 	ifn.addr = ka->addr;
1118 	ifn.mask = ka->mask;
1119 	ifn.dst = ka->dstbrd;
1120 	ifn.ifindex = ifindex;
1121 	main_imsg_compose_ospfe(IMSG_IFADDRADD, 0, &ifn, sizeof(ifn));
1122 }
1123 
1124 void
1125 if_deladdr(u_short ifindex, struct sockaddr_in *ifa, struct sockaddr_in *mask,
1126     struct sockaddr_in *brd)
1127 {
1128 	struct kif_node 	*kif;
1129 	struct kif_addr		*ka, *nka;
1130 	struct ifaddrchange	 ifc;
1131 
1132 	if (ifa == NULL || ifa->sin_family != AF_INET)
1133 		return;
1134 	if ((kif = kif_find(ifindex)) == NULL) {
1135 		log_warnx("if_deladdr: corresponding if %d not found", ifindex);
1136 		return;
1137 	}
1138 
1139 	for (ka = TAILQ_FIRST(&kif->addrs); ka != NULL; ka = nka) {
1140 		nka = TAILQ_NEXT(ka, entry);
1141 
1142 		if (ka->addr.s_addr == ifa->sin_addr.s_addr) {
1143 			TAILQ_REMOVE(&kif->addrs, ka, entry);
1144 			ifc.addr = ifa->sin_addr;
1145 			ifc.ifindex = ifindex;
1146 			main_imsg_compose_ospfe(IMSG_IFADDRDEL, 0, &ifc,
1147 			    sizeof(ifc));
1148 			free(ka);
1149 			return;
1150 		}
1151 	}
1152 }
1153 
1154 void
1155 if_announce(void *msg)
1156 {
1157 	struct if_announcemsghdr	*ifan;
1158 	struct kif_node			*kif;
1159 
1160 	ifan = msg;
1161 
1162 	switch (ifan->ifan_what) {
1163 	case IFAN_ARRIVAL:
1164 		kif = kif_insert(ifan->ifan_index);
1165 		strlcpy(kif->k.ifname, ifan->ifan_name, sizeof(kif->k.ifname));
1166 		break;
1167 	case IFAN_DEPARTURE:
1168 		kif = kif_find(ifan->ifan_index);
1169 		kif_remove(kif);
1170 		break;
1171 	}
1172 }
1173 
1174 /* rtsock */
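/*
 * Build one RTM_ADD/RTM_CHANGE/RTM_DELETE message (destination, optional
 * gateway, netmask and optional route label) and write it to the routing
 * socket.  A failed RTM_CHANGE is retried as RTM_ADD when the kernel
 * reports ESRCH; while decoupled this is a no-op.
 */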
1175 int
1176 send_rtmsg(int fd, int action, struct kroute *kroute)
1177 {
1178 	struct iovec		iov[5];
1179 	struct rt_msghdr	hdr;
1180 	struct sockaddr_in	prefix;
1181 	struct sockaddr_in	nexthop;
1182 	struct sockaddr_in	mask;
1183 	struct sockaddr_rtlabel	sa_rl;
1184 	int			iovcnt = 0;
1185 	const char		*label;
1186 
1187 	if (kr_state.fib_sync == 0)
1188 		return (0);
1189 
1190 	/* initialize header */
1191 	bzero(&hdr, sizeof(hdr));
1192 	hdr.rtm_version = RTM_VERSION;
1193 	hdr.rtm_type = action;
1194 	hdr.rtm_priority = kr_state.fib_prio;
1195 	hdr.rtm_tableid = kr_state.rdomain;	/* rtableid */
1196 	if (action == RTM_CHANGE)
1197 		hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
1198 	else
1199 		hdr.rtm_flags = RTF_MPATH;
1200 	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
1201 	hdr.rtm_msglen = sizeof(hdr);
1202 	/* adjust iovec */
1203 	iov[iovcnt].iov_base = &hdr;
1204 	iov[iovcnt++].iov_len = sizeof(hdr);
1205 
1206 	bzero(&prefix, sizeof(prefix));
1207 	prefix.sin_len = sizeof(prefix);
1208 	prefix.sin_family = AF_INET;
1209 	prefix.sin_addr.s_addr = kroute->prefix.s_addr;
1210 	/* adjust header */
1211 	hdr.rtm_addrs |= RTA_DST;
1212 	hdr.rtm_msglen += sizeof(prefix);
1213 	/* adjust iovec */
1214 	iov[iovcnt].iov_base = &prefix;
1215 	iov[iovcnt++].iov_len = sizeof(prefix);
1216 
1217 	if (kroute->nexthop.s_addr != 0) {
1218 		bzero(&nexthop, sizeof(nexthop));
1219 		nexthop.sin_len = sizeof(nexthop);
1220 		nexthop.sin_family = AF_INET;
1221 		nexthop.sin_addr.s_addr = kroute->nexthop.s_addr;
1222 		/* adjust header */
1223 		hdr.rtm_flags |= RTF_GATEWAY;
1224 		hdr.rtm_addrs |= RTA_GATEWAY;
1225 		hdr.rtm_msglen += sizeof(nexthop);
1226 		/* adjust iovec */
1227 		iov[iovcnt].iov_base = &nexthop;
1228 		iov[iovcnt++].iov_len = sizeof(nexthop);
1229 	}
1230 
1231 	bzero(&mask, sizeof(mask));
1232 	mask.sin_len = sizeof(mask);
1233 	mask.sin_family = AF_INET;
1234 	mask.sin_addr.s_addr = prefixlen2mask(kroute->prefixlen);
1235 	/* adjust header */
1236 	hdr.rtm_addrs |= RTA_NETMASK;
1237 	hdr.rtm_msglen += sizeof(mask);
1238 	/* adjust iovec */
1239 	iov[iovcnt].iov_base = &mask;
1240 	iov[iovcnt++].iov_len = sizeof(mask);
1241 
1242 	if (kroute->rtlabel != 0) {
1243 		sa_rl.sr_len = sizeof(sa_rl);
1244 		sa_rl.sr_family = AF_UNSPEC;
1245 		label = rtlabel_id2name(kroute->rtlabel);
1246 		if (strlcpy(sa_rl.sr_label, label,
1247 		    sizeof(sa_rl.sr_label)) >= sizeof(sa_rl.sr_label)) {
1248 			log_warnx("send_rtmsg: invalid rtlabel");
1249 			return (-1);
1250 		}
1251 		/* adjust header */
1252 		hdr.rtm_addrs |= RTA_LABEL;
1253 		hdr.rtm_msglen += sizeof(sa_rl);
1254 		/* adjust iovec */
1255 		iov[iovcnt].iov_base = &sa_rl;
1256 		iov[iovcnt++].iov_len = sizeof(sa_rl);
1257 	}
1258 
1259 retry:
1260 	if (writev(fd, iov, iovcnt) == -1) {
1261 		if (errno == ESRCH) {
1262 			if (hdr.rtm_type == RTM_CHANGE) {
1263 				hdr.rtm_type = RTM_ADD;
1264 				goto retry;
1265 			} else if (hdr.rtm_type == RTM_DELETE) {
1266 				log_info("route %s/%u vanished before delete",
1267 				    inet_ntoa(kroute->prefix),
1268 				    kroute->prefixlen);
1269 				return (0);
1270 			}
1271 		}
1272 		log_warn("send_rtmsg: action %u, prefix %s/%u", hdr.rtm_type,
1273 		    inet_ntoa(kroute->prefix), kroute->prefixlen);
1274 		return (0);
1275 	}
1276 
1277 	return (0);
1278 }
1279 
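/*
 * Dump the kernel routing table for our rdomain via sysctl(NET_RT_DUMP)
 * and feed the result to rtmsg_process(); fetchifs() below does the same
 * for the interface list (NET_RT_IFLIST).
 */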
1280 int
1281 fetchtable(void)
1282 {
1283 	size_t			 len;
1284 	int			 mib[7];
1285 	char			*buf;
1286 	int			 rv;
1287 
1288 	mib[0] = CTL_NET;
1289 	mib[1] = PF_ROUTE;
1290 	mib[2] = 0;
1291 	mib[3] = AF_INET;
1292 	mib[4] = NET_RT_DUMP;
1293 	mib[5] = 0;
1294 	mib[6] = kr_state.rdomain;	/* rtableid */
1295 
1296 	if (sysctl(mib, 7, NULL, &len, NULL, 0) == -1) {
1297 		log_warn("sysctl");
1298 		return (-1);
1299 	}
1300 	if ((buf = malloc(len)) == NULL) {
1301 		log_warn("fetchtable");
1302 		return (-1);
1303 	}
1304 	if (sysctl(mib, 7, buf, &len, NULL, 0) == -1) {
1305 		log_warn("sysctl");
1306 		free(buf);
1307 		return (-1);
1308 	}
1309 
1310 	rv = rtmsg_process(buf, len);
1311 	free(buf);
1312 
1313 	return (rv);
1314 }
1315 
1316 int
1317 fetchifs(u_short ifindex)
1318 {
1319 	size_t			 len;
1320 	int			 mib[6];
1321 	char			*buf;
1322 	int			 rv;
1323 
1324 	mib[0] = CTL_NET;
1325 	mib[1] = PF_ROUTE;
1326 	mib[2] = 0;
1327 	mib[3] = AF_INET;
1328 	mib[4] = NET_RT_IFLIST;
1329 	mib[5] = ifindex;
1330 
1331 	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1) {
1332 		log_warn("sysctl");
1333 		return (-1);
1334 	}
1335 	if ((buf = malloc(len)) == NULL) {
1336 		log_warn("fetchif");
1337 		return (-1);
1338 	}
1339 	if (sysctl(mib, 6, buf, &len, NULL, 0) == -1) {
1340 		log_warn("sysctl");
1341 		free(buf);
1342 		return (-1);
1343 	}
1344 
1345 	rv = rtmsg_process(buf, len);
1346 	free(buf);
1347 
1348 	return (rv);
1349 }
1350 
1351 int
1352 dispatch_rtmsg(void)
1353 {
1354 	char			 buf[RT_BUF_SIZE];
1355 	ssize_t			 n;
1356 
1357 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1358 		if (errno == EAGAIN || errno == EINTR)
1359 			return (0);
1360 		log_warn("dispatch_rtmsg: read error");
1361 		return (-1);
1362 	}
1363 
1364 	if (n == 0) {
1365 		log_warnx("routing socket closed");
1366 		return (-1);
1367 	}
1368 
1369 	return (rtmsg_process(buf, n));
1370 }
1371 
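/*
 * Parse a buffer of routing messages (read from the socket or taken from
 * a sysctl dump) and keep the route and interface trees in sync: route
 * add/change/delete, interface and address changes, interface
 * arrival/departure and RTM_DESYNC handling.
 */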
1372 int
1373 rtmsg_process(char *buf, size_t len)
1374 {
1375 	struct rt_msghdr	*rtm;
1376 	struct if_msghdr	 ifm;
1377 	struct ifa_msghdr	*ifam;
1378 	struct sockaddr		*sa, *rti_info[RTAX_MAX];
1379 	struct sockaddr_in	*sa_in;
1380 	struct sockaddr_rtlabel	*label;
1381 	struct kroute_node	*kr, *okr;
1382 	struct in_addr		 prefix, nexthop;
1383 	u_int8_t		 prefixlen, prio;
1384 	int			 flags, mpath;
1385 	u_short			 ifindex = 0;
1386 	int			 rv, delay;
1387 
1388 	size_t			 offset;
1389 	char			*next;
1390 
1391 	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
1392 		next = buf + offset;
1393 		rtm = (struct rt_msghdr *)next;
1394 		if (len < offset + sizeof(u_short) ||
1395 		    len < offset + rtm->rtm_msglen)
1396 			fatalx("%s: partial rtm in buffer", __func__);
1397 		if (rtm->rtm_version != RTM_VERSION)
1398 			continue;
1399 
1400 		prefix.s_addr = 0;
1401 		prefixlen = 0;
1402 		nexthop.s_addr = 0;
1403 		mpath = 0;
1404 		prio = 0;
1405 		flags = F_KERNEL;
1406 
1407 		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
1408 		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);
1409 
1410 		switch (rtm->rtm_type) {
1411 		case RTM_ADD:
1412 		case RTM_GET:
1413 		case RTM_CHANGE:
1414 		case RTM_DELETE:
1415 			if (rtm->rtm_errno)		/* failed attempts... */
1416 				continue;
1417 
1418 			if (rtm->rtm_tableid != kr_state.rdomain)
1419 				continue;
1420 
1421 			if (rtm->rtm_type == RTM_GET &&
1422 			    rtm->rtm_pid != kr_state.pid)
1423 				continue;
1424 
1425 			if ((sa = rti_info[RTAX_DST]) == NULL)
1426 				continue;
1427 
1428 			/* Skip ARP/ND cache and broadcast routes. */
1429 			if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
1430 				continue;
1431 
1432 			if (rtm->rtm_flags & RTF_MPATH)
1433 				mpath = 1;
1434 			prio = rtm->rtm_priority;
1435 			flags = (prio == kr_state.fib_prio) ?
1436 			    F_OSPFD_INSERTED : F_KERNEL;
1437 
1438 			switch (sa->sa_family) {
1439 			case AF_INET:
1440 				prefix.s_addr =
1441 				    ((struct sockaddr_in *)sa)->sin_addr.s_addr;
1442 				sa_in = (struct sockaddr_in *)
1443 				    rti_info[RTAX_NETMASK];
1444 				if (sa_in != NULL) {
1445 					if (sa_in->sin_len != 0)
1446 						prefixlen = mask2prefixlen(
1447 						    sa_in->sin_addr.s_addr);
1448 				} else if (rtm->rtm_flags & RTF_HOST)
1449 					prefixlen = 32;
1450 				else
1451 					prefixlen =
1452 					    prefixlen_classful(prefix.s_addr);
1453 				if (rtm->rtm_flags & RTF_STATIC)
1454 					flags |= F_STATIC;
1455 				if (rtm->rtm_flags & RTF_BLACKHOLE)
1456 					flags |= F_BLACKHOLE;
1457 				if (rtm->rtm_flags & RTF_REJECT)
1458 					flags |= F_REJECT;
1459 				if (rtm->rtm_flags & RTF_DYNAMIC)
1460 					flags |= F_DYNAMIC;
1461 				break;
1462 			default:
1463 				continue;
1464 			}
1465 
1466 			ifindex = rtm->rtm_index;
1467 			if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
1468 				switch (sa->sa_family) {
1469 				case AF_INET:
1470 					if (rtm->rtm_flags & RTF_CONNECTED)
1471 						flags |= F_CONNECTED;
1472 
1473 					nexthop.s_addr = ((struct
1474 					    sockaddr_in *)sa)->sin_addr.s_addr;
1475 					break;
1476 				case AF_LINK:
1477 					/*
1478 					 * Traditional BSD connected routes have
1479 					 * a gateway of type AF_LINK.
1480 					 */
1481 					flags |= F_CONNECTED;
1482 					break;
1483 				}
1484 			}
1485 		}
1486 
1487 		switch (rtm->rtm_type) {
1488 		case RTM_ADD:
1489 		case RTM_GET:
1490 		case RTM_CHANGE:
1491 			if (nexthop.s_addr == 0 && !(flags & F_CONNECTED)) {
1492 				log_warnx("no nexthop for %s/%u",
1493 				    inet_ntoa(prefix), prefixlen);
1494 				continue;
1495 			}
1496 
1497 			if ((okr = kroute_find(prefix.s_addr, prefixlen, prio))
1498 			    != NULL) {
1499 				/* get the correct route */
1500 				kr = okr;
1501 				if ((mpath || prio == kr_state.fib_prio) &&
1502 				    (kr = kroute_matchgw(okr, nexthop)) ==
1503 				    NULL) {
1504 					log_warnx("%s: mpath route not found",
1505 					    __func__);
1506 					/* add routes we missed out earlier */
1507 					goto add;
1508 				}
1509 
1510 				if (kr->r.flags & F_REDISTRIBUTED)
1511 					flags |= F_REDISTRIBUTED;
1512 				kr->r.nexthop.s_addr = nexthop.s_addr;
1513 				kr->r.flags = flags;
1514 				kr->r.ifindex = ifindex;
1515 
1516 				rtlabel_unref(kr->r.rtlabel);
1517 				kr->r.rtlabel = 0;
1518 				kr->r.ext_tag = 0;
1519 				if ((label = (struct sockaddr_rtlabel *)
1520 				    rti_info[RTAX_LABEL]) != NULL) {
1521 					kr->r.rtlabel =
1522 					    rtlabel_name2id(label->sr_label);
1523 					kr->r.ext_tag =
1524 					    rtlabel_id2tag(kr->r.rtlabel);
1525 				}
1526 
1527 				if (kif_validate(kr->r.ifindex))
1528 					kr->r.flags &= ~F_DOWN;
1529 				else
1530 					kr->r.flags |= F_DOWN;
1531 
1532 				/* just re-add it; the RDE copes with duplicates */
1533 				kr->serial = kr_state.fib_serial;
1534 				kr_redistribute(okr);
1535 			} else {
1536 add:
1537 				if ((kr = calloc(1,
1538 				    sizeof(struct kroute_node))) == NULL) {
1539 					log_warn("%s: calloc", __func__);
1540 					return (-1);
1541 				}
1542 
1543 				kr->r.prefix.s_addr = prefix.s_addr;
1544 				kr->r.prefixlen = prefixlen;
1545 				kr->r.nexthop.s_addr = nexthop.s_addr;
1546 				kr->r.flags = flags;
1547 				kr->r.ifindex = ifindex;
1548 				kr->r.priority = prio;
1549 
1550 				if (rtm->rtm_priority == kr_state.fib_prio) {
1551 					log_warnx("alien OSPF route %s/%d",
1552 					    inet_ntoa(prefix), prefixlen);
1553 					rv = send_rtmsg(kr_state.fd,
1554 					    RTM_DELETE, &kr->r);
1555 					free(kr);
1556 					if (rv == -1)
1557 						return (-1);
1558 				} else {
1559 					if ((label = (struct sockaddr_rtlabel *)
1560 					    rti_info[RTAX_LABEL]) != NULL) {
1561 						kr->r.rtlabel =
1562 						    rtlabel_name2id(
1563 						    label->sr_label);
1564 						kr->r.ext_tag =
1565 						    rtlabel_id2tag(
1566 						    kr->r.rtlabel);
1567 					}
1568 
1569 					kroute_insert(kr);
1570 				}
1571 			}
1572 			break;
1573 		case RTM_DELETE:
1574 			if ((kr = kroute_find(prefix.s_addr, prefixlen, prio))
1575 			    == NULL)
1576 				continue;
1577 			if (!(kr->r.flags & F_KERNEL))
1578 				continue;
1579 			/* get the correct route */
1580 			okr = kr;
1581 			if (mpath &&
1582 			    (kr = kroute_matchgw(kr, nexthop)) == NULL) {
1583 				log_warnx("%s: mpath route not found",
1584 				    __func__);
1585 				return (-1);
1586 			}
1587 			if (kroute_remove(kr) == -1)
1588 				return (-1);
1589 			break;
1590 		case RTM_IFINFO:
1591 			memcpy(&ifm, next, sizeof(ifm));
1592 			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
1593 			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
1594 			break;
1595 		case RTM_NEWADDR:
1596 			ifam = (struct ifa_msghdr *)rtm;
1597 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1598 			    RTA_BRD)) == 0)
1599 				break;
1600 
1601 			if_newaddr(ifam->ifam_index,
1602 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1603 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1604 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1605 			break;
1606 		case RTM_DELADDR:
1607 			ifam = (struct ifa_msghdr *)rtm;
1608 			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
1609 			    RTA_BRD)) == 0)
1610 				break;
1611 
1612 			if_deladdr(ifam->ifam_index,
1613 			    (struct sockaddr_in *)rti_info[RTAX_IFA],
1614 			    (struct sockaddr_in *)rti_info[RTAX_NETMASK],
1615 			    (struct sockaddr_in *)rti_info[RTAX_BRD]);
1616 			break;
1617 		case RTM_IFANNOUNCE:
1618 			if_announce(next);
1619 			break;
1620 		case RTM_DESYNC:
1621 			/*
1622 			 * We lost some routing packets. Schedule a reload
1623 			 * of the kernel route/interface information.
1624 			 */
1625 			if (kr_state.reload_state == KR_RELOAD_IDLE) {
1626 				delay = KR_RELOAD_TIMER;
1627 				log_info("desync; scheduling fib reload");
1628 			} else {
1629 				delay = KR_RELOAD_HOLD_TIMER;
1630 				log_debug("desync during KR_RELOAD_%s",
1631 				    kr_state.reload_state ==
1632 				    KR_RELOAD_FETCH ? "FETCH" : "HOLD");
1633 			}
1634 			kr_state.reload_state = KR_RELOAD_FETCH;
1635 			kr_fib_reload_arm_timer(delay);
1636 			break;
1637 		default:
1638 			/* ignore for now */
1639 			break;
1640 		}
1641 	}
1642 
1643 	return (offset);
1644 }
1645