xref: /openbsd/usr.sbin/ospf6d/kroute.c (revision 088a2cd9)
1 /*	$OpenBSD: kroute.c,v 1.69 2024/05/18 11:17:30 jsg Exp $ */
2 
3 /*
4  * Copyright (c) 2004 Esben Norby <norby@openbsd.org>
5  * Copyright (c) 2003, 2004 Henning Brauer <henning@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <sys/sysctl.h>
23 #include <sys/tree.h>
24 #include <sys/uio.h>
25 #include <netinet/in.h>
26 #include <arpa/inet.h>
27 #include <net/if.h>
28 #include <net/if_dl.h>
29 #include <net/if_types.h>
30 #include <net/route.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <limits.h>
39 
40 #include "ospf6d.h"
41 #include "ospfe.h"
42 #include "log.h"
43 
44 struct {
45 	u_int32_t		rtseq;
46 	pid_t			pid;
47 	int			fib_sync;
48 	int			fib_serial;
49 	u_int8_t		fib_prio;
50 	int			fd;
51 	struct event		ev;
52 	struct event		reload;
53 	u_int			rdomain;
54 #define KR_RELOAD_IDLE 0
55 #define KR_RELOAD_FETCH        1
56 #define KR_RELOAD_HOLD 2
57 	int                     reload_state;
58 } kr_state;
59 
60 struct kroute_node {
61 	RB_ENTRY(kroute_node)	 entry;
62 	struct kroute_node	*next;
63 	struct kroute		 r;
64 	int			 serial;
65 };
66 
67 void	kr_redist_remove(struct kroute_node *, struct kroute_node *);
68 int	kr_redist_eval(struct kroute *, struct kroute *);
69 void	kr_redistribute(struct kroute_node *);
70 int	kroute_compare(struct kroute_node *, struct kroute_node *);
71 int	kr_change_fib(struct kroute_node *, struct kroute *, int, int);
72 int	kr_delete_fib(struct kroute_node *);
73 
74 struct kroute_node	*kroute_find(const struct in6_addr *, u_int8_t,
75 			    u_int8_t);
76 struct kroute_node	*kroute_matchgw(struct kroute_node *,
77 			    struct in6_addr *, unsigned int);
78 int			 kroute_insert(struct kroute_node *);
79 int			 kroute_remove(struct kroute_node *);
80 void			 kroute_clear(void);
81 
82 struct iface		*kif_update(u_short, int, struct if_data *,
83 			   struct sockaddr_dl *);
84 int			 kif_validate(u_short);
85 
86 struct kroute_node	*kroute_match(struct in6_addr *);
87 
88 int		protect_lo(void);
89 void		get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
90 void		if_change(u_short, int, struct if_data *, struct sockaddr_dl *);
91 void		if_newaddr(u_short, struct sockaddr_in6 *,
92 		    struct sockaddr_in6 *, struct sockaddr_in6 *);
93 void		if_deladdr(u_short, struct sockaddr_in6 *,
94 		    struct sockaddr_in6 *, struct sockaddr_in6 *);
95 void		if_announce(void *);
96 
97 int		send_rtmsg(int, int, struct kroute *);
98 int		dispatch_rtmsg(void);
99 int		fetchtable(void);
100 int		rtmsg_process(char *, size_t);
101 void		kr_fib_reload_timer(int, short, void *);
102 void		kr_fib_reload_arm_timer(int);
103 
104 RB_HEAD(kroute_tree, kroute_node)	krt;
RB_PROTOTYPE(kroute_tree,kroute_node,entry,kroute_compare)105 RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
106 RB_GENERATE(kroute_tree, kroute_node, entry, kroute_compare)
107 
108 int
109 kr_init(int fs, u_int rdomain, int redis_label_or_prefix, u_int8_t fib_prio)
110 {
111 	int		opt = 0, rcvbuf, default_rcvbuf;
112 	socklen_t	optlen;
113 	int		filter_prio = fib_prio;
114 	int		filter_flags = RTF_LLINFO | RTF_BROADCAST;
115 
116 	kr_state.fib_sync = fs;
117 	kr_state.rdomain = rdomain;
118 	kr_state.fib_prio = fib_prio;
119 
120 	if ((kr_state.fd = socket(AF_ROUTE,
121 	    SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, AF_INET6)) == -1) {
122 		log_warn("kr_init: socket");
123 		return (-1);
124 	}
125 
126 	/* not interested in my own messages */
127 	if (setsockopt(kr_state.fd, SOL_SOCKET, SO_USELOOPBACK,
128 	    &opt, sizeof(opt)) == -1)
129 		log_warn("kr_init: setsockopt");	/* not fatal */
130 
131 	if (redis_label_or_prefix) {
132 		filter_prio = 0;
133 		log_info("%s: priority filter disabled", __func__);
134 	} else
135 		log_debug("%s: priority filter enabled", __func__);
136 
137 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
138 	    sizeof(filter_prio)) == -1) {
139 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
140 		/* not fatal */
141 	}
142 
143 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_FLAGFILTER, &filter_flags,
144 	    sizeof(filter_flags)) == -1) {
145 		log_warn("%s: setsockopt AF_ROUTE ROUTE_FLAGFILTER", __func__);
146 		/* not fatal */
147 	}
148 
149 	/* grow receive buffer, don't wanna miss messages */
150 	optlen = sizeof(default_rcvbuf);
151 	if (getsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
152 	    &default_rcvbuf, &optlen) == -1)
153 		log_warn("kr_init getsockopt SOL_SOCKET SO_RCVBUF");
154 	else
155 		for (rcvbuf = MAX_RTSOCK_BUF;
156 		    rcvbuf > default_rcvbuf &&
157 		    setsockopt(kr_state.fd, SOL_SOCKET, SO_RCVBUF,
158 		    &rcvbuf, sizeof(rcvbuf)) == -1 && errno == ENOBUFS;
159 		    rcvbuf /= 2)
160 			;	/* nothing */
161 
162 	kr_state.pid = getpid();
163 	kr_state.rtseq = 1;
164 
165 	RB_INIT(&krt);
166 
167 	if (fetchtable() == -1)
168 		return (-1);
169 
170 	if (protect_lo() == -1)
171 		return (-1);
172 
173 	event_set(&kr_state.ev, kr_state.fd, EV_READ | EV_PERSIST,
174 	    kr_dispatch_msg, NULL);
175 	event_add(&kr_state.ev, NULL);
176 
177 	kr_state.reload_state = KR_RELOAD_IDLE;
178 	evtimer_set(&kr_state.reload, kr_fib_reload_timer, NULL);
179 
180 	return (0);
181 }
182 
183 int
kr_change_fib(struct kroute_node * kr,struct kroute * kroute,int krcount,int action)184 kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
185     int action)
186 {
187 	int			 i;
188 	struct kroute_node	*kn, *nkn;
189 
190 	if (action == RTM_ADD) {
191 		/*
192 		 * First remove all stale multipath routes.
193 		 * This step must be skipped when the action is RTM_CHANGE
194 		 * because it is already a single path route that will be
195 		 * changed.
196 		 */
197 		for (kn = kr; kn != NULL; kn = nkn) {
198 			for (i = 0; i < krcount; i++) {
199 				if (kn->r.scope == kroute[i].scope &&
200 				    IN6_ARE_ADDR_EQUAL(&kn->r.nexthop,
201 				    &kroute[i].nexthop))
202 					break;
203 			}
204 			nkn = kn->next;
205 			if (i == krcount) {
206 				/* stale route */
207 				if (kr_delete_fib(kn) == -1)
208 					log_warnx("kr_delete_fib failed");
209 				/*
210 				 * if head element was removed we need to adjust
211 				 * the head
212 				 */
213 				if (kr == kn)
214 					kr = nkn;
215 			}
216 		}
217 	}
218 
219 	/*
220 	 * now add or change the route
221 	 */
222 	for (i = 0; i < krcount; i++) {
223 		/* nexthop ::1 -> ignore silently */
224 		if (IN6_IS_ADDR_LOOPBACK(&kroute[i].nexthop))
225 			continue;
226 
227 		if (action == RTM_ADD && kr) {
228 			for (kn = kr; kn != NULL; kn = kn->next) {
229 				if (kn->r.scope == kroute[i].scope &&
230 				    IN6_ARE_ADDR_EQUAL(&kn->r.nexthop,
231 				    &kroute[i].nexthop))
232 					break;
233 			}
234 
235 			if (kn != NULL)
236 				/* nexthop already present, skip it */
237 				continue;
238 		} else
239 			/* modify first entry */
240 			kn = kr;
241 
242 		/* send update */
243 		if (send_rtmsg(kr_state.fd, action, &kroute[i]) == -1)
244 			return (-1);
245 
246 		/* create new entry unless we are changing the first entry */
247 		if (action == RTM_ADD)
248 			if ((kn = calloc(1, sizeof(*kn))) == NULL)
249 				fatal(NULL);
250 
251 		kn->r.prefix = kroute[i].prefix;
252 		kn->r.prefixlen = kroute[i].prefixlen;
253 		kn->r.nexthop = kroute[i].nexthop;
254 		kn->r.scope = kroute[i].scope;
255 		kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
256 		kn->r.priority = kr_state.fib_prio;
257 		kn->r.ext_tag = kroute[i].ext_tag;
258 		rtlabel_unref(kn->r.rtlabel);	/* for RTM_CHANGE */
259 		kn->r.rtlabel = kroute[i].rtlabel;
260 
261 		if (action == RTM_ADD)
262 			if (kroute_insert(kn) == -1) {
263 				log_debug("kr_update_fib: cannot insert %s",
264 				    log_in6addr(&kn->r.nexthop));
265 				free(kn);
266 			}
267 		action = RTM_ADD;
268 	}
269 	return  (0);
270 }
271 
272 int
kr_change(struct kroute * kroute,int krcount)273 kr_change(struct kroute *kroute, int krcount)
274 {
275 	struct kroute_node	*kr;
276 	int			 action = RTM_ADD;
277 
278 	kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
279 
280 	kr = kroute_find(&kroute->prefix, kroute->prefixlen, kr_state.fib_prio);
281 	if (kr != NULL && kr->next == NULL && krcount == 1) {
282 		/*
283 		 * single path OSPF route.
284 		 * The kernel does not allow to change a gateway route to a
285 		 * cloning route or contrary. In this case remove and add the
286 		 * route, otherwise change the existing one.
287 		 */
288 		if ((IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop) &&
289 		    !IN6_IS_ADDR_UNSPECIFIED(&kr->r.nexthop)) ||
290 		    (!IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop) &&
291 		    IN6_IS_ADDR_UNSPECIFIED(&kr->r.nexthop))) {
292 			if (kr_delete_fib(kr) == 0)
293 				kr = NULL;
294 			else {
295 				log_warn("kr_change: failed to remove route: "
296 				    "%s/%d", log_in6addr(&kr->r.prefix),
297 				    kr->r.prefixlen);
298 				return (-1);
299 			}
300 		} else
301 			action = RTM_CHANGE;
302 	}
303 
304 	return (kr_change_fib(kr, kroute, krcount, action));
305 }
306 
307 int
kr_delete_fib(struct kroute_node * kr)308 kr_delete_fib(struct kroute_node *kr)
309 {
310 	if (kr->r.priority != kr_state.fib_prio)
311 		log_warn("kr_delete_fib: %s/%d has wrong priority %d",
312 		    log_in6addr(&kr->r.prefix), kr->r.prefixlen,
313 		    kr->r.priority);
314 
315 	if (send_rtmsg(kr_state.fd, RTM_DELETE, &kr->r) == -1)
316 		return (-1);
317 
318 	if (kroute_remove(kr) == -1)
319 		return (-1);
320 
321 	return (0);
322 }
323 
324 int
kr_delete(struct kroute * kroute)325 kr_delete(struct kroute *kroute)
326 {
327 	struct kroute_node	*kr, *nkr;
328 
329 	if ((kr = kroute_find(&kroute->prefix, kroute->prefixlen,
330 	    kr_state.fib_prio)) == NULL)
331 		return (0);
332 
333 	while (kr != NULL) {
334 		nkr = kr->next;
335 		if (kr_delete_fib(kr) == -1)
336 			return (-1);
337 		kr = nkr;
338 	}
339 
340 	return (0);
341 }
342 
/*
 * Shut the kernel routing interface down: decouple from the kernel
 * routing table first, then empty the local route tree.
 */
void
kr_shutdown(void)
{
	kr_fib_decouple();

	kroute_clear();
}
349 
350 void
kr_fib_couple(void)351 kr_fib_couple(void)
352 {
353 	struct kroute_node	*kr;
354 	struct kroute_node	*kn;
355 
356 	if (kr_state.fib_sync == 1)	/* already coupled */
357 		return;
358 
359 	kr_state.fib_sync = 1;
360 
361 	RB_FOREACH(kr, kroute_tree, &krt)
362 		if (kr->r.priority == kr_state.fib_prio)
363 			for (kn = kr; kn != NULL; kn = kn->next)
364 				send_rtmsg(kr_state.fd, RTM_ADD, &kn->r);
365 
366 	log_info("kernel routing table coupled");
367 }
368 
369 void
kr_fib_decouple(void)370 kr_fib_decouple(void)
371 {
372 	struct kroute_node	*kr;
373 	struct kroute_node	*kn;
374 
375 	if (kr_state.fib_sync == 0)	/* already decoupled */
376 		return;
377 
378 	RB_FOREACH(kr, kroute_tree, &krt)
379 		if (kr->r.priority == kr_state.fib_prio)
380 			for (kn = kr; kn != NULL; kn = kn->next)
381 				send_rtmsg(kr_state.fd, RTM_DELETE, &kn->r);
382 
383 	kr_state.fib_sync = 0;
384 
385 	log_info("kernel routing table decoupled");
386 }
387 
388 void
kr_fib_reload_timer(int fd,short event,void * bula)389 kr_fib_reload_timer(int fd, short event, void *bula)
390 {
391 	if (kr_state.reload_state == KR_RELOAD_FETCH) {
392 		kr_fib_reload();
393 		kr_state.reload_state = KR_RELOAD_HOLD;
394 		kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
395 	} else {
396 		kr_state.reload_state = KR_RELOAD_IDLE;
397 	}
398 }
399 
400 void
kr_fib_reload_arm_timer(int delay)401 kr_fib_reload_arm_timer(int delay)
402 {
403 	struct timeval		tv;
404 
405 	timerclear(&tv);
406 	tv.tv_sec = delay / 1000;
407 	tv.tv_usec = (delay % 1000) * 1000;
408 
409 	if (evtimer_add(&kr_state.reload, &tv) == -1)
410 		fatal("add_reload_timer");
411 }
412 
413 void
kr_fib_reload(void)414 kr_fib_reload(void)
415 {
416 	struct kroute_node	*krn, *kr, *kn;
417 
418 	log_info("reloading interface list and routing table");
419 
420 	kr_state.fib_serial++;
421 
422 	if (fetchifs(0) != 0 || fetchtable() != 0)
423 		return;
424 
425 	for (kr = RB_MIN(kroute_tree, &krt); kr != NULL; kr = krn) {
426 		krn = RB_NEXT(kroute_tree, &krt, kr);
427 
428 		do {
429 			kn = kr->next;
430 
431 			if (kr->serial != kr_state.fib_serial) {
432 
433 				if (kr->r.priority == kr_state.fib_prio) {
434 					kr->serial = kr_state.fib_serial;
435 					if (send_rtmsg(kr_state.fd,
436 					    RTM_ADD, &kr->r) != 0)
437 						break;
438 				} else
439 					kroute_remove(kr);
440 			}
441 
442 		} while ((kr = kn) != NULL);
443 	}
444 }
445 
446 void
kr_fib_update_prio(u_int8_t fib_prio)447 kr_fib_update_prio(u_int8_t fib_prio)
448 {
449 	struct kroute_node	*kr;
450 
451 	RB_FOREACH(kr, kroute_tree, &krt)
452 		if ((kr->r.flags & F_OSPFD_INSERTED))
453 			kr->r.priority = fib_prio;
454 
455 	log_info("fib priority changed from %hhu to %hhu", kr_state.fib_prio,
456 	    fib_prio);
457 
458 	kr_state.fib_prio = fib_prio;
459 }
460 
/*
 * Read event callback of the routing socket.  Hands over to
 * dispatch_rtmsg() and drops its result; the arguments are not used.
 */
void
kr_dispatch_msg(int fd, short event, void *bula)
{
	/* XXX this is stupid */
	(void)dispatch_rtmsg();
}
467 
468 void
kr_show_route(struct imsg * imsg)469 kr_show_route(struct imsg *imsg)
470 {
471 	struct kroute_node	*kr;
472 	struct kroute_node	*kn;
473 	int			 flags;
474 	struct in6_addr		 addr;
475 
476 	switch (imsg->hdr.type) {
477 	case IMSG_CTL_KROUTE:
478 		if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(flags)) {
479 			log_warnx("kr_show_route: wrong imsg len");
480 			return;
481 		}
482 		memcpy(&flags, imsg->data, sizeof(flags));
483 		RB_FOREACH(kr, kroute_tree, &krt)
484 			if (!flags || kr->r.flags & flags) {
485 				kn = kr;
486 				do {
487 					main_imsg_compose_ospfe(IMSG_CTL_KROUTE,
488 					    imsg->hdr.pid,
489 					    &kn->r, sizeof(kn->r));
490 				} while ((kn = kn->next) != NULL);
491 			}
492 		break;
493 	case IMSG_CTL_KROUTE_ADDR:
494 		if (imsg->hdr.len != IMSG_HEADER_SIZE +
495 		    sizeof(struct in6_addr)) {
496 			log_warnx("kr_show_route: wrong imsg len");
497 			return;
498 		}
499 		memcpy(&addr, imsg->data, sizeof(addr));
500 		kr = kroute_match(&addr);
501 		if (kr != NULL)
502 			main_imsg_compose_ospfe(IMSG_CTL_KROUTE, imsg->hdr.pid,
503 			    &kr->r, sizeof(kr->r));
504 		break;
505 	default:
506 		log_debug("kr_show_route: error handling imsg");
507 		break;
508 	}
509 
510 	main_imsg_compose_ospfe(IMSG_CTL_END, imsg->hdr.pid, NULL, 0);
511 }
512 
513 void
kr_redist_remove(struct kroute_node * kh,struct kroute_node * kn)514 kr_redist_remove(struct kroute_node *kh, struct kroute_node *kn)
515 {
516 	struct kroute	 *kr;
517 
518 	/* was the route redistributed? */
519 	if ((kn->r.flags & F_REDISTRIBUTED) == 0)
520 		return;
521 
522 	/* remove redistributed flag */
523 	kn->r.flags &= ~F_REDISTRIBUTED;
524 	kr = &kn->r;
525 
526 	/* probably inform the RDE (check if no other path is redistributed) */
527 	for (kn = kh; kn; kn = kn->next)
528 		if (kn->r.flags & F_REDISTRIBUTED)
529 			break;
530 
531 	if (kn == NULL)
532 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, kr,
533 		    sizeof(struct kroute));
534 }
535 
536 int
kr_redist_eval(struct kroute * kr,struct kroute * new_kr)537 kr_redist_eval(struct kroute *kr, struct kroute *new_kr)
538 {
539 	u_int32_t	 metric = 0;
540 
541 	/* Only non-ospfd routes are considered for redistribution. */
542 	if (!(kr->flags & F_KERNEL))
543 		goto dont_redistribute;
544 
545 	/* Dynamic routes are not redistributable. */
546 	if (kr->flags & F_DYNAMIC)
547 		goto dont_redistribute;
548 
549 	/* interface is not up and running so don't announce */
550 	if (kr->flags & F_DOWN)
551 		goto dont_redistribute;
552 
553 	/*
554 	 * We consider loopback, multicast, link- and site-local,
555 	 * IPv4 mapped and IPv4 compatible addresses as not redistributable.
556 	 */
557 	if (IN6_IS_ADDR_LOOPBACK(&kr->prefix) ||
558 	    IN6_IS_ADDR_MULTICAST(&kr->prefix) ||
559 	    IN6_IS_ADDR_LINKLOCAL(&kr->prefix) ||
560 	    IN6_IS_ADDR_SITELOCAL(&kr->prefix) ||
561 	    IN6_IS_ADDR_V4MAPPED(&kr->prefix) ||
562 	    IN6_IS_ADDR_V4COMPAT(&kr->prefix))
563 		goto dont_redistribute;
564 	/*
565 	 * Consider networks with nexthop loopback as not redistributable
566 	 * unless it is a reject or blackhole route.
567 	 */
568 	if (IN6_IS_ADDR_LOOPBACK(&kr->nexthop) &&
569 	    !(kr->flags & (F_BLACKHOLE|F_REJECT)))
570 		goto dont_redistribute;
571 
572 	/* Should we redistribute this route? */
573 	if (!ospf_redistribute(kr, &metric))
574 		goto dont_redistribute;
575 
576 	/* prefix should be redistributed */
577 	kr->flags |= F_REDISTRIBUTED;
578 	/*
579 	 * only one of all multipath routes can be redistributed so
580 	 * redistribute the best one.
581 	 */
582 	if (new_kr->metric > metric) {
583 		*new_kr = *kr;
584 		new_kr->metric = metric;
585 	}
586 
587 	return (1);
588 
589 dont_redistribute:
590 	/* was the route redistributed? */
591 	if ((kr->flags & F_REDISTRIBUTED) == 0)
592 		return (0);
593 
594 	kr->flags &= ~F_REDISTRIBUTED;
595 	return (1);
596 }
597 
598 void
kr_redistribute(struct kroute_node * kh)599 kr_redistribute(struct kroute_node *kh)
600 {
601 	struct kroute_node	*kn;
602 	struct kroute		 kr;
603 	int			 redistribute = 0;
604 
605 	/* only the highest prio route can be redistributed */
606 	if (kroute_find(&kh->r.prefix, kh->r.prefixlen, RTP_ANY) != kh)
607 		return;
608 
609 	bzero(&kr, sizeof(kr));
610 	kr.metric = UINT_MAX;
611 	for (kn = kh; kn; kn = kn->next)
612 		if (kr_redist_eval(&kn->r, &kr))
613 			redistribute = 1;
614 
615 	if (!redistribute)
616 		return;
617 
618 	if (kr.flags & F_REDISTRIBUTED) {
619 		main_imsg_compose_rde(IMSG_NETWORK_ADD, 0, &kr,
620 		    sizeof(struct kroute));
621 	} else {
622 		kr = kh->r;
623 		main_imsg_compose_rde(IMSG_NETWORK_DEL, 0, &kr,
624 		    sizeof(struct kroute));
625 	}
626 }
627 
628 void
kr_reload(int redis_label_or_prefix)629 kr_reload(int redis_label_or_prefix)
630 {
631 	struct kroute_node	*kr, *kn;
632 	u_int32_t		 dummy;
633 	int			 r;
634 	int			 filter_prio = kr_state.fib_prio;
635 
636 	/* update the priority filter */
637 	if (redis_label_or_prefix) {
638 		filter_prio = 0;
639 		log_info("%s: priority filter disabled", __func__);
640 	} else
641 		log_debug("%s: priority filter enabled", __func__);
642 
643 	if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, &filter_prio,
644 	    sizeof(filter_prio)) == -1) {
645 		log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__);
646 		/* not fatal */
647 	}
648 
649 	RB_FOREACH(kr, kroute_tree, &krt) {
650 		for (kn = kr; kn; kn = kn->next) {
651 			r = ospf_redistribute(&kn->r, &dummy);
652 			/*
653 			 * if it is redistributed, redistribute again metric
654 			 * may have changed.
655 			 */
656 			if ((kn->r.flags & F_REDISTRIBUTED && !r) || r)
657 				break;
658 		}
659 		if (kn) {
660 			/*
661 			 * kr_redistribute copes with removes and RDE with
662 			 * duplicates
663 			 */
664 			kr_redistribute(kr);
665 		}
666 	}
667 }
668 
669 /* rb-tree compare */
670 int
kroute_compare(struct kroute_node * a,struct kroute_node * b)671 kroute_compare(struct kroute_node *a, struct kroute_node *b)
672 {
673 	int	i;
674 
675 	/* XXX maybe switch a & b */
676 	i = memcmp(&a->r.prefix, &b->r.prefix, sizeof(a->r.prefix));
677 	if (i)
678 		return (i);
679 	if (a->r.prefixlen < b->r.prefixlen)
680 		return (-1);
681 	if (a->r.prefixlen > b->r.prefixlen)
682 		return (1);
683 
684 	/* if the priority is RTP_ANY finish on the first address hit */
685 	if (a->r.priority == RTP_ANY || b->r.priority == RTP_ANY)
686 		return (0);
687 	if (a->r.priority < b->r.priority)
688 		return (-1);
689 	if (a->r.priority > b->r.priority)
690 		return (1);
691 	return (0);
692 }
693 
694 /* tree management */
695 struct kroute_node *
kroute_find(const struct in6_addr * prefix,u_int8_t prefixlen,u_int8_t prio)696 kroute_find(const struct in6_addr *prefix, u_int8_t prefixlen, u_int8_t prio)
697 {
698 	struct kroute_node	s;
699 	struct kroute_node	*kn, *tmp;
700 
701 	s.r.prefix = *prefix;
702 	s.r.prefixlen = prefixlen;
703 	s.r.priority = prio;
704 
705 	kn = RB_FIND(kroute_tree, &krt, &s);
706 	if (kn && prio == RTP_ANY) {
707 		tmp = RB_PREV(kroute_tree, &krt, kn);
708 		while (tmp) {
709 			if (kroute_compare(&s, tmp) == 0)
710 				kn = tmp;
711 			else
712 				break;
713 			tmp = RB_PREV(kroute_tree, &krt, kn);
714 		}
715 	}
716 	return (kn);
717 }
718 
719 struct kroute_node *
kroute_matchgw(struct kroute_node * kr,struct in6_addr * nh,unsigned int scope)720 kroute_matchgw(struct kroute_node *kr, struct in6_addr *nh, unsigned int scope)
721 {
722 	while (kr) {
723 		if (scope == kr->r.scope &&
724 		    IN6_ARE_ADDR_EQUAL(&kr->r.nexthop, nh))
725 			return (kr);
726 		kr = kr->next;
727 	}
728 
729 	return (NULL);
730 }
731 
732 int
kroute_insert(struct kroute_node * kr)733 kroute_insert(struct kroute_node *kr)
734 {
735 	struct kroute_node	*krm, *krh;
736 
737 	kr->serial = kr_state.fib_serial;
738 
739 	if ((krh = RB_INSERT(kroute_tree, &krt, kr)) != NULL) {
740 		/*
741 		 * Multipath route, add at end of list.
742 		 */
743 		krm = krh;
744 		while (krm->next != NULL)
745 			krm = krm->next;
746 		krm->next = kr;
747 		kr->next = NULL; /* to be sure */
748 	} else
749 		krh = kr;
750 
751 	if (!(kr->r.flags & F_KERNEL)) {
752 		/* don't validate or redistribute ospf route */
753 		kr->r.flags &= ~F_DOWN;
754 		return (0);
755 	}
756 
757 	if (kif_validate(kr->r.ifindex))
758 		kr->r.flags &= ~F_DOWN;
759 	else
760 		kr->r.flags |= F_DOWN;
761 
762 	kr_redistribute(krh);
763 	return (0);
764 }
765 
766 int
kroute_remove(struct kroute_node * kr)767 kroute_remove(struct kroute_node *kr)
768 {
769 	struct kroute_node	*krm;
770 
771 	if ((krm = RB_FIND(kroute_tree, &krt, kr)) == NULL) {
772 		log_warnx("kroute_remove failed to find %s/%u",
773 		    log_in6addr(&kr->r.prefix), kr->r.prefixlen);
774 		return (-1);
775 	}
776 
777 	if (krm == kr) {
778 		/* head element */
779 		if (RB_REMOVE(kroute_tree, &krt, kr) == NULL) {
780 			log_warnx("kroute_remove failed for %s/%u",
781 			    log_in6addr(&kr->r.prefix), kr->r.prefixlen);
782 			return (-1);
783 		}
784 		if (kr->next != NULL) {
785 			if (RB_INSERT(kroute_tree, &krt, kr->next) != NULL) {
786 				log_warnx("kroute_remove failed to add %s/%u",
787 				    log_in6addr(&kr->r.prefix),
788 				    kr->r.prefixlen);
789 				return (-1);
790 			}
791 		}
792 	} else {
793 		/* somewhere in the list */
794 		while (krm->next != kr && krm->next != NULL)
795 			krm = krm->next;
796 		if (krm->next == NULL) {
797 			log_warnx("kroute_remove multipath list corrupted "
798 			    "for %s/%u", log_in6addr(&kr->r.prefix),
799 			    kr->r.prefixlen);
800 			return (-1);
801 		}
802 		krm->next = kr->next;
803 	}
804 
805 	kr_redist_remove(krm, kr);
806 	rtlabel_unref(kr->r.rtlabel);
807 
808 	free(kr);
809 	return (0);
810 }
811 
812 void
kroute_clear(void)813 kroute_clear(void)
814 {
815 	struct kroute_node	*kr;
816 
817 	while ((kr = RB_MIN(kroute_tree, &krt)) != NULL)
818 		kroute_remove(kr);
819 }
820 
821 struct iface *
kif_update(u_short ifindex,int flags,struct if_data * ifd,struct sockaddr_dl * sdl)822 kif_update(u_short ifindex, int flags, struct if_data *ifd,
823     struct sockaddr_dl *sdl)
824 {
825 	struct iface	*iface;
826 	char		 ifname[IF_NAMESIZE];
827 
828 	if ((iface = if_find(ifindex)) == NULL) {
829 		bzero(ifname, sizeof(ifname));
830 		if (sdl && sdl->sdl_family == AF_LINK) {
831 			if (sdl->sdl_nlen >= sizeof(ifname))
832 				memcpy(ifname, sdl->sdl_data,
833 				    sizeof(ifname) - 1);
834 			else if (sdl->sdl_nlen > 0)
835 				memcpy(ifname, sdl->sdl_data, sdl->sdl_nlen);
836 			else
837 				return (NULL);
838 		} else
839 			return (NULL);
840 		if ((iface = if_new(ifindex, ifname)) == NULL)
841 			return (NULL);
842 	}
843 
844 	if_update(iface, ifd->ifi_mtu, flags, ifd->ifi_type,
845 	    ifd->ifi_link_state, ifd->ifi_baudrate, ifd->ifi_rdomain);
846 
847 	return (iface);
848 }
849 
850 int
kif_validate(u_short ifindex)851 kif_validate(u_short ifindex)
852 {
853 	struct iface	*iface;
854 
855 	if ((iface = if_find(ifindex)) == NULL) {
856 		log_warnx("interface with index %u not found", ifindex);
857 		return (-1);
858 	}
859 
860 	return ((iface->flags & IFF_UP) && LINK_STATE_IS_UP(iface->linkstate));
861 }
862 
863 struct kroute_node *
kroute_match(struct in6_addr * key)864 kroute_match(struct in6_addr *key)
865 {
866 	int			 i;
867 	struct kroute_node	*kr;
868 	struct in6_addr		 ina;
869 
870 	/* we will never match the default route */
871 	for (i = 128; i > 0; i--) {
872 		inet6applymask(&ina, key, i);
873 		if ((kr = kroute_find(&ina, i, RTP_ANY)) != NULL)
874 			return (kr);
875 	}
876 
877 	/* if we don't have a match yet, try to find a default route */
878 	if ((kr = kroute_find(&in6addr_any, 0, RTP_ANY)) != NULL)
879 			return (kr);
880 
881 	return (NULL);
882 }
883 
884 /* misc */
885 int
protect_lo(void)886 protect_lo(void)
887 {
888 	struct kroute_node	*kr;
889 
890 	/* special protection for loopback */
891 	if ((kr = calloc(1, sizeof(struct kroute_node))) == NULL) {
892 		log_warn("protect_lo");
893 		return (-1);
894 	}
895 	memcpy(&kr->r.prefix, &in6addr_loopback, sizeof(kr->r.prefix));
896 	kr->r.prefixlen = 128;
897 	kr->r.flags = F_KERNEL|F_CONNECTED;
898 
899 	if (RB_INSERT(kroute_tree, &krt, kr) != NULL)
900 		free(kr);	/* kernel route already there, no problem */
901 
902 	return (0);
903 }
904 
/*
 * Round a up to the next multiple of sizeof(long); values that are not
 * positive yield sizeof(long).  The argument is evaluated more than once.
 */
#define ROUNDUP(a)							\
	(((a) > 0) ? ((((a) - 1) | (sizeof(long) - 1)) + 1) : sizeof(long))
907 
908 void
get_rtaddrs(int addrs,struct sockaddr * sa,struct sockaddr ** rti_info)909 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)
910 {
911 	int	i;
912 
913 	for (i = 0; i < RTAX_MAX; i++) {
914 		if (addrs & (1 << i)) {
915 			rti_info[i] = sa;
916 			sa = (struct sockaddr *)((char *)(sa) +
917 			    ROUNDUP(sa->sa_len));
918 		} else
919 			rti_info[i] = NULL;
920 	}
921 }
922 
923 void
if_change(u_short ifindex,int flags,struct if_data * ifd,struct sockaddr_dl * sdl)924 if_change(u_short ifindex, int flags, struct if_data *ifd,
925     struct sockaddr_dl *sdl)
926 {
927 	struct kroute_node	*kr, *tkr;
928 	struct iface		*iface;
929 	u_int8_t		 wasvalid, isvalid;
930 
931 	wasvalid = kif_validate(ifindex);
932 
933 	if ((iface = kif_update(ifindex, flags, ifd, sdl)) == NULL) {
934 		log_warn("if_change: kif_update(%u)", ifindex);
935 		return;
936 	}
937 
938 	/* inform engine and rde about state change */
939 	main_imsg_compose_rde(IMSG_IFINFO, 0, iface, sizeof(struct iface));
940 	main_imsg_compose_ospfe(IMSG_IFINFO, 0, iface, sizeof(struct iface));
941 
942 	isvalid = (iface->flags & IFF_UP) &&
943 	    LINK_STATE_IS_UP(iface->linkstate);
944 
945 	if (wasvalid == isvalid)
946 		return;		/* nothing changed wrt validity */
947 
948 	/* update redistribute list */
949 	RB_FOREACH(kr, kroute_tree, &krt) {
950 		for (tkr = kr; tkr != NULL; tkr = tkr->next) {
951 			if (tkr->r.ifindex == ifindex) {
952 				if (isvalid)
953 					tkr->r.flags &= ~F_DOWN;
954 				else
955 					tkr->r.flags |= F_DOWN;
956 
957 			}
958 		}
959 		kr_redistribute(kr);
960 	}
961 }
962 
963 void
if_newaddr(u_short ifindex,struct sockaddr_in6 * ifa,struct sockaddr_in6 * mask,struct sockaddr_in6 * brd)964 if_newaddr(u_short ifindex, struct sockaddr_in6 *ifa, struct sockaddr_in6 *mask,
965     struct sockaddr_in6 *brd)
966 {
967 	struct iface		*iface;
968 	struct iface_addr	*ia;
969 	struct ifaddrchange	 ifc;
970 
971 	if (ifa == NULL || ifa->sin6_family != AF_INET6)
972 		return;
973 	if ((iface = if_find(ifindex)) == NULL) {
974 		log_warnx("if_newaddr: corresponding if %d not found", ifindex);
975 		return;
976 	}
977 
978 	/* We only care about link-local and global-scope. */
979 	if (IN6_IS_ADDR_UNSPECIFIED(&ifa->sin6_addr) ||
980 	    IN6_IS_ADDR_LOOPBACK(&ifa->sin6_addr) ||
981 	    IN6_IS_ADDR_MULTICAST(&ifa->sin6_addr) ||
982 	    IN6_IS_ADDR_SITELOCAL(&ifa->sin6_addr) ||
983 	    IN6_IS_ADDR_V4MAPPED(&ifa->sin6_addr) ||
984 	    IN6_IS_ADDR_V4COMPAT(&ifa->sin6_addr))
985 		return;
986 
987 	clearscope(&ifa->sin6_addr);
988 
989 	if (IN6_IS_ADDR_LINKLOCAL(&ifa->sin6_addr) ||
990 	    iface->flags & IFF_LOOPBACK)
991 		iface->addr = ifa->sin6_addr;
992 
993 	if ((ia = calloc(1, sizeof(struct iface_addr))) == NULL)
994 		fatal("if_newaddr");
995 
996 	ia->addr = ifa->sin6_addr;
997 
998 	if (mask)
999 		ia->prefixlen = mask2prefixlen(mask);
1000 	else
1001 		ia->prefixlen = 0;
1002 	if (brd && brd->sin6_family == AF_INET6)
1003 		ia->dstbrd = brd->sin6_addr;
1004 	else
1005 		bzero(&ia->dstbrd, sizeof(ia->dstbrd));
1006 
1007 	switch (iface->type) {
1008 	case IF_TYPE_BROADCAST:
1009 	case IF_TYPE_NBMA:
1010 		log_debug("if_newaddr: ifindex %u, addr %s/%d",
1011 		    ifindex, log_in6addr(&ia->addr), ia->prefixlen);
1012 		break;
1013 	case IF_TYPE_VIRTUALLINK:	/* FIXME */
1014 		break;
1015 	case IF_TYPE_POINTOPOINT:
1016 	case IF_TYPE_POINTOMULTIPOINT:
1017 		log_debug("if_newaddr: ifindex %u, addr %s/%d, "
1018 		    "dest %s", ifindex, log_in6addr(&ia->addr),
1019 		    ia->prefixlen, log_in6addr(&ia->dstbrd));
1020 		break;
1021 	default:
1022 		fatalx("if_newaddr: unknown interface type");
1023 	}
1024 
1025 	TAILQ_INSERT_TAIL(&iface->ifa_list, ia, entry);
1026 	/* inform engine and rde if interface is used */
1027 	if (iface->cflags & F_IFACE_CONFIGURED) {
1028 		ifc.addr = ia->addr;
1029 		ifc.dstbrd = ia->dstbrd;
1030 		ifc.prefixlen = ia->prefixlen;
1031 		ifc.ifindex = ifindex;
1032 		main_imsg_compose_ospfe(IMSG_IFADDRNEW, 0, &ifc, sizeof(ifc));
1033 		main_imsg_compose_rde(IMSG_IFADDRNEW, 0, &ifc, sizeof(ifc));
1034 	}
1035 }
1036 
1037 void
if_deladdr(u_short ifindex,struct sockaddr_in6 * ifa,struct sockaddr_in6 * mask,struct sockaddr_in6 * brd)1038 if_deladdr(u_short ifindex, struct sockaddr_in6 *ifa, struct sockaddr_in6 *mask,
1039     struct sockaddr_in6 *brd)
1040 {
1041 	struct iface		*iface;
1042 	struct iface_addr	*ia, *nia;
1043 	struct ifaddrchange	 ifc;
1044 
1045 	if (ifa == NULL || ifa->sin6_family != AF_INET6)
1046 		return;
1047 	if ((iface = if_find(ifindex)) == NULL) {
1048 		log_warnx("if_deladdr: corresponding if %d not found", ifindex);
1049 		return;
1050 	}
1051 
1052 	/* We only care about link-local and global-scope. */
1053 	if (IN6_IS_ADDR_UNSPECIFIED(&ifa->sin6_addr) ||
1054 	    IN6_IS_ADDR_LOOPBACK(&ifa->sin6_addr) ||
1055 	    IN6_IS_ADDR_MULTICAST(&ifa->sin6_addr) ||
1056 	    IN6_IS_ADDR_SITELOCAL(&ifa->sin6_addr) ||
1057 	    IN6_IS_ADDR_V4MAPPED(&ifa->sin6_addr) ||
1058 	    IN6_IS_ADDR_V4COMPAT(&ifa->sin6_addr))
1059 		return;
1060 
1061 	clearscope(&ifa->sin6_addr);
1062 
1063 	for (ia = TAILQ_FIRST(&iface->ifa_list); ia != NULL; ia = nia) {
1064 		nia = TAILQ_NEXT(ia, entry);
1065 
1066 		if (IN6_ARE_ADDR_EQUAL(&ia->addr, &ifa->sin6_addr)) {
1067 			log_debug("if_deladdr: ifindex %u, addr %s/%d",
1068 			    ifindex, log_in6addr(&ia->addr), ia->prefixlen);
1069 			TAILQ_REMOVE(&iface->ifa_list, ia, entry);
1070 			/* inform engine and rde if interface is used */
1071 			if (iface->cflags & F_IFACE_CONFIGURED) {
1072 				ifc.addr = ia->addr;
1073 				ifc.dstbrd = ia->dstbrd;
1074 				ifc.prefixlen = ia->prefixlen;
1075 				ifc.ifindex = ifindex;
1076 				main_imsg_compose_ospfe(IMSG_IFADDRDEL, 0, &ifc,
1077 				    sizeof(ifc));
1078 				main_imsg_compose_rde(IMSG_IFADDRDEL, 0, &ifc,
1079 				    sizeof(ifc));
1080 			}
1081 			free(ia);
1082 			return;
1083 		}
1084 	}
1085 }
1086 
1087 void
if_announce(void * msg)1088 if_announce(void *msg)
1089 {
1090 	struct if_announcemsghdr	*ifan;
1091 	struct iface			*iface;
1092 
1093 	ifan = msg;
1094 
1095 	switch (ifan->ifan_what) {
1096 	case IFAN_ARRIVAL:
1097 		if ((iface = if_new(ifan->ifan_index, ifan->ifan_name)) == NULL)
1098 			fatal("if_announce failed");
1099 		break;
1100 	case IFAN_DEPARTURE:
1101 		iface = if_find(ifan->ifan_index);
1102 		if_del(iface);
1103 		break;
1104 	}
1105 }
1106 
1107 /* rtsock */
/*
 * Build a routing message for `kroute' and write it to the routing
 * socket `fd'.
 *
 * `action' becomes the rtm_type of the message (the code below treats
 * RTM_CHANGE and RTM_DELETE specially).  The message consists of the
 * header followed by the destination, an optional gateway (either the
 * IPv6 nexthop or, when there is no nexthop but an ifindex, a link-level
 * sockaddr for a cloning route), the netmask and an optional route label.
 *
 * Returns 0 without doing anything when fib_sync is off.  Returns -1 only
 * when the route label does not fit into the label sockaddr.  A failing
 * writev() is logged but still results in 0.
 */
int
send_rtmsg(int fd, int action, struct kroute *kroute)
{
	struct iovec		iov[5];
	struct rt_msghdr	hdr;
	struct pad {
		struct sockaddr_in6	addr;
		char			pad[sizeof(long)]; /* thank you IPv6 */
	} prefix, nexthop, mask;
	struct {
		struct sockaddr_dl	addr;
		char			pad[sizeof(long)];
	} ifp;
	struct sockaddr_rtlabel	sa_rl;
	int			iovcnt = 0;
	const char		*label;

	if (kr_state.fib_sync == 0)
		return (0);

	/* initialize header */
	bzero(&hdr, sizeof(hdr));
	hdr.rtm_version = RTM_VERSION;
	hdr.rtm_type = action;
	hdr.rtm_priority = kr_state.fib_prio;
	hdr.rtm_tableid = kr_state.rdomain;	/* rtableid */
	if (action == RTM_CHANGE)
		hdr.rtm_fmask = RTF_REJECT|RTF_BLACKHOLE;
	else
		hdr.rtm_flags = RTF_MPATH;
	hdr.rtm_seq = kr_state.rtseq++;	/* overflow doesn't matter */
	hdr.rtm_hdrlen = sizeof(hdr);
	hdr.rtm_msglen = sizeof(hdr);
	/* adjust iovec */
	iov[iovcnt].iov_base = &hdr;
	iov[iovcnt++].iov_len = sizeof(hdr);

	/* destination */
	bzero(&prefix, sizeof(prefix));
	prefix.addr.sin6_len = sizeof(struct sockaddr_in6);
	prefix.addr.sin6_family = AF_INET6;
	prefix.addr.sin6_addr = kroute->prefix;
	/* adjust header */
	hdr.rtm_addrs |= RTA_DST;
	hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
	/* adjust iovec */
	iov[iovcnt].iov_base = &prefix;
	iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));

	if (!IN6_IS_ADDR_UNSPECIFIED(&kroute->nexthop)) {
		bzero(&nexthop, sizeof(nexthop));
		nexthop.addr.sin6_len = sizeof(struct sockaddr_in6);
		nexthop.addr.sin6_family = AF_INET6;
		nexthop.addr.sin6_addr = kroute->nexthop;
		nexthop.addr.sin6_scope_id = kroute->scope;
		/*
		 * XXX we should set the sin6_scope_id but the kernel
		 * XXX does not expect it that way. It must be fiddled
		 * XXX into the sin6_addr. Welcome to the typical
		 * XXX IPv6 insanity and all without wine bottles.
		 */
		embedscope(&nexthop.addr);

		/* adjust header */
		hdr.rtm_flags |= RTF_GATEWAY;
		hdr.rtm_addrs |= RTA_GATEWAY;
		hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
		/* adjust iovec */
		iov[iovcnt].iov_base = &nexthop;
		iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));
	} else if (kroute->ifindex) {
		/*
		 * We don't have an interface address in that network,
		 * so we install a cloning route.  The kernel will then
		 * do neighbor discovery.
		 */
		bzero(&ifp, sizeof(ifp));
		ifp.addr.sdl_len = sizeof(struct sockaddr_dl);
		ifp.addr.sdl_family = AF_LINK;

		ifp.addr.sdl_index  = kroute->ifindex;
		/* adjust header */
		hdr.rtm_flags |= RTF_CLONING;
		hdr.rtm_addrs |= RTA_GATEWAY;
		hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_dl));
		/* adjust iovec */
		iov[iovcnt].iov_base = &ifp;
		iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_dl));
	}

	/* netmask */
	bzero(&mask, sizeof(mask));
	mask.addr.sin6_len = sizeof(struct sockaddr_in6);
	mask.addr.sin6_family = AF_INET6;
	mask.addr.sin6_addr = *prefixlen2mask(kroute->prefixlen);
	/* adjust header */
	if (kroute->prefixlen == 128)
		hdr.rtm_flags |= RTF_HOST;
	hdr.rtm_addrs |= RTA_NETMASK;
	hdr.rtm_msglen += ROUNDUP(sizeof(struct sockaddr_in6));
	/* adjust iovec */
	iov[iovcnt].iov_base = &mask;
	iov[iovcnt++].iov_len = ROUNDUP(sizeof(struct sockaddr_in6));

	if (kroute->rtlabel != 0) {
		/*
		 * The whole structure is written to the socket, so clear
		 * it first: strlcpy() only fills sr_label up to the NUL
		 * and would otherwise leave stack garbage behind it.
		 */
		bzero(&sa_rl, sizeof(sa_rl));
		sa_rl.sr_len = sizeof(sa_rl);
		sa_rl.sr_family = AF_UNSPEC;
		label = rtlabel_id2name(kroute->rtlabel);
		if (strlcpy(sa_rl.sr_label, label,
		    sizeof(sa_rl.sr_label)) >= sizeof(sa_rl.sr_label)) {
			log_warnx("send_rtmsg: invalid rtlabel");
			return (-1);
		}
		/* adjust header */
		hdr.rtm_addrs |= RTA_LABEL;
		hdr.rtm_msglen += sizeof(sa_rl);
		/* adjust iovec */
		iov[iovcnt].iov_base = &sa_rl;
		iov[iovcnt++].iov_len = sizeof(sa_rl);
	}

retry:
	if (writev(fd, iov, iovcnt) == -1) {
		if (errno == ESRCH) {
			if (hdr.rtm_type == RTM_CHANGE) {
				/*
				 * Nothing to change; iov[0] points at hdr,
				 * so the retry sends the same message as
				 * an RTM_ADD.
				 */
				hdr.rtm_type = RTM_ADD;
				goto retry;
			} else if (hdr.rtm_type == RTM_DELETE) {
				log_info("route %s/%u vanished before delete",
				    log_sockaddr(&prefix), kroute->prefixlen);
				return (0);
			}
		}
		/* any other failure is logged but not reported as an error */
		log_warn("send_rtmsg: action %u, prefix %s/%u", hdr.rtm_type,
		    log_sockaddr(&prefix), kroute->prefixlen);
		return (0);
	}

	return (0);
}
1246 
/*
 * Dump the kernel's AF_INET6 routing table for our routing table id via
 * sysctl and feed the dump to rtmsg_process().
 *
 * Returns whatever rtmsg_process() returns, or -1 if a sysctl call or the
 * buffer allocation fails.
 */
int
fetchtable(void)
{
	int	 mib[7] = {
		CTL_NET, PF_ROUTE, 0, AF_INET6, NET_RT_DUMP, 0,
		kr_state.rdomain	/* rtableid */
	};
	size_t	 need;
	char	*dump;
	int	 rv = -1;

	/* first pass: no buffer, the kernel just fills in the size */
	if (sysctl(mib, 7, NULL, &need, NULL, 0) == -1) {
		log_warn("sysctl");
		return (-1);
	}

	dump = malloc(need);
	if (dump == NULL) {
		log_warn("fetchtable");
		return (-1);
	}

	/* second pass: fetch the actual dump and process it */
	if (sysctl(mib, 7, dump, &need, NULL, 0) == -1)
		log_warn("sysctl");
	else
		rv = rtmsg_process(dump, need);

	free(dump);
	return (rv);
}
1282 
/*
 * Fetch the kernel's interface list via sysctl, passing `ifindex' as the
 * last MIB component, and feed the result to rtmsg_process().
 *
 * Returns whatever rtmsg_process() returns, or -1 if a sysctl call or the
 * buffer allocation fails.
 */
int
fetchifs(u_short ifindex)
{
	int	 mib[6] = {
		CTL_NET, PF_ROUTE, 0, AF_INET6, NET_RT_IFLIST, ifindex
	};
	size_t	 need;
	char	*list;
	int	 rv = -1;

	/* first pass: no buffer, the kernel just fills in the size */
	if (sysctl(mib, 6, NULL, &need, NULL, 0) == -1) {
		log_warn("sysctl");
		return (-1);
	}

	list = malloc(need);
	if (list == NULL) {
		log_warn("fetchifs");
		return (-1);
	}

	/* second pass: fetch the actual list and process it */
	if (sysctl(mib, 6, list, &need, NULL, 0) == -1)
		log_warn("sysctl");
	else
		rv = rtmsg_process(list, need);

	free(list);
	return (rv);
}
1317 
1318 int
dispatch_rtmsg(void)1319 dispatch_rtmsg(void)
1320 {
1321 	char			 buf[RT_BUF_SIZE];
1322 	ssize_t			 n;
1323 
1324 	if ((n = read(kr_state.fd, &buf, sizeof(buf))) == -1) {
1325 		if (errno == EAGAIN || errno == EINTR)
1326 			return (0);
1327 		log_warn("dispatch_rtmsg: read error");
1328 		return (-1);
1329 	}
1330 
1331 	if (n == 0) {
1332 		log_warnx("routing socket closed");
1333 		return (-1);
1334 	}
1335 
1336 	return (rtmsg_process(buf, n));
1337 }
1338 
/*
 * Walk a buffer holding `len' bytes of back-to-back routing socket
 * messages and apply each one to our view of the kernel state.
 *
 * Route messages (RTM_ADD, RTM_GET, RTM_CHANGE, RTM_DELETE) are decoded
 * into prefix, prefix length, nexthop, scope, priority and flags, and then
 * update, insert or remove the matching kroute_node.  Interface and
 * address messages are passed on to if_change(), if_newaddr(),
 * if_deladdr() and if_announce().  RTM_DESYNC schedules a reload of the
 * kernel information.  Messages with a foreign RTM_VERSION and unknown
 * message types are skipped.
 *
 * A truncated or zero-length message in the buffer is fatal.
 *
 * Returns the offset reached at the end of the buffer, or -1 when
 * allocating a new node fails, when deleting an alien route fails, when
 * the multipath route to delete cannot be found, or when kroute_remove()
 * fails.
 */
int
rtmsg_process(char *buf, size_t len)
{
	struct rt_msghdr	*rtm;
	struct if_msghdr	 ifm;
	struct ifa_msghdr	*ifam;
	struct sockaddr		*sa, *rti_info[RTAX_MAX];
	struct sockaddr_in6	*sa_in6;
	struct sockaddr_rtlabel	*label;
	struct kroute_node	*kr, *okr;
	struct in6_addr		 prefix, nexthop;
	u_int8_t		 prefixlen, prio;
	int			 flags, mpath;
	unsigned int		 scope;
	u_short			 ifindex = 0;
	int			 rv, delay;
	size_t			 offset;
	char			*next;

	for (offset = 0; offset < len; offset += rtm->rtm_msglen) {
		next = buf + offset;
		rtm = (struct rt_msghdr *)next;
		if (len < offset + sizeof(u_short) ||
		    len < offset + rtm->rtm_msglen)
			fatalx("rtmsg_process: partial rtm in buffer");
		/*
		 * A zero length would pass the check above but never
		 * advance offset, leaving us spinning here forever.
		 */
		if (rtm->rtm_msglen == 0)
			fatalx("rtmsg_process: zero-length rtm in buffer");
		if (rtm->rtm_version != RTM_VERSION)
			continue;

		/* per-message defaults */
		bzero(&prefix, sizeof(prefix));
		bzero(&nexthop, sizeof(nexthop));
		scope = 0;
		prefixlen = 0;
		flags = F_KERNEL;
		mpath = 0;
		prio = 0;

		sa = (struct sockaddr *)(next + rtm->rtm_hdrlen);
		get_rtaddrs(rtm->rtm_addrs, sa, rti_info);

		/* first pass: decode the route messages */
		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_GET:
		case RTM_CHANGE:
		case RTM_DELETE:
			if (rtm->rtm_errno)		/* failed attempts... */
				continue;

			if (rtm->rtm_tableid != kr_state.rdomain)
				continue;

			if (rtm->rtm_type == RTM_GET &&
			    rtm->rtm_pid != kr_state.pid) /* caused by us */
				continue;

			if ((sa = rti_info[RTAX_DST]) == NULL)
				continue;

			/* Skip ARP/ND cache and broadcast routes. */
			if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
				continue;

			if (rtm->rtm_flags & RTF_MPATH)
				mpath = 1;
			prio = rtm->rtm_priority;
			/* routes carrying our own priority count as ours */
			flags = (prio == kr_state.fib_prio) ?
			    F_OSPFD_INSERTED : F_KERNEL;

			switch (sa->sa_family) {
			case AF_INET6:
				prefix =
				    ((struct sockaddr_in6 *)sa)->sin6_addr;
				sa_in6 = (struct sockaddr_in6 *)
				    rti_info[RTAX_NETMASK];
				if (sa_in6 != NULL) {
					/* an empty mask leaves prefixlen 0 */
					if (sa_in6->sin6_len != 0)
						prefixlen = mask2prefixlen(
						    sa_in6);
				} else if (rtm->rtm_flags & RTF_HOST)
					prefixlen = 128;
				else
					fatalx("classful IPv6 address?!!");
				if (rtm->rtm_flags & RTF_STATIC)
					flags |= F_STATIC;
				if (rtm->rtm_flags & RTF_BLACKHOLE)
					flags |= F_BLACKHOLE;
				if (rtm->rtm_flags & RTF_REJECT)
					flags |= F_REJECT;
				if (rtm->rtm_flags & RTF_DYNAMIC)
					flags |= F_DYNAMIC;
				break;
			default:
				continue;
			}

			ifindex = rtm->rtm_index;
			if ((sa = rti_info[RTAX_GATEWAY]) != NULL) {
				switch (sa->sa_family) {
				case AF_INET6:
					if (rtm->rtm_flags & RTF_CONNECTED)
						flags |= F_CONNECTED;

					sa_in6 = (struct sockaddr_in6 *)sa;
					/*
					 * XXX The kernel provides the scope
					 * XXX via the kame hack instead of
					 * XXX the scope_id field.
					 */
					recoverscope(sa_in6);
					nexthop = sa_in6->sin6_addr;
					scope = sa_in6->sin6_scope_id;
					break;
				case AF_LINK:
					flags |= F_CONNECTED;
					break;
				}
			}
		}

		/* second pass: act on the message */
		switch (rtm->rtm_type) {
		case RTM_ADD:
		case RTM_GET:
		case RTM_CHANGE:
			if (IN6_IS_ADDR_UNSPECIFIED(&nexthop) &&
			    !(flags & F_CONNECTED)) {
				log_warnx("rtmsg_process no nexthop for %s/%u",
				    log_in6addr(&prefix), prefixlen);
				continue;
			}

			if ((okr = kroute_find(&prefix, prefixlen, prio))
			    != NULL) {
				kr = okr;
				if ((mpath || prio == kr_state.fib_prio) &&
				    (kr = kroute_matchgw(okr, &nexthop, scope)) ==
				    NULL) {
					log_warnx("rtmsg_process: mpath route"
					    " not found");
					/* add routes we missed out earlier */
					goto add;
				}

				/* keep the redistribution state of the node */
				if (kr->r.flags & F_REDISTRIBUTED)
					flags |= F_REDISTRIBUTED;
				kr->r.nexthop = nexthop;
				kr->r.scope = scope;
				kr->r.flags = flags;
				kr->r.ifindex = ifindex;

				/* drop the old label, then take over the new one */
				rtlabel_unref(kr->r.rtlabel);
				kr->r.rtlabel = 0;
				kr->r.ext_tag = 0;
				if ((label = (struct sockaddr_rtlabel *)
				    rti_info[RTAX_LABEL]) != NULL) {
					kr->r.rtlabel =
					    rtlabel_name2id(label->sr_label);
					kr->r.ext_tag =
					    rtlabel_id2tag(kr->r.rtlabel);
				}

				if (kif_validate(kr->r.ifindex))
					kr->r.flags &= ~F_DOWN;
				else
					kr->r.flags |= F_DOWN;

				/* just readd, the RDE will care */
				kr->serial = kr_state.fib_serial;
				kr_redistribute(kr);
			} else {
add:
				if ((kr = calloc(1,
				    sizeof(struct kroute_node))) == NULL) {
					log_warn("rtmsg_process calloc");
					return (-1);
				}
				kr->r.prefix = prefix;
				kr->r.prefixlen = prefixlen;
				kr->r.nexthop = nexthop;
				kr->r.scope = scope;
				kr->r.flags = flags;
				kr->r.ifindex = ifindex;
				kr->r.priority = prio;

				if (rtm->rtm_priority == kr_state.fib_prio) {
					/*
					 * A route with our priority that we
					 * do not know about: remove it from
					 * the kernel instead of tracking it.
					 */
					log_warnx("alien OSPF route %s/%d",
					    log_in6addr(&prefix), prefixlen);
					rv = send_rtmsg(kr_state.fd,
					    RTM_DELETE, &kr->r);
					free(kr);
					if (rv == -1)
						return (-1);
				} else {
					if ((label = (struct sockaddr_rtlabel *)
					    rti_info[RTAX_LABEL]) != NULL) {
						kr->r.rtlabel =
						    rtlabel_name2id(
						    label->sr_label);
						kr->r.ext_tag =
						    rtlabel_id2tag(
						    kr->r.rtlabel);
					}

					kroute_insert(kr);
				}
			}
			break;
		case RTM_DELETE:
			if ((kr = kroute_find(&prefix, prefixlen, prio)) ==
			    NULL)
				continue;
			if (!(kr->r.flags & F_KERNEL))
				continue;
			/* get the correct route */
			if (mpath && (kr = kroute_matchgw(kr, &nexthop,
			    scope)) == NULL) {
				log_warnx("rtmsg_process mpath route"
				    " not found");
				return (-1);
			}
			if (kroute_remove(kr) == -1)
				return (-1);
			break;
		case RTM_IFINFO:
			/* work on a local copy of the interface header */
			memcpy(&ifm, next, sizeof(ifm));
			if_change(ifm.ifm_index, ifm.ifm_flags, &ifm.ifm_data,
			    (struct sockaddr_dl *)rti_info[RTAX_IFP]);
			break;
		case RTM_NEWADDR:
			ifam = (struct ifa_msghdr *)rtm;
			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
			    RTA_BRD)) == 0)
				break;

			if_newaddr(ifam->ifam_index,
			    (struct sockaddr_in6 *)rti_info[RTAX_IFA],
			    (struct sockaddr_in6 *)rti_info[RTAX_NETMASK],
			    (struct sockaddr_in6 *)rti_info[RTAX_BRD]);
			break;
		case RTM_DELADDR:
			ifam = (struct ifa_msghdr *)rtm;
			if ((ifam->ifam_addrs & (RTA_NETMASK | RTA_IFA |
			    RTA_BRD)) == 0)
				break;

			if_deladdr(ifam->ifam_index,
			    (struct sockaddr_in6 *)rti_info[RTAX_IFA],
			    (struct sockaddr_in6 *)rti_info[RTAX_NETMASK],
			    (struct sockaddr_in6 *)rti_info[RTAX_BRD]);
			break;
		case RTM_IFANNOUNCE:
			if_announce(next);
			break;
		case RTM_DESYNC:
			/*
			 * We lost some routing packets. Schedule a reload
			 * of the kernel route/interface information.
			 */
			if (kr_state.reload_state == KR_RELOAD_IDLE) {
				delay = KR_RELOAD_TIMER;
				log_info("desync; scheduling fib reload");
			} else {
				delay = KR_RELOAD_HOLD_TIMER;
				log_debug("desync during KR_RELOAD_%s",
				    kr_state.reload_state ==
				    KR_RELOAD_FETCH ? "FETCH" : "HOLD");
			}
			kr_state.reload_state = KR_RELOAD_FETCH;
			kr_fib_reload_arm_timer(delay);
			break;
		default:
			/* ignore for now */
			break;
		}
	}
	return (offset);
}
1615