xref: /openbsd/sys/net/rtsock.c (revision dfc54264)
1 /*	$OpenBSD: rtsock.c,v 1.374 2024/06/14 08:32:22 mvs Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
98 #define	ROUTESNDQ	8192
99 #define	ROUTERCVQ	8192
100 
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
103 struct walkarg {
104 	int	w_op, w_arg, w_tmemsize;
105 	size_t	w_given, w_needed;
106 	caddr_t	w_where, w_tmem;
107 };
108 
109 void	route_prinit(void);
110 void	rcb_ref(void *, void *);
111 void	rcb_unref(void *, void *);
112 int	route_output(struct mbuf *, struct socket *);
113 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int	route_attach(struct socket *, int, int);
115 int	route_detach(struct socket *);
116 int	route_disconnect(struct socket *);
117 int	route_shutdown(struct socket *);
118 void	route_rcvd(struct socket *);
119 int	route_send(struct socket *, struct mbuf *, struct mbuf *,
120 	    struct mbuf *);
121 int	route_sockaddr(struct socket *, struct mbuf *);
122 int	route_peeraddr(struct socket *, struct mbuf *);
123 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
124 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
125 int	route_cleargateway(struct rtentry *, void *, unsigned int);
126 void	rtm_senddesync_timer(void *);
127 void	rtm_senddesync(struct socket *);
128 int	rtm_sendup(struct socket *, struct mbuf *);
129 
130 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
131 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
132 	    uint8_t, unsigned int);
133 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
134 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
135 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
136 		     struct walkarg *);
137 int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
138 int		 rtm_validate_proposal(struct rt_addrinfo *);
139 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
140 		     struct rt_kmetrics *);
141 void		 rtm_getmetrics(const struct rtentry *,
142 		     struct rt_metrics *);
143 
144 int		 sysctl_iflist(int, struct walkarg *);
145 int		 sysctl_ifnames(struct walkarg *);
146 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
147 
148 int		 rt_setsource(unsigned int, const struct sockaddr *);
149 
150 /*
151  * Locks used to protect struct members
152  *       I       immutable after creation
153  *       s       solock
154  */
155 struct rtpcb {
156 	struct socket		*rop_socket;		/* [I] */
157 
158 	SRPL_ENTRY(rtpcb)	rop_list;
159 	struct refcnt		rop_refcnt;
160 	struct timeout		rop_timeout;
161 	unsigned int		rop_msgfilter;		/* [s] */
162 	unsigned int		rop_flagfilter;		/* [s] */
163 	unsigned int		rop_flags;		/* [s] */
164 	u_int			rop_rtableid;		/* [s] */
165 	unsigned short		rop_proto;		/* [I] */
166 	u_char			rop_priority;		/* [s] */
167 };
168 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
169 
170 struct rtptable {
171 	SRPL_HEAD(, rtpcb)	rtp_list;
172 	struct srpl_rc		rtp_rc;
173 	struct rwlock		rtp_lk;
174 	unsigned int		rtp_count;
175 };
176 
177 struct pool rtpcb_pool;
178 struct rtptable rtptable;
179 
/*
 * rop_flags values.  Together with rop_timeout they are used to tell
 * userland, by way of an RTM_DESYNC message, that the route socket
 * overflowed and messages have been lost.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Queue nothing more until the
					   socket has been emptied */

/* Delay before RTM_DESYNC delivery is attempted again, in milliseconds. */
#define ROUTE_DESYNC_RESEND_TIMEOUT	200
190 
191 void
route_prinit(void)192 route_prinit(void)
193 {
194 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
195 	rw_init(&rtptable.rtp_lk, "rtsock");
196 	SRPL_INIT(&rtptable.rtp_list);
197 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
198 	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
199 }
200 
201 void
rcb_ref(void * null,void * v)202 rcb_ref(void *null, void *v)
203 {
204 	struct rtpcb *rop = v;
205 
206 	refcnt_take(&rop->rop_refcnt);
207 }
208 
209 void
rcb_unref(void * null,void * v)210 rcb_unref(void *null, void *v)
211 {
212 	struct rtpcb *rop = v;
213 
214 	refcnt_rele_wake(&rop->rop_refcnt);
215 }
216 
217 int
route_attach(struct socket * so,int proto,int wait)218 route_attach(struct socket *so, int proto, int wait)
219 {
220 	struct rtpcb	*rop;
221 	int		 error;
222 
223 	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
224 	if (error)
225 		return (error);
226 	/*
227 	 * use the rawcb but allocate a rtpcb, this
228 	 * code does not care about the additional fields
229 	 * and works directly on the raw socket.
230 	 */
231 	rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
232 	    PR_ZERO);
233 	if (rop == NULL)
234 		return (ENOBUFS);
235 	so->so_pcb = rop;
236 	/* Init the timeout structure */
237 	timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so,
238 	    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
239 	refcnt_init(&rop->rop_refcnt);
240 
241 	rop->rop_socket = so;
242 	rop->rop_proto = proto;
243 
244 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
245 
246 	soisconnected(so);
247 	so->so_options |= SO_USELOOPBACK;
248 
249 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
250 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
251 	    rop_list);
252 	rtptable.rtp_count++;
253 	rw_exit(&rtptable.rtp_lk);
254 
255 	return (0);
256 }
257 
258 int
route_detach(struct socket * so)259 route_detach(struct socket *so)
260 {
261 	struct rtpcb	*rop;
262 
263 	soassertlocked(so);
264 
265 	rop = sotortpcb(so);
266 	if (rop == NULL)
267 		return (EINVAL);
268 
269 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
270 
271 	rtptable.rtp_count--;
272 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
273 	    rop_list);
274 	rw_exit(&rtptable.rtp_lk);
275 
276 	sounlock(so);
277 
278 	/* wait for all references to drop */
279 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
280 	timeout_del_barrier(&rop->rop_timeout);
281 
282 	solock(so);
283 
284 	so->so_pcb = NULL;
285 	KASSERT((so->so_state & SS_NOFDREF) == 0);
286 	pool_put(&rtpcb_pool, rop);
287 
288 	return (0);
289 }
290 
/* Disconnect request: mark the socket disconnected; always returns 0. */
int
route_disconnect(struct socket *so)
{
	soisdisconnected(so);

	return (0);
}
297 
/* Shutdown request: no more data can be sent; always returns 0. */
int
route_shutdown(struct socket *so)
{
	socantsendmore(so);

	return (0);
}
304 
305 void
route_rcvd(struct socket * so)306 route_rcvd(struct socket *so)
307 {
308 	struct rtpcb *rop = sotortpcb(so);
309 
310 	soassertlocked(so);
311 
312 	/*
313 	 * If we are in a FLUSH state, check if the buffer is
314 	 * empty so that we can clear the flag.
315 	 */
316 
317 	mtx_enter(&so->so_rcv.sb_mtx);
318 	if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
319 	    ((sbspace(so, &so->so_rcv) == so->so_rcv.sb_hiwat)))
320 		rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
321 	mtx_leave(&so->so_rcv.sb_mtx);
322 }
323 
324 int
route_send(struct socket * so,struct mbuf * m,struct mbuf * nam,struct mbuf * control)325 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
326     struct mbuf *control)
327 {
328 	int error;
329 
330 	soassertlocked(so);
331 
332 	if (control && control->m_len) {
333 		error = EOPNOTSUPP;
334 		goto out;
335 	}
336 
337 	if (nam) {
338 		error = EISCONN;
339 		goto out;
340 	}
341 
342 	error = route_output(m, so);
343 	m = NULL;
344 
345 out:
346 	m_freem(control);
347 	m_freem(m);
348 
349 	return (error);
350 }
351 
352 int
route_sockaddr(struct socket * so,struct mbuf * nam)353 route_sockaddr(struct socket *so, struct mbuf *nam)
354 {
355 	return (EINVAL);
356 }
357 
358 int
route_peeraddr(struct socket * so,struct mbuf * nam)359 route_peeraddr(struct socket *so, struct mbuf *nam)
360 {
361 	/* minimal support, just implement a fake peer address */
362 	bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
363 	nam->m_len = route_src.sa_len;
364 	return (0);
365 }
366 
367 int
route_ctloutput(int op,struct socket * so,int level,int optname,struct mbuf * m)368 route_ctloutput(int op, struct socket *so, int level, int optname,
369     struct mbuf *m)
370 {
371 	struct rtpcb *rop = sotortpcb(so);
372 	int error = 0;
373 	unsigned int tid, prio;
374 
375 	if (level != AF_ROUTE)
376 		return (EINVAL);
377 
378 	switch (op) {
379 	case PRCO_SETOPT:
380 		switch (optname) {
381 		case ROUTE_MSGFILTER:
382 			if (m == NULL || m->m_len != sizeof(unsigned int))
383 				error = EINVAL;
384 			else
385 				rop->rop_msgfilter = *mtod(m, unsigned int *);
386 			break;
387 		case ROUTE_TABLEFILTER:
388 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
389 				error = EINVAL;
390 				break;
391 			}
392 			tid = *mtod(m, unsigned int *);
393 			if (tid != RTABLE_ANY && !rtable_exists(tid))
394 				error = ENOENT;
395 			else
396 				rop->rop_rtableid = tid;
397 			break;
398 		case ROUTE_PRIOFILTER:
399 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
400 				error = EINVAL;
401 				break;
402 			}
403 			prio = *mtod(m, unsigned int *);
404 			if (prio > RTP_MAX)
405 				error = EINVAL;
406 			else
407 				rop->rop_priority = prio;
408 			break;
409 		case ROUTE_FLAGFILTER:
410 			if (m == NULL || m->m_len != sizeof(unsigned int))
411 				error = EINVAL;
412 			else
413 				rop->rop_flagfilter = *mtod(m, unsigned int *);
414 			break;
415 		default:
416 			error = ENOPROTOOPT;
417 			break;
418 		}
419 		break;
420 	case PRCO_GETOPT:
421 		switch (optname) {
422 		case ROUTE_MSGFILTER:
423 			m->m_len = sizeof(unsigned int);
424 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
425 			break;
426 		case ROUTE_TABLEFILTER:
427 			m->m_len = sizeof(unsigned int);
428 			*mtod(m, unsigned int *) = rop->rop_rtableid;
429 			break;
430 		case ROUTE_PRIOFILTER:
431 			m->m_len = sizeof(unsigned int);
432 			*mtod(m, unsigned int *) = rop->rop_priority;
433 			break;
434 		case ROUTE_FLAGFILTER:
435 			m->m_len = sizeof(unsigned int);
436 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
437 			break;
438 		default:
439 			error = ENOPROTOOPT;
440 			break;
441 		}
442 	}
443 	return (error);
444 }
445 
/*
 * Timeout handler set up by route_attach(): retry RTM_DESYNC delivery
 * for the socket passed as `xso', with the socket locked.
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*so = (struct socket *)xso;

	solock(so);
	rtm_senddesync(so);
	sounlock(so);
}
455 
456 void
rtm_senddesync(struct socket * so)457 rtm_senddesync(struct socket *so)
458 {
459 	struct rtpcb	*rop = sotortpcb(so);
460 	struct mbuf	*desync_mbuf;
461 
462 	soassertlocked(so);
463 
464 	/*
465 	 * Dying socket is disconnected by upper layer and there is
466 	 * no reason to send packet. Also we shouldn't reschedule
467 	 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
468 	 */
469 	if ((so->so_state & SS_ISCONNECTED) == 0 ||
470 	    (so->so_rcv.sb_state & SS_CANTRCVMORE))
471 		return;
472 
473 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
474 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
475 		return;
476 
477 	/*
478 	 * If we fail to alloc memory or if sbappendaddr()
479 	 * fails, re-add timeout and try again.
480 	 */
481 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
482 	if (desync_mbuf != NULL) {
483 		int ret;
484 
485 		mtx_enter(&so->so_rcv.sb_mtx);
486 		ret = sbappendaddr(so, &so->so_rcv, &route_src,
487 		    desync_mbuf, NULL);
488 		mtx_leave(&so->so_rcv.sb_mtx);
489 
490 		if (ret != 0) {
491 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
492 			sorwakeup(rop->rop_socket);
493 			return;
494 		}
495 		m_freem(desync_mbuf);
496 	}
497 	/* Re-add timeout to try sending msg again */
498 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
499 }
500 
501 void
route_input(struct mbuf * m0,struct socket * so0,sa_family_t sa_family)502 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
503 {
504 	struct socket *so;
505 	struct rtpcb *rop;
506 	struct rt_msghdr *rtm;
507 	struct mbuf *m = m0;
508 	struct srp_ref sr;
509 
510 	/* ensure that we can access the rtm_type via mtod() */
511 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
512 		m_freem(m);
513 		return;
514 	}
515 
516 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
517 		/*
518 		 * If route socket is bound to an address family only send
519 		 * messages that match the address family. Address family
520 		 * agnostic messages are always sent.
521 		 */
522 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
523 		    rop->rop_proto != sa_family)
524 			continue;
525 
526 
527 		so = rop->rop_socket;
528 		solock(so);
529 
530 		/*
531 		 * Check to see if we don't want our own messages and
532 		 * if we can receive anything.
533 		 */
534 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
535 		    !(so->so_state & SS_ISCONNECTED) ||
536 		    (so->so_rcv.sb_state & SS_CANTRCVMORE))
537 			goto next;
538 
539 		/* filter messages that the process does not want */
540 		rtm = mtod(m, struct rt_msghdr *);
541 		/* but RTM_DESYNC can't be filtered */
542 		if (rtm->rtm_type != RTM_DESYNC) {
543 			if (rop->rop_msgfilter != 0 &&
544 			    !(rop->rop_msgfilter & (1U << rtm->rtm_type)))
545 				goto next;
546 			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
547 				goto next;
548 		}
549 		switch (rtm->rtm_type) {
550 		case RTM_IFANNOUNCE:
551 		case RTM_DESYNC:
552 			/* no tableid */
553 			break;
554 		case RTM_RESOLVE:
555 		case RTM_NEWADDR:
556 		case RTM_DELADDR:
557 		case RTM_IFINFO:
558 		case RTM_80211INFO:
559 		case RTM_BFD:
560 			/* check against rdomain id */
561 			if (rop->rop_rtableid != RTABLE_ANY &&
562 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
563 				goto next;
564 			break;
565 		default:
566 			if (rop->rop_priority != 0 &&
567 			    rop->rop_priority < rtm->rtm_priority)
568 				goto next;
569 			/* check against rtable id */
570 			if (rop->rop_rtableid != RTABLE_ANY &&
571 			    rop->rop_rtableid != rtm->rtm_tableid)
572 				goto next;
573 			break;
574 		}
575 
576 		/*
577 		 * Check to see if the flush flag is set. If so, don't queue
578 		 * any more messages until the flag is cleared.
579 		 */
580 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
581 			goto next;
582 
583 		rtm_sendup(so, m);
584 next:
585 		sounlock(so);
586 	}
587 	SRPL_LEAVE(&sr);
588 
589 	m_freem(m);
590 }
591 
592 int
rtm_sendup(struct socket * so,struct mbuf * m0)593 rtm_sendup(struct socket *so, struct mbuf *m0)
594 {
595 	struct rtpcb *rop = sotortpcb(so);
596 	struct mbuf *m;
597 	int send_desync = 0;
598 
599 	soassertlocked(so);
600 
601 	m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
602 	if (m == NULL)
603 		return (ENOMEM);
604 
605 	mtx_enter(&so->so_rcv.sb_mtx);
606 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
607 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0)
608 		send_desync = 1;
609 	mtx_leave(&so->so_rcv.sb_mtx);
610 
611 	if (send_desync) {
612 		/* Flag socket as desync'ed and flush required */
613 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
614 		rtm_senddesync(so);
615 		m_freem(m);
616 		return (ENOBUFS);
617 	}
618 
619 	sorwakeup(so);
620 	return (0);
621 }
622 
623 struct rt_msghdr *
rtm_report(struct rtentry * rt,u_char type,int seq,int tableid)624 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
625 {
626 	struct rt_msghdr	*rtm;
627 	struct rt_addrinfo	 info;
628 	struct sockaddr_rtlabel	 sa_rl;
629 	struct sockaddr_in6	 sa_mask;
630 #ifdef BFD
631 	struct sockaddr_bfd	 sa_bfd;
632 #endif
633 	struct ifnet		*ifp = NULL;
634 	int			 len;
635 
636 	bzero(&info, sizeof(info));
637 	info.rti_info[RTAX_DST] = rt_key(rt);
638 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
639 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
640 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
641 #ifdef BFD
642 	if (rt->rt_flags & RTF_BFD) {
643 		KERNEL_LOCK();
644 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
645 		KERNEL_UNLOCK();
646 	}
647 #endif
648 #ifdef MPLS
649 	if (rt->rt_flags & RTF_MPLS) {
650 		struct sockaddr_mpls	 sa_mpls;
651 
652 		bzero(&sa_mpls, sizeof(sa_mpls));
653 		sa_mpls.smpls_family = AF_MPLS;
654 		sa_mpls.smpls_len = sizeof(sa_mpls);
655 		sa_mpls.smpls_label = ((struct rt_mpls *)
656 		    rt->rt_llinfo)->mpls_label;
657 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
658 		info.rti_mpls = ((struct rt_mpls *)
659 		    rt->rt_llinfo)->mpls_operation;
660 	}
661 #endif
662 	ifp = if_get(rt->rt_ifidx);
663 	if (ifp != NULL) {
664 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
665 		info.rti_info[RTAX_IFA] = rtable_getsource(tableid,
666 		    info.rti_info[RTAX_DST]->sa_family);
667 		if (info.rti_info[RTAX_IFA] == NULL)
668 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
669 		if (ifp->if_flags & IFF_POINTOPOINT)
670 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
671 	}
672 	if_put(ifp);
673 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
674 
675 	/* build new route message */
676 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
677 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
678 
679 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
680 	rtm->rtm_type = type;
681 	rtm->rtm_index = rt->rt_ifidx;
682 	rtm->rtm_tableid = tableid;
683 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
684 	rtm->rtm_flags = rt->rt_flags;
685 	rtm->rtm_pid = curproc->p_p->ps_pid;
686 	rtm->rtm_seq = seq;
687 	rtm_getmetrics(rt, &rtm->rtm_rmx);
688 	rtm->rtm_addrs = info.rti_addrs;
689 #ifdef MPLS
690 	rtm->rtm_mpls = info.rti_mpls;
691 #endif
692 	return rtm;
693 }
694 
695 int
route_output(struct mbuf * m,struct socket * so)696 route_output(struct mbuf *m, struct socket *so)
697 {
698 	struct rt_msghdr	*rtm = NULL;
699 	struct rtentry		*rt = NULL;
700 	struct rt_addrinfo	 info;
701 	struct ifnet		*ifp;
702 	int			 len, seq, useloopback, error = 0;
703 	u_int			 tableid;
704 	u_int8_t		 prio;
705 	u_char			 vers, type;
706 
707 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
708 	    (m = m_pullup(m, sizeof(int32_t))) == NULL))
709 		return (ENOBUFS);
710 	if ((m->m_flags & M_PKTHDR) == 0)
711 		panic("route_output");
712 
713 	useloopback = so->so_options & SO_USELOOPBACK;
714 
715 	/*
716 	 * The socket can't be closed concurrently because the file
717 	 * descriptor reference is still held.
718 	 */
719 
720 	sounlock(so);
721 
722 	len = m->m_pkthdr.len;
723 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) +
724 	    sizeof(rtm->rtm_hdrlen) ||
725 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
726 		error = EINVAL;
727 		goto fail;
728 	}
729 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
730 	switch (vers) {
731 	case RTM_VERSION:
732 		if (len < sizeof(struct rt_msghdr)) {
733 			error = EINVAL;
734 			goto fail;
735 		}
736 		if (len > RTM_MAXSIZE) {
737 			error = EMSGSIZE;
738 			goto fail;
739 		}
740 		rtm = malloc(len, M_RTABLE, M_WAITOK);
741 		m_copydata(m, 0, len, rtm);
742 		break;
743 	default:
744 		error = EPROTONOSUPPORT;
745 		goto fail;
746 	}
747 
748 	/* Verify that the caller is sending an appropriate message early */
749 	switch (rtm->rtm_type) {
750 	case RTM_ADD:
751 	case RTM_DELETE:
752 	case RTM_GET:
753 	case RTM_CHANGE:
754 	case RTM_PROPOSAL:
755 	case RTM_SOURCE:
756 		break;
757 	default:
758 		error = EOPNOTSUPP;
759 		goto fail;
760 	}
761 	/*
762 	 * Verify that the header length is valid.
763 	 * All messages from userland start with a struct rt_msghdr.
764 	 */
765 	if (rtm->rtm_hdrlen == 0)	/* old client */
766 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
767 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
768 	    len < rtm->rtm_hdrlen) {
769 		error = EINVAL;
770 		goto fail;
771 	}
772 
773 	rtm->rtm_pid = curproc->p_p->ps_pid;
774 
775 	/*
776 	 * Verify that the caller has the appropriate privilege; RTM_GET
777 	 * is the only operation the non-superuser is allowed.
778 	 */
779 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
780 		error = EACCES;
781 		goto fail;
782 	}
783 	tableid = rtm->rtm_tableid;
784 	if (!rtable_exists(tableid)) {
785 		if (rtm->rtm_type == RTM_ADD) {
786 			if ((error = rtable_add(tableid)) != 0)
787 				goto fail;
788 		} else {
789 			error = EINVAL;
790 			goto fail;
791 		}
792 	}
793 
794 	/* Do not let userland play with kernel-only flags. */
795 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
796 		error = EINVAL;
797 		goto fail;
798 	}
799 
800 	/* make sure that kernel-only bits are not set */
801 	rtm->rtm_priority &= RTP_MASK;
802 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
803 	rtm->rtm_fmask &= RTF_FMASK;
804 
805 	if (rtm->rtm_priority != 0) {
806 		if (rtm->rtm_priority > RTP_MAX ||
807 		    rtm->rtm_priority == RTP_LOCAL) {
808 			error = EINVAL;
809 			goto fail;
810 		}
811 		prio = rtm->rtm_priority;
812 	} else if (rtm->rtm_type != RTM_ADD)
813 		prio = RTP_ANY;
814 	else if (rtm->rtm_flags & RTF_STATIC)
815 		prio = 0;
816 	else
817 		prio = RTP_DEFAULT;
818 
819 	bzero(&info, sizeof(info));
820 	info.rti_addrs = rtm->rtm_addrs;
821 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
822 	    len + (caddr_t)rtm, &info)) != 0)
823 		goto fail;
824 
825 	info.rti_flags = rtm->rtm_flags;
826 
827 	if (rtm->rtm_type != RTM_SOURCE &&
828 	    rtm->rtm_type != RTM_PROPOSAL &&
829 	    (info.rti_info[RTAX_DST] == NULL ||
830 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
831 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
832 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
833 	    info.rti_info[RTAX_GENMASK] != NULL)) {
834 		error = EINVAL;
835 		goto fail;
836 	}
837 #ifdef MPLS
838 	info.rti_mpls = rtm->rtm_mpls;
839 #endif
840 
841 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
842 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
843 	    (info.rti_flags & RTF_CLONING) == 0) {
844 		info.rti_flags |= RTF_LLINFO;
845 	}
846 
847 	/*
848 	 * Validate RTM_PROPOSAL and pass it along or error out.
849 	 */
850 	if (rtm->rtm_type == RTM_PROPOSAL) {
851 		if (rtm_validate_proposal(&info) == -1) {
852 			error = EINVAL;
853 			goto fail;
854 		}
855 		/*
856 		 * If this is a solicitation proposal forward request to
857 		 * all interfaces. Most handlers will ignore it but at least
858 		 * umb(4) will send a response to this event.
859 		 */
860 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
861 			NET_LOCK();
862 			TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
863 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
864 			}
865 			NET_UNLOCK();
866 		}
867 	} else if (rtm->rtm_type == RTM_SOURCE) {
868 		if (info.rti_info[RTAX_IFA] == NULL) {
869 			error = EINVAL;
870 			goto fail;
871 		}
872 		NET_LOCK();
873 		error = rt_setsource(tableid, info.rti_info[RTAX_IFA]);
874 		NET_UNLOCK();
875 		if (error)
876 			goto fail;
877 	} else {
878 		error = rtm_output(rtm, &rt, &info, prio, tableid);
879 		if (!error) {
880 			type = rtm->rtm_type;
881 			seq = rtm->rtm_seq;
882 			free(rtm, M_RTABLE, len);
883 			NET_LOCK_SHARED();
884 			rtm = rtm_report(rt, type, seq, tableid);
885 			NET_UNLOCK_SHARED();
886 			len = rtm->rtm_msglen;
887 		}
888 	}
889 
890 	rtfree(rt);
891 	if (error) {
892 		rtm->rtm_errno = error;
893 	} else {
894 		rtm->rtm_flags |= RTF_DONE;
895 	}
896 
897 	/*
898 	 * Check to see if we don't want our own messages.
899 	 */
900 	if (!useloopback) {
901 		if (rtptable.rtp_count == 0) {
902 			/* no other listener and no loopback of messages */
903 			goto fail;
904 		}
905 	}
906 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
907 		m_freem(m);
908 		m = NULL;
909 	} else if (m->m_pkthdr.len > len)
910 		m_adj(m, len - m->m_pkthdr.len);
911 	free(rtm, M_RTABLE, len);
912 	if (m)
913 		route_input(m, so, info.rti_info[RTAX_DST] ?
914 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
915 	solock(so);
916 
917 	return (error);
918 fail:
919 	free(rtm, M_RTABLE, len);
920 	m_freem(m);
921 	solock(so);
922 
923 	return (error);
924 }
925 
926 int
rtm_output(struct rt_msghdr * rtm,struct rtentry ** prt,struct rt_addrinfo * info,uint8_t prio,unsigned int tableid)927 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
928     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
929 {
930 	struct rtentry		*rt = *prt;
931 	struct ifnet		*ifp = NULL;
932 	int			 plen, newgate = 0, error = 0;
933 
934 	switch (rtm->rtm_type) {
935 	case RTM_ADD:
936 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
937 			error = EINVAL;
938 			break;
939 		}
940 
941 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
942 		if ((error = route_arp_conflict(rt, info))) {
943 			rtfree(rt);
944 			rt = NULL;
945 			break;
946 		}
947 
948 		/*
949 		 * We cannot go through a delete/create/insert cycle for
950 		 * cached route because this can lead to races in the
951 		 * receive path.  Instead we update the L2 cache.
952 		 */
953 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
954 			ifp = if_get(rt->rt_ifidx);
955 			if (ifp == NULL) {
956 				rtfree(rt);
957 				rt = NULL;
958 				error = ESRCH;
959 				break;
960 			}
961 
962 			goto change;
963 		}
964 
965 		rtfree(rt);
966 		rt = NULL;
967 
968 		NET_LOCK();
969 		if ((error = rtm_getifa(info, tableid)) != 0) {
970 			NET_UNLOCK();
971 			break;
972 		}
973 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
974 		NET_UNLOCK();
975 		if (error == 0)
976 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
977 			    &rt->rt_rmx);
978 		break;
979 	case RTM_DELETE:
980 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
981 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
982 		    prio);
983 		if (rt == NULL) {
984 			error = ESRCH;
985 			break;
986 		}
987 
988 		/*
989 		 * If we got multipath routes, we require users to specify
990 		 * a matching gateway.
991 		 */
992 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
993 		    info->rti_info[RTAX_GATEWAY] == NULL) {
994 			error = ESRCH;
995 			break;
996 		}
997 
998 		ifp = if_get(rt->rt_ifidx);
999 		if (ifp == NULL) {
1000 			rtfree(rt);
1001 			rt = NULL;
1002 			error = ESRCH;
1003 			break;
1004 		}
1005 
1006 		/*
1007 		 * Invalidate the cache of automagically created and
1008 		 * referenced L2 entries to make sure that ``rt_gwroute''
1009 		 * pointer stays valid for other CPUs.
1010 		 */
1011 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
1012 			NET_LOCK();
1013 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
1014 			/* Reset the MTU of the gateway route. */
1015 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
1016 			    route_cleargateway, rt);
1017 			NET_UNLOCK();
1018 			break;
1019 		}
1020 
1021 		/*
1022 		 * Make sure that local routes are only modified by the
1023 		 * kernel.
1024 		 */
1025 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1026 			error = EINVAL;
1027 			break;
1028 		}
1029 
1030 		rtfree(rt);
1031 		rt = NULL;
1032 
1033 		NET_LOCK();
1034 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1035 		NET_UNLOCK();
1036 		break;
1037 	case RTM_CHANGE:
1038 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1039 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1040 		    prio);
1041 		/*
1042 		 * If we got multipath routes, we require users to specify
1043 		 * a matching gateway.
1044 		 */
1045 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1046 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1047 			rtfree(rt);
1048 			rt = NULL;
1049 		}
1050 
1051 		/*
1052 		 * If RTAX_GATEWAY is the argument we're trying to
1053 		 * change, try to find a compatible route.
1054 		 */
1055 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1056 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1057 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1058 			/* Ensure we don't pick a multipath one. */
1059 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1060 				rtfree(rt);
1061 				rt = NULL;
1062 			}
1063 		}
1064 
1065 		if (rt == NULL) {
1066 			error = ESRCH;
1067 			break;
1068 		}
1069 
1070 		/*
1071 		 * Make sure that local routes are only modified by the
1072 		 * kernel.
1073 		 */
1074 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1075 			error = EINVAL;
1076 			break;
1077 		}
1078 
1079 		ifp = if_get(rt->rt_ifidx);
1080 		if (ifp == NULL) {
1081 			rtfree(rt);
1082 			rt = NULL;
1083 			error = ESRCH;
1084 			break;
1085 		}
1086 
1087 		/*
1088 		 * RTM_CHANGE needs a perfect match.
1089 		 */
1090 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1091 		    info->rti_info[RTAX_NETMASK]);
1092 		if (rt_plen(rt) != plen) {
1093 			error = ESRCH;
1094 			break;
1095 		}
1096 
1097 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1098 			if (rt->rt_gateway == NULL ||
1099 			    bcmp(rt->rt_gateway,
1100 			    info->rti_info[RTAX_GATEWAY],
1101 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1102 				newgate = 1;
1103 			}
1104 		/*
1105 		 * Check reachable gateway before changing the route.
1106 		 * New gateway could require new ifaddr, ifp;
1107 		 * flags may also be different; ifp may be specified
1108 		 * by ll sockaddr when protocol address is ambiguous.
1109 		 */
1110 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1111 		    info->rti_info[RTAX_IFA] != NULL) {
1112 			struct ifaddr	*ifa = NULL;
1113 
1114 			NET_LOCK();
1115 			if ((error = rtm_getifa(info, tableid)) != 0) {
1116 				NET_UNLOCK();
1117 				break;
1118 			}
1119 			ifa = info->rti_ifa;
1120 			if (rt->rt_ifa != ifa) {
1121 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1122 				ifafree(rt->rt_ifa);
1123 
1124 				rt->rt_ifa = ifaref(ifa);
1125 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1126 				/* recheck link state after ifp change */
1127 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1128 				    tableid);
1129 			}
1130 			NET_UNLOCK();
1131 		}
1132 change:
1133 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1134 			/* When updating the gateway, make sure it is valid. */
1135 			if (!newgate && rt->rt_gateway->sa_family !=
1136 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1137 				error = EINVAL;
1138 				break;
1139 			}
1140 
1141 			NET_LOCK();
1142 			error = rt_setgate(rt,
1143 			    info->rti_info[RTAX_GATEWAY], tableid);
1144 			NET_UNLOCK();
1145 			if (error)
1146 				break;
1147 		}
1148 #ifdef MPLS
1149 		if (rtm->rtm_flags & RTF_MPLS) {
1150 			NET_LOCK();
1151 			error = rt_mpls_set(rt,
1152 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1153 			NET_UNLOCK();
1154 			if (error)
1155 				break;
1156 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1157 			NET_LOCK();
1158 			/* if gateway changed remove MPLS information */
1159 			rt_mpls_clear(rt);
1160 			NET_UNLOCK();
1161 		}
1162 #endif
1163 
1164 #ifdef BFD
1165 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1166 			KERNEL_LOCK();
1167 			error = bfdset(rt);
1168 			KERNEL_UNLOCK();
1169 			if (error)
1170 				break;
1171 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1172 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1173 			KERNEL_LOCK();
1174 			bfdclear(rt);
1175 			KERNEL_UNLOCK();
1176 		}
1177 #endif
1178 
1179 		NET_LOCK();
1180 		/* Hack to allow some flags to be toggled */
1181 		if (rtm->rtm_fmask) {
1182 			/* MPLS flag it is set by rt_mpls_set() */
1183 			rtm->rtm_fmask &= ~RTF_MPLS;
1184 			rtm->rtm_flags &= ~RTF_MPLS;
1185 			rt->rt_flags =
1186 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1187 			    (rtm->rtm_flags & rtm->rtm_fmask);
1188 		}
1189 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1190 
1191 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1192 
1193 		if (info->rti_info[RTAX_LABEL] != NULL) {
1194 			const char *rtlabel = ((const struct sockaddr_rtlabel *)
1195 			    info->rti_info[RTAX_LABEL])->sr_label;
1196 			rtlabel_unref(rt->rt_labelid);
1197 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1198 		}
1199 		if_group_routechange(info->rti_info[RTAX_DST],
1200 		    info->rti_info[RTAX_NETMASK]);
1201 		rt->rt_locks &= ~(rtm->rtm_inits);
1202 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1203 		NET_UNLOCK();
1204 		break;
1205 	case RTM_GET:
1206 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1207 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1208 		    prio);
1209 		if (rt == NULL)
1210 			error = ESRCH;
1211 		break;
1212 	}
1213 
1214 	if_put(ifp);
1215 	*prt = rt;
1216 	return (error);
1217 }
1218 
1219 struct ifaddr *
ifa_ifwithroute(int flags,const struct sockaddr * dst,const struct sockaddr * gateway,unsigned int rtableid)1220 ifa_ifwithroute(int flags, const struct sockaddr *dst,
1221     const struct sockaddr *gateway, unsigned int rtableid)
1222 {
1223 	struct ifaddr	*ifa;
1224 
1225 	if ((flags & RTF_GATEWAY) == 0) {
1226 		/*
1227 		 * If we are adding a route to an interface,
1228 		 * and the interface is a pt to pt link
1229 		 * we should search for the destination
1230 		 * as our clue to the interface.  Otherwise
1231 		 * we can use the local address.
1232 		 */
1233 		ifa = NULL;
1234 		if (flags & RTF_HOST)
1235 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1236 		if (ifa == NULL)
1237 			ifa = ifa_ifwithaddr(gateway, rtableid);
1238 	} else {
1239 		/*
1240 		 * If we are adding a route to a remote net
1241 		 * or host, the gateway may still be on the
1242 		 * other end of a pt to pt link.
1243 		 */
1244 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1245 	}
1246 	if (ifa == NULL) {
1247 		if (gateway->sa_family == AF_LINK) {
1248 			const struct sockaddr_dl *sdl;
1249 			struct ifnet *ifp;
1250 
1251 			sdl = satosdl_const(gateway);
1252 			ifp = if_get(sdl->sdl_index);
1253 			if (ifp != NULL)
1254 				ifa = ifaof_ifpforaddr(dst, ifp);
1255 			if_put(ifp);
1256 		} else {
1257 			struct rtentry *rt;
1258 
1259 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1260 			if (rt != NULL)
1261 				ifa = rt->rt_ifa;
1262 			rtfree(rt);
1263 		}
1264 	}
1265 	if (ifa == NULL)
1266 		return (NULL);
1267 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1268 		struct ifaddr	*oifa = ifa;
1269 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1270 		if (ifa == NULL)
1271 			ifa = oifa;
1272 	}
1273 	return (ifa);
1274 }
1275 
1276 int
rtm_getifa(struct rt_addrinfo * info,unsigned int rtid)1277 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1278 {
1279 	struct ifnet	*ifp = NULL;
1280 
1281 	/*
1282 	 * The "returned" `ifa' is guaranteed to be alive only if
1283 	 * the NET_LOCK() is held.
1284 	 */
1285 	NET_ASSERT_LOCKED();
1286 
1287 	/*
1288 	 * ifp may be specified by sockaddr_dl when protocol address
1289 	 * is ambiguous
1290 	 */
1291 	if (info->rti_info[RTAX_IFP] != NULL) {
1292 		const struct sockaddr_dl *sdl;
1293 
1294 		sdl = satosdl_const(info->rti_info[RTAX_IFP]);
1295 		ifp = if_get(sdl->sdl_index);
1296 	}
1297 
1298 #ifdef IPSEC
1299 	/*
1300 	 * If the destination is a PF_KEY address, we'll look
1301 	 * for the existence of a encap interface number or address
1302 	 * in the options list of the gateway. By default, we'll return
1303 	 * enc0.
1304 	 */
1305 	if (info->rti_info[RTAX_DST] &&
1306 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1307 		info->rti_ifa = enc_getifa(rtid, 0);
1308 #endif
1309 
1310 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1311 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1312 
1313 	if (info->rti_ifa == NULL) {
1314 		const struct sockaddr	*sa;
1315 
1316 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1317 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1318 				sa = info->rti_info[RTAX_DST];
1319 
1320 		if (sa != NULL && ifp != NULL)
1321 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1322 		else if (info->rti_info[RTAX_DST] != NULL &&
1323 		    info->rti_info[RTAX_GATEWAY] != NULL)
1324 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1325 			    info->rti_info[RTAX_DST],
1326 			    info->rti_info[RTAX_GATEWAY],
1327 			    rtid);
1328 		else if (sa != NULL)
1329 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1330 			    sa, sa, rtid);
1331 	}
1332 
1333 	if_put(ifp);
1334 
1335 	if (info->rti_ifa == NULL)
1336 		return (ENETUNREACH);
1337 
1338 	return (0);
1339 }
1340 
1341 int
route_cleargateway(struct rtentry * rt,void * arg,unsigned int rtableid)1342 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1343 {
1344 	struct rtentry *nhrt = arg;
1345 
1346 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1347 	    !ISSET(rt->rt_locks, RTV_MTU))
1348 		rt->rt_mtu = 0;
1349 
1350 	return (0);
1351 }
1352 
1353 /*
1354  * Check if the user request to insert an ARP entry does not conflict
1355  * with existing ones.
1356  *
1357  * Only two entries are allowed for a given IP address: a private one
1358  * (priv) and a public one (pub).
1359  */
1360 int
route_arp_conflict(struct rtentry * rt,struct rt_addrinfo * info)1361 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1362 {
1363 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1364 
1365 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1366 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1367 		return (0);
1368 
1369 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1370 		return (0);
1371 
1372 	/* If the entry is cached, it can be updated. */
1373 	if (ISSET(rt->rt_flags, RTF_CACHED))
1374 		return (0);
1375 
1376 	/*
1377 	 * Same destination, not cached and both "priv" or "pub" conflict.
1378 	 * If a second entry exists, it always conflict.
1379 	 */
1380 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1381 	    ISSET(rt->rt_flags, RTF_MPATH))
1382 		return (EEXIST);
1383 
1384 	/* No conflict but an entry exist so we need to force mpath. */
1385 	info->rti_flags |= RTF_MPATH;
1386 	return (0);
1387 }
1388 
1389 void
rtm_setmetrics(u_long which,const struct rt_metrics * in,struct rt_kmetrics * out)1390 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1391     struct rt_kmetrics *out)
1392 {
1393 	int64_t expire;
1394 
1395 	if (which & RTV_MTU)
1396 		out->rmx_mtu = in->rmx_mtu;
1397 	if (which & RTV_EXPIRE) {
1398 		expire = in->rmx_expire;
1399 		if (expire != 0) {
1400 			expire -= gettime();
1401 			expire += getuptime();
1402 		}
1403 
1404 		out->rmx_expire = expire;
1405 	}
1406 }
1407 
1408 void
rtm_getmetrics(const struct rtentry * rt,struct rt_metrics * out)1409 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
1410 {
1411 	const struct rt_kmetrics *in = &rt->rt_rmx;
1412 	int64_t expire;
1413 
1414 	expire = in->rmx_expire;
1415 	if (expire == 0)
1416 		expire = rt_timer_get_expire(rt);
1417 	if (expire != 0) {
1418 		expire -= getuptime();
1419 		expire += gettime();
1420 	}
1421 
1422 	bzero(out, sizeof(*out));
1423 	out->rmx_locks = in->rmx_locks;
1424 	out->rmx_mtu = in->rmx_mtu;
1425 	out->rmx_expire = expire;
1426 	out->rmx_pksent = in->rmx_pksent;
1427 }
1428 
/*
 * ROUNDUP(a): space taken by an address of length `a' inside a routing
 * message, i.e. `a' rounded up to a multiple of sizeof(long).  A length
 * that is not positive still takes sizeof(long).
 * ADVANCE(x, n): move pointer `x' past the sockaddr `n'.
 */
#define ROUNDUP(a) \
	(((a) > 0) ? ((((a) - 1) | (sizeof(long) - 1)) + 1) : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
1432 
1433 int
rtm_xaddrs(caddr_t cp,caddr_t cplim,struct rt_addrinfo * rtinfo)1434 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1435 {
1436 	int i;
1437 
1438 	/*
1439 	 * Parse address bits, split address storage in chunks, and
1440 	 * set info pointers.  Use sa_len for traversing the memory
1441 	 * and check that we stay within in the limit.
1442 	 */
1443 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1444 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1445 		struct sockaddr *sa;
1446 
1447 		if ((rtinfo->rti_addrs & (1U << i)) == 0)
1448 			continue;
1449 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1450 			return (EINVAL);
1451 		sa = (struct sockaddr *)cp;
1452 		if (cp + sa->sa_len > cplim)
1453 			return (EINVAL);
1454 		rtinfo->rti_info[i] = sa;
1455 		ADVANCE(cp, sa);
1456 	}
1457 	/*
1458 	 * Check that the address family is suitable for the route address
1459 	 * type.  Check that each address has a size that fits its family
1460 	 * and its length is within the size.  Strings within addresses must
1461 	 * be NUL terminated.
1462 	 */
1463 	for (i = 0; i < RTAX_MAX; i++) {
1464 		const struct sockaddr *sa;
1465 		size_t len, maxlen, size;
1466 
1467 		sa = rtinfo->rti_info[i];
1468 		if (sa == NULL)
1469 			continue;
1470 		maxlen = size = 0;
1471 		switch (i) {
1472 		case RTAX_DST:
1473 		case RTAX_GATEWAY:
1474 		case RTAX_SRC:
1475 			switch (sa->sa_family) {
1476 			case AF_INET:
1477 				size = sizeof(struct sockaddr_in);
1478 				break;
1479 			case AF_LINK:
1480 				size = sizeof(struct sockaddr_dl);
1481 				break;
1482 #ifdef INET6
1483 			case AF_INET6:
1484 				size = sizeof(struct sockaddr_in6);
1485 				break;
1486 #endif
1487 #ifdef MPLS
1488 			case AF_MPLS:
1489 				size = sizeof(struct sockaddr_mpls);
1490 				break;
1491 #endif
1492 			}
1493 			break;
1494 		case RTAX_IFP:
1495 			if (sa->sa_family != AF_LINK)
1496 				return (EAFNOSUPPORT);
1497 			/*
1498 			 * XXX Should be sizeof(struct sockaddr_dl), but
1499 			 * route(8) has a bug and provides less memory.
1500 			 * arp(8) has another bug and uses sizeof pointer.
1501 			 */
1502 			size = 4;
1503 			break;
1504 		case RTAX_IFA:
1505 			switch (sa->sa_family) {
1506 			case AF_INET:
1507 				size = sizeof(struct sockaddr_in);
1508 				break;
1509 #ifdef INET6
1510 			case AF_INET6:
1511 				size = sizeof(struct sockaddr_in6);
1512 				break;
1513 #endif
1514 			default:
1515 				return (EAFNOSUPPORT);
1516 			}
1517 			break;
1518 		case RTAX_LABEL:
1519 			if (sa->sa_family != AF_UNSPEC)
1520 				return (EAFNOSUPPORT);
1521 			maxlen = RTLABEL_LEN;
1522 			size = sizeof(struct sockaddr_rtlabel);
1523 			break;
1524 #ifdef BFD
1525 		case RTAX_BFD:
1526 			if (sa->sa_family != AF_UNSPEC)
1527 				return (EAFNOSUPPORT);
1528 			size = sizeof(struct sockaddr_bfd);
1529 			break;
1530 #endif
1531 		case RTAX_DNS:
1532 			/* more validation in rtm_validate_proposal */
1533 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1534 				return (EINVAL);
1535 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1536 			    sr_dns))
1537 				return (EINVAL);
1538 			switch (sa->sa_family) {
1539 			case AF_INET:
1540 #ifdef INET6
1541 			case AF_INET6:
1542 #endif
1543 				break;
1544 			default:
1545 				return (EAFNOSUPPORT);
1546 			}
1547 			break;
1548 		case RTAX_STATIC:
1549 			switch (sa->sa_family) {
1550 			case AF_INET:
1551 #ifdef INET6
1552 			case AF_INET6:
1553 #endif
1554 				break;
1555 			default:
1556 				return (EAFNOSUPPORT);
1557 			}
1558 			maxlen = RTSTATIC_LEN;
1559 			size = sizeof(struct sockaddr_rtstatic);
1560 			break;
1561 		case RTAX_SEARCH:
1562 			if (sa->sa_family != AF_UNSPEC)
1563 				return (EAFNOSUPPORT);
1564 			maxlen = RTSEARCH_LEN;
1565 			size = sizeof(struct sockaddr_rtsearch);
1566 			break;
1567 		}
1568 		if (size) {
1569 			/* memory for the full struct must be provided */
1570 			if (sa->sa_len < size)
1571 				return (EINVAL);
1572 		}
1573 		if (maxlen) {
1574 			/* this should not happen */
1575 			if (2 + maxlen > size)
1576 				return (EINVAL);
1577 			/* strings must be NUL terminated within the struct */
1578 			len = strnlen(sa->sa_data, maxlen);
1579 			if (len >= maxlen || 2 + len >= sa->sa_len)
1580 				return (EINVAL);
1581 			break;
1582 		}
1583 	}
1584 	return (0);
1585 }
1586 
1587 struct mbuf *
rtm_msg1(int type,struct rt_addrinfo * rtinfo)1588 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1589 {
1590 	struct rt_msghdr	*rtm;
1591 	struct mbuf		*m;
1592 	int			 i;
1593 	const struct sockaddr	*sa;
1594 	int			 len, dlen, hlen;
1595 
1596 	switch (type) {
1597 	case RTM_DELADDR:
1598 	case RTM_NEWADDR:
1599 		hlen = sizeof(struct ifa_msghdr);
1600 		break;
1601 	case RTM_IFINFO:
1602 		hlen = sizeof(struct if_msghdr);
1603 		break;
1604 	case RTM_IFANNOUNCE:
1605 		hlen = sizeof(struct if_announcemsghdr);
1606 		break;
1607 #ifdef BFD
1608 	case RTM_BFD:
1609 		hlen = sizeof(struct bfd_msghdr);
1610 		break;
1611 #endif
1612 	case RTM_80211INFO:
1613 		hlen = sizeof(struct if_ieee80211_msghdr);
1614 		break;
1615 	default:
1616 		hlen = sizeof(struct rt_msghdr);
1617 		break;
1618 	}
1619 	len = hlen;
1620 	for (i = 0; i < RTAX_MAX; i++) {
1621 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1622 			continue;
1623 		len += ROUNDUP(sa->sa_len);
1624 	}
1625 	if (len > MCLBYTES)
1626 		panic("rtm_msg1");
1627 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1628 	if (m && len > MHLEN) {
1629 		MCLGET(m, M_DONTWAIT);
1630 		if ((m->m_flags & M_EXT) == 0) {
1631 			m_free(m);
1632 			m = NULL;
1633 		}
1634 	}
1635 	if (m == NULL)
1636 		return (m);
1637 	m->m_pkthdr.len = m->m_len = len;
1638 	m->m_pkthdr.ph_ifidx = 0;
1639 	rtm = mtod(m, struct rt_msghdr *);
1640 	bzero(rtm, len);
1641 	len = hlen;
1642 	for (i = 0; i < RTAX_MAX; i++) {
1643 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1644 			continue;
1645 		rtinfo->rti_addrs |= (1U << i);
1646 		dlen = ROUNDUP(sa->sa_len);
1647 		if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) {
1648 			m_freem(m);
1649 			return (NULL);
1650 		}
1651 		len += dlen;
1652 	}
1653 	rtm->rtm_msglen = len;
1654 	rtm->rtm_hdrlen = hlen;
1655 	rtm->rtm_version = RTM_VERSION;
1656 	rtm->rtm_type = type;
1657 	return (m);
1658 }
1659 
1660 int
rtm_msg2(int type,int vers,struct rt_addrinfo * rtinfo,caddr_t cp,struct walkarg * w)1661 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1662     struct walkarg *w)
1663 {
1664 	int		i;
1665 	int		len, dlen, hlen, second_time = 0;
1666 	caddr_t		cp0;
1667 
1668 	rtinfo->rti_addrs = 0;
1669 again:
1670 	switch (type) {
1671 	case RTM_DELADDR:
1672 	case RTM_NEWADDR:
1673 		len = sizeof(struct ifa_msghdr);
1674 		break;
1675 	case RTM_IFINFO:
1676 		len = sizeof(struct if_msghdr);
1677 		break;
1678 	default:
1679 		len = sizeof(struct rt_msghdr);
1680 		break;
1681 	}
1682 	hlen = len;
1683 	if ((cp0 = cp) != NULL)
1684 		cp += len;
1685 	for (i = 0; i < RTAX_MAX; i++) {
1686 		const struct sockaddr *sa;
1687 
1688 		if ((sa = rtinfo->rti_info[i]) == NULL)
1689 			continue;
1690 		rtinfo->rti_addrs |= (1U << i);
1691 		dlen = ROUNDUP(sa->sa_len);
1692 		if (cp) {
1693 			bcopy(sa, cp, sa->sa_len);
1694 			bzero(cp + sa->sa_len, dlen - sa->sa_len);
1695 			cp += dlen;
1696 		}
1697 		len += dlen;
1698 	}
1699 	/* align message length to the next natural boundary */
1700 	len = ALIGN(len);
1701 	if (cp == 0 && w != NULL && !second_time) {
1702 		w->w_needed += len;
1703 		if (w->w_needed <= w->w_given && w->w_where) {
1704 			if (w->w_tmemsize < len) {
1705 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1706 				w->w_tmem = malloc(len, M_RTABLE,
1707 				    M_NOWAIT | M_ZERO);
1708 				if (w->w_tmem)
1709 					w->w_tmemsize = len;
1710 			}
1711 			if (w->w_tmem) {
1712 				cp = w->w_tmem;
1713 				second_time = 1;
1714 				goto again;
1715 			} else
1716 				w->w_where = 0;
1717 		}
1718 	}
1719 	if (cp && w)		/* clear the message header */
1720 		bzero(cp0, hlen);
1721 
1722 	if (cp) {
1723 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1724 
1725 		rtm->rtm_version = RTM_VERSION;
1726 		rtm->rtm_type = type;
1727 		rtm->rtm_msglen = len;
1728 		rtm->rtm_hdrlen = hlen;
1729 	}
1730 	return (len);
1731 }
1732 
1733 void
rtm_send(struct rtentry * rt,int cmd,int error,unsigned int rtableid)1734 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1735 {
1736 	struct rt_addrinfo	 info;
1737 	struct ifnet		*ifp;
1738 	struct sockaddr_rtlabel	 sa_rl;
1739 	struct sockaddr_in6	 sa_mask;
1740 
1741 	memset(&info, 0, sizeof(info));
1742 	info.rti_info[RTAX_DST] = rt_key(rt);
1743 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1744 	if (!ISSET(rt->rt_flags, RTF_HOST))
1745 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1746 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1747 	ifp = if_get(rt->rt_ifidx);
1748 	if (ifp != NULL) {
1749 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1750 		info.rti_info[RTAX_IFA] = rtable_getsource(rtableid,
1751 		    info.rti_info[RTAX_DST]->sa_family);
1752 		if (info.rti_info[RTAX_IFA] == NULL)
1753 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1754 	}
1755 
1756 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1757 	    rtableid);
1758 	if_put(ifp);
1759 }
1760 
1761 /*
1762  * This routine is called to generate a message from the routing
1763  * socket indicating that a redirect has occurred, a routing lookup
1764  * has failed, or that a protocol has detected timeouts to a particular
1765  * destination.
1766  */
1767 void
rtm_miss(int type,struct rt_addrinfo * rtinfo,int flags,uint8_t prio,u_int ifidx,int error,u_int tableid)1768 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1769     u_int ifidx, int error, u_int tableid)
1770 {
1771 	struct rt_msghdr	*rtm;
1772 	struct mbuf		*m;
1773 	const struct sockaddr	*sa = rtinfo->rti_info[RTAX_DST];
1774 
1775 	if (rtptable.rtp_count == 0)
1776 		return;
1777 	m = rtm_msg1(type, rtinfo);
1778 	if (m == NULL)
1779 		return;
1780 	rtm = mtod(m, struct rt_msghdr *);
1781 	rtm->rtm_flags = RTF_DONE | flags;
1782 	rtm->rtm_priority = prio;
1783 	rtm->rtm_errno = error;
1784 	rtm->rtm_tableid = tableid;
1785 	rtm->rtm_addrs = rtinfo->rti_addrs;
1786 	rtm->rtm_index = ifidx;
1787 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1788 }
1789 
1790 /*
1791  * This routine is called to generate a message from the routing
1792  * socket indicating that the status of a network interface has changed.
1793  */
1794 void
rtm_ifchg(struct ifnet * ifp)1795 rtm_ifchg(struct ifnet *ifp)
1796 {
1797 	struct rt_addrinfo	 info;
1798 	struct if_msghdr	*ifm;
1799 	struct mbuf		*m;
1800 
1801 	if (rtptable.rtp_count == 0)
1802 		return;
1803 	memset(&info, 0, sizeof(info));
1804 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1805 	m = rtm_msg1(RTM_IFINFO, &info);
1806 	if (m == NULL)
1807 		return;
1808 	ifm = mtod(m, struct if_msghdr *);
1809 	ifm->ifm_index = ifp->if_index;
1810 	ifm->ifm_tableid = ifp->if_rdomain;
1811 	ifm->ifm_flags = ifp->if_flags;
1812 	ifm->ifm_xflags = ifp->if_xflags;
1813 	if_getdata(ifp, &ifm->ifm_data);
1814 	ifm->ifm_addrs = info.rti_addrs;
1815 	route_input(m, NULL, AF_UNSPEC);
1816 }
1817 
1818 /*
1819  * This is called to generate messages from the routing socket
1820  * indicating a network interface has had addresses associated with it.
1821  * if we ever reverse the logic and replace messages TO the routing
1822  * socket indicate a request to configure interfaces, then it will
1823  * be unnecessary as the routing socket will automatically generate
1824  * copies of it.
1825  */
1826 void
rtm_addr(int cmd,struct ifaddr * ifa)1827 rtm_addr(int cmd, struct ifaddr *ifa)
1828 {
1829 	struct ifnet		*ifp = ifa->ifa_ifp;
1830 	struct mbuf		*m;
1831 	struct rt_addrinfo	 info;
1832 	struct ifa_msghdr	*ifam;
1833 
1834 	if (rtptable.rtp_count == 0)
1835 		return;
1836 
1837 	memset(&info, 0, sizeof(info));
1838 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1839 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1840 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1841 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1842 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1843 		return;
1844 	ifam = mtod(m, struct ifa_msghdr *);
1845 	ifam->ifam_index = ifp->if_index;
1846 	ifam->ifam_metric = ifa->ifa_metric;
1847 	ifam->ifam_flags = ifa->ifa_flags;
1848 	ifam->ifam_addrs = info.rti_addrs;
1849 	ifam->ifam_tableid = ifp->if_rdomain;
1850 
1851 	route_input(m, NULL,
1852 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1853 }
1854 
1855 /*
1856  * This is called to generate routing socket messages indicating
1857  * network interface arrival and departure.
1858  */
1859 void
rtm_ifannounce(struct ifnet * ifp,int what)1860 rtm_ifannounce(struct ifnet *ifp, int what)
1861 {
1862 	struct if_announcemsghdr	*ifan;
1863 	struct mbuf			*m;
1864 
1865 	if (rtptable.rtp_count == 0)
1866 		return;
1867 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1868 	if (m == NULL)
1869 		return;
1870 	ifan = mtod(m, struct if_announcemsghdr *);
1871 	ifan->ifan_index = ifp->if_index;
1872 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1873 	ifan->ifan_what = what;
1874 	route_input(m, NULL, AF_UNSPEC);
1875 }
1876 
#ifdef BFD
/*
 * Generate a routing socket message describing the state of the BFD
 * session `bfd'.
 *
 * The message carries the destination and interface address of the
 * route the session is attached to, plus the sockaddr_bfd produced by
 * bfd2sa().  The kernel lock is asserted before bfd2sa() is called.
 * Nothing is done while rtptable.rtp_count is zero or when no message
 * could be allocated.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 state;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;
	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;

	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	KERNEL_ASSERT_LOCKED();
	bfd2sa(bfd->bc_rt, &state);
	memcpy(&hdr->bm_sa, &state, sizeof(state));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1909 
1910 /*
1911  * This is used to generate routing socket messages indicating
1912  * the state of an ieee80211 interface.
1913  */
1914 void
rtm_80211info(struct ifnet * ifp,struct if_ieee80211_data * ifie)1915 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1916 {
1917 	struct if_ieee80211_msghdr	*ifim;
1918 	struct mbuf			*m;
1919 
1920 	if (rtptable.rtp_count == 0)
1921 		return;
1922 	m = rtm_msg1(RTM_80211INFO, NULL);
1923 	if (m == NULL)
1924 		return;
1925 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1926 	ifim->ifim_index = ifp->if_index;
1927 	ifim->ifim_tableid = ifp->if_rdomain;
1928 
1929 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1930 	route_input(m, NULL, AF_UNSPEC);
1931 }
1932 
1933 /*
1934  * This is used to generate routing socket messages indicating
1935  * the address selection proposal from an interface.
1936  */
1937 void
rtm_proposal(struct ifnet * ifp,struct rt_addrinfo * rtinfo,int flags,uint8_t prio)1938 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1939     uint8_t prio)
1940 {
1941 	struct rt_msghdr	*rtm;
1942 	struct mbuf		*m;
1943 
1944 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1945 	if (m == NULL)
1946 		return;
1947 	rtm = mtod(m, struct rt_msghdr *);
1948 	rtm->rtm_flags = RTF_DONE | flags;
1949 	rtm->rtm_priority = prio;
1950 	rtm->rtm_tableid = ifp->if_rdomain;
1951 	rtm->rtm_index = ifp->if_index;
1952 	rtm->rtm_addrs = rtinfo->rti_addrs;
1953 
1954 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1955 }
1956 
1957 /*
1958  * This is used in dumping the kernel table via sysctl().
1959  */
1960 int
sysctl_dumpentry(struct rtentry * rt,void * v,unsigned int id)1961 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1962 {
1963 	struct walkarg		*w = v;
1964 	int			 error = 0, size;
1965 	struct rt_addrinfo	 info;
1966 	struct ifnet		*ifp;
1967 #ifdef BFD
1968 	struct sockaddr_bfd	 sa_bfd;
1969 #endif
1970 	struct sockaddr_rtlabel	 sa_rl;
1971 	struct sockaddr_in6	 sa_mask;
1972 
1973 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1974 		return 0;
1975 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1976 		u_int8_t prio = w->w_arg & RTP_MASK;
1977 		if (w->w_arg < 0) {
1978 			prio = (-w->w_arg) & RTP_MASK;
1979 			/* Show all routes that are not this priority */
1980 			if (prio == (rt->rt_priority & RTP_MASK))
1981 				return 0;
1982 		} else {
1983 			if (prio != (rt->rt_priority & RTP_MASK) &&
1984 			    prio != RTP_ANY)
1985 				return 0;
1986 		}
1987 	}
1988 	bzero(&info, sizeof(info));
1989 	info.rti_info[RTAX_DST] = rt_key(rt);
1990 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1991 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1992 	ifp = if_get(rt->rt_ifidx);
1993 	if (ifp != NULL) {
1994 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1995 		info.rti_info[RTAX_IFA] =
1996 		    rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1997 		if (info.rti_info[RTAX_IFA] == NULL)
1998 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1999 		if (ifp->if_flags & IFF_POINTOPOINT)
2000 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
2001 	}
2002 	if_put(ifp);
2003 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
2004 #ifdef BFD
2005 	if (rt->rt_flags & RTF_BFD) {
2006 		KERNEL_ASSERT_LOCKED();
2007 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
2008 	}
2009 #endif
2010 #ifdef MPLS
2011 	if (rt->rt_flags & RTF_MPLS) {
2012 		struct sockaddr_mpls	 sa_mpls;
2013 
2014 		bzero(&sa_mpls, sizeof(sa_mpls));
2015 		sa_mpls.smpls_family = AF_MPLS;
2016 		sa_mpls.smpls_len = sizeof(sa_mpls);
2017 		sa_mpls.smpls_label = ((struct rt_mpls *)
2018 		    rt->rt_llinfo)->mpls_label;
2019 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
2020 		info.rti_mpls = ((struct rt_mpls *)
2021 		    rt->rt_llinfo)->mpls_operation;
2022 	}
2023 #endif
2024 
2025 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
2026 	if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2027 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2028 
2029 		rtm->rtm_pid = curproc->p_p->ps_pid;
2030 		rtm->rtm_flags = RTF_DONE | rt->rt_flags;
2031 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
2032 		rtm_getmetrics(rt, &rtm->rtm_rmx);
2033 		/* Do not account the routing table's reference. */
2034 		rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1;
2035 		rtm->rtm_index = rt->rt_ifidx;
2036 		rtm->rtm_addrs = info.rti_addrs;
2037 		rtm->rtm_tableid = id;
2038 #ifdef MPLS
2039 		rtm->rtm_mpls = info.rti_mpls;
2040 #endif
2041 		if ((error = copyout(rtm, w->w_where, size)) != 0)
2042 			w->w_where = NULL;
2043 		else
2044 			w->w_where += size;
2045 	}
2046 	return (error);
2047 }
2048 
2049 int
sysctl_iflist(int af,struct walkarg * w)2050 sysctl_iflist(int af, struct walkarg *w)
2051 {
2052 	struct ifnet		*ifp;
2053 	struct ifaddr		*ifa;
2054 	struct rt_addrinfo	 info;
2055 	int			 len, error = 0;
2056 
2057 	bzero(&info, sizeof(info));
2058 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2059 		if (w->w_arg && w->w_arg != ifp->if_index)
2060 			continue;
2061 		/* Copy the link-layer address first */
2062 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
2063 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
2064 		if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) {
2065 			struct if_msghdr *ifm;
2066 
2067 			ifm = (struct if_msghdr *)w->w_tmem;
2068 			ifm->ifm_index = ifp->if_index;
2069 			ifm->ifm_tableid = ifp->if_rdomain;
2070 			ifm->ifm_flags = ifp->if_flags;
2071 			if_getdata(ifp, &ifm->ifm_data);
2072 			ifm->ifm_addrs = info.rti_addrs;
2073 			error = copyout(ifm, w->w_where, len);
2074 			if (error)
2075 				return (error);
2076 			w->w_where += len;
2077 		}
2078 		info.rti_info[RTAX_IFP] = NULL;
2079 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2080 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2081 			if (af && af != ifa->ifa_addr->sa_family)
2082 				continue;
2083 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2084 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2085 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2086 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2087 			if (w->w_where && w->w_tmem &&
2088 			    w->w_needed <= w->w_given) {
2089 				struct ifa_msghdr *ifam;
2090 
2091 				ifam = (struct ifa_msghdr *)w->w_tmem;
2092 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2093 				ifam->ifam_flags = ifa->ifa_flags;
2094 				ifam->ifam_metric = ifa->ifa_metric;
2095 				ifam->ifam_addrs = info.rti_addrs;
2096 				error = copyout(w->w_tmem, w->w_where, len);
2097 				if (error)
2098 					return (error);
2099 				w->w_where += len;
2100 			}
2101 		}
2102 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2103 		    info.rti_info[RTAX_BRD] = NULL;
2104 	}
2105 	return (0);
2106 }
2107 
2108 int
sysctl_ifnames(struct walkarg * w)2109 sysctl_ifnames(struct walkarg *w)
2110 {
2111 	struct if_nameindex_msg ifn;
2112 	struct ifnet *ifp;
2113 	int error = 0;
2114 
2115 	/* XXX ignore tableid for now */
2116 	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
2117 		if (w->w_arg && w->w_arg != ifp->if_index)
2118 			continue;
2119 		w->w_needed += sizeof(ifn);
2120 		if (w->w_where && w->w_needed <= w->w_given) {
2121 
2122 			memset(&ifn, 0, sizeof(ifn));
2123 			ifn.if_index = ifp->if_index;
2124 			strlcpy(ifn.if_name, ifp->if_xname,
2125 			    sizeof(ifn.if_name));
2126 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2127 			if (error)
2128 				return (error);
2129 			w->w_where += sizeof(ifn);
2130 		}
2131 	}
2132 
2133 	return (0);
2134 }
2135 
2136 int
sysctl_source(int af,u_int tableid,struct walkarg * w)2137 sysctl_source(int af, u_int tableid, struct walkarg *w)
2138 {
2139 	struct sockaddr	*sa;
2140 	int		 size, error = 0;
2141 
2142 	sa = rtable_getsource(tableid, af);
2143 	if (sa) {
2144 		switch (sa->sa_family) {
2145 		case AF_INET:
2146 			size = sizeof(struct sockaddr_in);
2147 			break;
2148 #ifdef INET6
2149 		case AF_INET6:
2150 			size = sizeof(struct sockaddr_in6);
2151 			break;
2152 #endif
2153 		default:
2154 			return (0);
2155 		}
2156 		w->w_needed += size;
2157 		if (w->w_where && w->w_needed <= w->w_given) {
2158 			if ((error = copyout(sa, w->w_where, size)))
2159 				return (error);
2160 			w->w_where += size;
2161 		}
2162 	}
2163 	return (0);
2164 }
2165 
2166 int
sysctl_rtable(int * name,u_int namelen,void * where,size_t * given,void * new,size_t newlen)2167 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2168     size_t newlen)
2169 {
2170 	int			 i, error = EINVAL;
2171 	u_char			 af;
2172 	struct walkarg		 w;
2173 	struct rt_tableinfo	 tableinfo;
2174 	u_int			 tableid = 0;
2175 
2176 	if (new)
2177 		return (EPERM);
2178 	if (namelen < 3 || namelen > 4)
2179 		return (EINVAL);
2180 	af = name[0];
2181 	bzero(&w, sizeof(w));
2182 	w.w_where = where;
2183 	w.w_given = *given;
2184 	w.w_op = name[1];
2185 	w.w_arg = name[2];
2186 
2187 	if (namelen == 4) {
2188 		tableid = name[3];
2189 		if (!rtable_exists(tableid))
2190 			return (ENOENT);
2191 	} else
2192 		tableid = curproc->p_p->ps_rtableid;
2193 
2194 	switch (w.w_op) {
2195 	case NET_RT_DUMP:
2196 	case NET_RT_FLAGS:
2197 		NET_LOCK_SHARED();
2198 		for (i = 1; i <= AF_MAX; i++) {
2199 			if (af != 0 && af != i)
2200 				continue;
2201 
2202 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2203 			    &w);
2204 			if (error == EAFNOSUPPORT)
2205 				error = 0;
2206 			if (error)
2207 				break;
2208 		}
2209 		NET_UNLOCK_SHARED();
2210 		break;
2211 
2212 	case NET_RT_IFLIST:
2213 		NET_LOCK_SHARED();
2214 		error = sysctl_iflist(af, &w);
2215 		NET_UNLOCK_SHARED();
2216 		break;
2217 
2218 	case NET_RT_STATS:
2219 		return (sysctl_rtable_rtstat(where, given, new));
2220 	case NET_RT_TABLE:
2221 		tableid = w.w_arg;
2222 		if (!rtable_exists(tableid))
2223 			return (ENOENT);
2224 		memset(&tableinfo, 0, sizeof tableinfo);
2225 		tableinfo.rti_tableid = tableid;
2226 		tableinfo.rti_domainid = rtable_l2(tableid);
2227 		error = sysctl_rdstruct(where, given, new,
2228 		    &tableinfo, sizeof(tableinfo));
2229 		return (error);
2230 	case NET_RT_IFNAMES:
2231 		NET_LOCK_SHARED();
2232 		error = sysctl_ifnames(&w);
2233 		NET_UNLOCK_SHARED();
2234 		break;
2235 	case NET_RT_SOURCE:
2236 		tableid = w.w_arg;
2237 		if (!rtable_exists(tableid))
2238 			return (ENOENT);
2239 		NET_LOCK_SHARED();
2240 		for (i = 1; i <= AF_MAX; i++) {
2241 			if (af != 0 && af != i)
2242 				continue;
2243 
2244 			error = sysctl_source(i, tableid, &w);
2245 			if (error == EAFNOSUPPORT)
2246 				error = 0;
2247 			if (error)
2248 				break;
2249 		}
2250 		NET_UNLOCK_SHARED();
2251 		break;
2252 	}
2253 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2254 	if (where) {
2255 		*given = w.w_where - (caddr_t)where;
2256 		if (w.w_needed > w.w_given)
2257 			return (ENOMEM);
2258 	} else if (w.w_needed == 0) {
2259 		*given = 0;
2260 	} else {
2261 		*given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024),
2262 		    PAGE_SIZE);
2263 	}
2264 	return (error);
2265 }
2266 
2267 int
sysctl_rtable_rtstat(void * oldp,size_t * oldlenp,void * newp)2268 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2269 {
2270 	extern struct cpumem *rtcounters;
2271 	uint64_t counters[rts_ncounters];
2272 	struct rtstat rtstat;
2273 	uint32_t *words = (uint32_t *)&rtstat;
2274 	int i;
2275 
2276 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2277 	memset(&rtstat, 0, sizeof rtstat);
2278 	counters_read(rtcounters, counters, nitems(counters), NULL);
2279 
2280 	for (i = 0; i < nitems(counters); i++)
2281 		words[i] = (uint32_t)counters[i];
2282 
2283 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2284 }
2285 
2286 int
rtm_validate_proposal(struct rt_addrinfo * info)2287 rtm_validate_proposal(struct rt_addrinfo *info)
2288 {
2289 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2290 	    RTA_SEARCH)) {
2291 		return -1;
2292 	}
2293 
2294 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2295 		const struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2296 		if (sa == NULL)
2297 			return -1;
2298 		switch (sa->sa_family) {
2299 		case AF_INET:
2300 			if (sa->sa_len != sizeof(struct sockaddr_in))
2301 				return -1;
2302 			break;
2303 		case AF_INET6:
2304 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2305 				return -1;
2306 			break;
2307 		default:
2308 			return -1;
2309 		}
2310 	}
2311 
2312 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2313 		const struct sockaddr *sa = info->rti_info[RTAX_IFA];
2314 		if (sa == NULL)
2315 			return -1;
2316 		switch (sa->sa_family) {
2317 		case AF_INET:
2318 			if (sa->sa_len != sizeof(struct sockaddr_in))
2319 				return -1;
2320 			break;
2321 		case AF_INET6:
2322 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2323 				return -1;
2324 			break;
2325 		default:
2326 			return -1;
2327 		}
2328 	}
2329 
2330 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2331 		const struct sockaddr_rtdns *rtdns =
2332 		    (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2333 		if (rtdns == NULL)
2334 			return -1;
2335 		if (rtdns->sr_len > sizeof(*rtdns))
2336 			return -1;
2337 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2338 			return -1;
2339 		switch (rtdns->sr_family) {
2340 		case AF_INET:
2341 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2342 			    sr_dns)) % sizeof(struct in_addr) != 0)
2343 				return -1;
2344 			break;
2345 #ifdef INET6
2346 		case AF_INET6:
2347 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2348 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2349 				return -1;
2350 			break;
2351 #endif
2352 		default:
2353 			return -1;
2354 		}
2355 	}
2356 
2357 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2358 		const struct sockaddr_rtstatic *rtstatic = (const struct
2359 		    sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2360 		if (rtstatic == NULL)
2361 			return -1;
2362 		if (rtstatic->sr_len > sizeof(*rtstatic))
2363 			return -1;
2364 		if (rtstatic->sr_len <=
2365 		    offsetof(struct sockaddr_rtstatic, sr_static))
2366 			return -1;
2367 	}
2368 
2369 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2370 		const struct sockaddr_rtsearch *rtsearch = (const struct
2371 		    sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2372 		if (rtsearch == NULL)
2373 			return -1;
2374 		if (rtsearch->sr_len > sizeof(*rtsearch))
2375 			return -1;
2376 		if (rtsearch->sr_len <=
2377 		    offsetof(struct sockaddr_rtsearch, sr_search))
2378 			return -1;
2379 	}
2380 
2381 	return 0;
2382 }
2383 
2384 int
rt_setsource(unsigned int rtableid,const struct sockaddr * src)2385 rt_setsource(unsigned int rtableid, const struct sockaddr *src)
2386 {
2387 	struct ifaddr	*ifa;
2388 	/*
2389 	 * If source address is 0.0.0.0 or ::
2390 	 * use automatic source selection
2391 	 */
2392 	switch(src->sa_family) {
2393 	case AF_INET:
2394 		if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) {
2395 			rtable_setsource(rtableid, AF_INET, NULL);
2396 			return (0);
2397 		}
2398 		break;
2399 #ifdef INET6
2400 	case AF_INET6:
2401 		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) {
2402 			rtable_setsource(rtableid, AF_INET6, NULL);
2403 			return (0);
2404 		}
2405 		break;
2406 #endif
2407 	default:
2408 		return (EAFNOSUPPORT);
2409 	}
2410 
2411 	/*
2412 	 * Check if source address is assigned to an interface in the
2413 	 * same rdomain
2414 	 */
2415 	if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL)
2416 		return (EINVAL);
2417 
2418 	return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2419 }
2420 
2421 /*
2422  * Definitions of protocols supported in the ROUTE domain.
2423  */
2424 
2425 const struct pr_usrreqs route_usrreqs = {
2426 	.pru_attach	= route_attach,
2427 	.pru_detach	= route_detach,
2428 	.pru_disconnect	= route_disconnect,
2429 	.pru_shutdown	= route_shutdown,
2430 	.pru_rcvd	= route_rcvd,
2431 	.pru_send	= route_send,
2432 	.pru_sockaddr	= route_sockaddr,
2433 	.pru_peeraddr	= route_peeraddr,
2434 };
2435 
2436 const struct protosw routesw[] = {
2437 {
2438   .pr_type	= SOCK_RAW,
2439   .pr_domain	= &routedomain,
2440   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2441   .pr_ctloutput	= route_ctloutput,
2442   .pr_usrreqs	= &route_usrreqs,
2443   .pr_init	= route_prinit,
2444   .pr_sysctl	= sysctl_rtable
2445 }
2446 };
2447 
2448 const struct domain routedomain = {
2449   .dom_family = PF_ROUTE,
2450   .dom_name = "route",
2451   .dom_init = route_init,
2452   .dom_protosw = routesw,
2453   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2454 };
2455