xref: /openbsd/sys/net/rtsock.c (revision d89ec533)
1 /*	$OpenBSD: rtsock.c,v 1.323 2021/12/16 09:33:56 claudio Exp $	*/
2 /*	$NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
62  */
63 
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/proc.h>
67 #include <sys/sysctl.h>
68 #include <sys/mbuf.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/domain.h>
72 #include <sys/pool.h>
73 #include <sys/protosw.h>
74 #include <sys/srp.h>
75 
76 #include <net/if.h>
77 #include <net/if_dl.h>
78 #include <net/if_var.h>
79 #include <net/route.h>
80 
81 #include <netinet/in.h>
82 
83 #ifdef MPLS
84 #include <netmpls/mpls.h>
85 #endif
86 #ifdef IPSEC
87 #include <netinet/ip_ipsp.h>
88 #include <net/if_enc.h>
89 #endif
90 #ifdef BFD
91 #include <net/bfd.h>
92 #endif
93 
94 #include <sys/stdarg.h>
95 #include <sys/kernel.h>
96 #include <sys/timeout.h>
97 
98 #define	ROUTESNDQ	8192
99 #define	ROUTERCVQ	8192
100 
101 const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
103 struct walkarg {
104 	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
105 	caddr_t	w_where, w_tmem;
106 };
107 
108 void	route_prinit(void);
109 void	rcb_ref(void *, void *);
110 void	rcb_unref(void *, void *);
111 int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
112 	    struct mbuf *);
113 int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
114 int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
115 	    struct mbuf *, struct proc *);
116 void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
117 int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
118 int	route_cleargateway(struct rtentry *, void *, unsigned int);
119 void	rtm_senddesync_timer(void *);
120 void	rtm_senddesync(struct socket *);
121 int	rtm_sendup(struct socket *, struct mbuf *);
122 
123 int	rtm_getifa(struct rt_addrinfo *, unsigned int);
124 int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
125 	    uint8_t, unsigned int);
126 struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
127 struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
128 int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
129 		     struct walkarg *);
130 int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
131 int		 rtm_validate_proposal(struct rt_addrinfo *);
132 void		 rtm_setmetrics(u_long, const struct rt_metrics *,
133 		     struct rt_kmetrics *);
134 void		 rtm_getmetrics(const struct rt_kmetrics *,
135 		     struct rt_metrics *);
136 
137 int		 sysctl_iflist(int, struct walkarg *);
138 int		 sysctl_ifnames(struct walkarg *);
139 int		 sysctl_rtable_rtstat(void *, size_t *, void *);
140 
141 int		 rt_setsource(unsigned int, struct sockaddr *);
142 
143 /*
144  * Locks used to protect struct members
145  *       I       immutable after creation
146  *       s       solock
147  */
148 struct rtpcb {
149 	struct socket		*rop_socket;		/* [I] */
150 
151 	SRPL_ENTRY(rtpcb)	rop_list;
152 	struct refcnt		rop_refcnt;
153 	struct timeout		rop_timeout;
154 	unsigned int		rop_msgfilter;		/* [s] */
155 	unsigned int		rop_flagfilter;		/* [s] */
156 	unsigned int		rop_flags;		/* [s] */
157 	u_int			rop_rtableid;		/* [s] */
158 	unsigned short		rop_proto;		/* [I] */
159 	u_char			rop_priority;		/* [s] */
160 };
161 #define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)
162 
163 struct rtptable {
164 	SRPL_HEAD(, rtpcb)	rtp_list;
165 	struct srpl_rc		rtp_rc;
166 	struct rwlock		rtp_lk;
167 	unsigned int		rtp_count;
168 };
169 
170 struct pool rtpcb_pool;
171 struct rtptable rtptable;
172 
/*
 * Overflow signalling.  When a routing socket overflows and messages
 * are lost, userland is told about it with an RTM_DESYNC message.  The
 * flags below record that state in rop_flags, and the timeout is the
 * delay before another delivery attempt is made.
 */
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Queue nothing more until the
					   socket has been emptied */

#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* milliseconds */
183 
184 void
185 route_prinit(void)
186 {
187 	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
188 	rw_init(&rtptable.rtp_lk, "rtsock");
189 	SRPL_INIT(&rtptable.rtp_list);
190 	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
191 	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
192 }
193 
194 void
195 rcb_ref(void *null, void *v)
196 {
197 	struct rtpcb *rop = v;
198 
199 	refcnt_take(&rop->rop_refcnt);
200 }
201 
202 void
203 rcb_unref(void *null, void *v)
204 {
205 	struct rtpcb *rop = v;
206 
207 	refcnt_rele_wake(&rop->rop_refcnt);
208 }
209 
210 int
211 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
212     struct mbuf *control, struct proc *p)
213 {
214 	struct rtpcb	*rop;
215 	int		 error = 0;
216 
217 	if (req == PRU_CONTROL)
218 		return (EOPNOTSUPP);
219 
220 	soassertlocked(so);
221 
222 	if (control && control->m_len) {
223 		error = EOPNOTSUPP;
224 		goto release;
225 	}
226 
227 	rop = sotortpcb(so);
228 	if (rop == NULL) {
229 		error = EINVAL;
230 		goto release;
231 	}
232 
233 	switch (req) {
234 	/* no connect, bind, accept. Socket is connected from the start */
235 	case PRU_CONNECT:
236 	case PRU_BIND:
237 	case PRU_CONNECT2:
238 	case PRU_LISTEN:
239 	case PRU_ACCEPT:
240 		error = EOPNOTSUPP;
241 		break;
242 
243 	case PRU_DISCONNECT:
244 	case PRU_ABORT:
245 		soisdisconnected(so);
246 		break;
247 	case PRU_SHUTDOWN:
248 		socantsendmore(so);
249 		break;
250 	case PRU_SENSE:
251 		/* stat: don't bother with a blocksize. */
252 		break;
253 
254 	/* minimal support, just implement a fake peer address */
255 	case PRU_SOCKADDR:
256 		error = EINVAL;
257 		break;
258 	case PRU_PEERADDR:
259 		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
260 		nam->m_len = route_src.sa_len;
261 		break;
262 
263 	case PRU_RCVD:
264 		/*
265 		 * If we are in a FLUSH state, check if the buffer is
266 		 * empty so that we can clear the flag.
267 		 */
268 		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
269 		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
270 		    rop->rop_socket->so_rcv.sb_hiwat)))
271 			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
272 		break;
273 
274 	case PRU_RCVOOB:
275 	case PRU_SENDOOB:
276 		error = EOPNOTSUPP;
277 		break;
278 	case PRU_SEND:
279 		if (nam) {
280 			error = EISCONN;
281 			break;
282 		}
283 		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
284 		m = NULL;
285 		break;
286 	default:
287 		panic("route_usrreq");
288 	}
289 
290  release:
291 	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
292 		m_freem(control);
293 		m_freem(m);
294 	}
295 	return (error);
296 }
297 
298 int
299 route_attach(struct socket *so, int proto)
300 {
301 	struct rtpcb	*rop;
302 	int		 error;
303 
304 	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
305 	if (error)
306 		return (error);
307 	/*
308 	 * use the rawcb but allocate a rtpcb, this
309 	 * code does not care about the additional fields
310 	 * and works directly on the raw socket.
311 	 */
312 	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
313 	so->so_pcb = rop;
314 	/* Init the timeout structure */
315 	timeout_set_proc(&rop->rop_timeout, rtm_senddesync_timer, so);
316 	refcnt_init(&rop->rop_refcnt);
317 
318 	rop->rop_socket = so;
319 	rop->rop_proto = proto;
320 
321 	rop->rop_rtableid = curproc->p_p->ps_rtableid;
322 
323 	soisconnected(so);
324 	so->so_options |= SO_USELOOPBACK;
325 
326 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
327 	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
328 	    rop_list);
329 	rtptable.rtp_count++;
330 	rw_exit(&rtptable.rtp_lk);
331 
332 	return (0);
333 }
334 
335 int
336 route_detach(struct socket *so)
337 {
338 	struct rtpcb	*rop;
339 
340 	soassertlocked(so);
341 
342 	rop = sotortpcb(so);
343 	if (rop == NULL)
344 		return (EINVAL);
345 
346 	rw_enter(&rtptable.rtp_lk, RW_WRITE);
347 
348 	rtptable.rtp_count--;
349 	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
350 	    rop_list);
351 	rw_exit(&rtptable.rtp_lk);
352 
353 	sounlock(so, SL_LOCKED);
354 
355 	/* wait for all references to drop */
356 	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
357 	timeout_del_barrier(&rop->rop_timeout);
358 
359 	solock(so);
360 
361 	so->so_pcb = NULL;
362 	KASSERT((so->so_state & SS_NOFDREF) == 0);
363 	pool_put(&rtpcb_pool, rop);
364 
365 	return (0);
366 }
367 
368 int
369 route_ctloutput(int op, struct socket *so, int level, int optname,
370     struct mbuf *m)
371 {
372 	struct rtpcb *rop = sotortpcb(so);
373 	int error = 0;
374 	unsigned int tid, prio;
375 
376 	if (level != AF_ROUTE)
377 		return (EINVAL);
378 
379 	switch (op) {
380 	case PRCO_SETOPT:
381 		switch (optname) {
382 		case ROUTE_MSGFILTER:
383 			if (m == NULL || m->m_len != sizeof(unsigned int))
384 				error = EINVAL;
385 			else
386 				rop->rop_msgfilter = *mtod(m, unsigned int *);
387 			break;
388 		case ROUTE_TABLEFILTER:
389 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
390 				error = EINVAL;
391 				break;
392 			}
393 			tid = *mtod(m, unsigned int *);
394 			if (tid != RTABLE_ANY && !rtable_exists(tid))
395 				error = ENOENT;
396 			else
397 				rop->rop_rtableid = tid;
398 			break;
399 		case ROUTE_PRIOFILTER:
400 			if (m == NULL || m->m_len != sizeof(unsigned int)) {
401 				error = EINVAL;
402 				break;
403 			}
404 			prio = *mtod(m, unsigned int *);
405 			if (prio > RTP_MAX)
406 				error = EINVAL;
407 			else
408 				rop->rop_priority = prio;
409 			break;
410 		case ROUTE_FLAGFILTER:
411 			if (m == NULL || m->m_len != sizeof(unsigned int))
412 				error = EINVAL;
413 			else
414 				rop->rop_flagfilter = *mtod(m, unsigned int *);
415 			break;
416 		default:
417 			error = ENOPROTOOPT;
418 			break;
419 		}
420 		break;
421 	case PRCO_GETOPT:
422 		switch (optname) {
423 		case ROUTE_MSGFILTER:
424 			m->m_len = sizeof(unsigned int);
425 			*mtod(m, unsigned int *) = rop->rop_msgfilter;
426 			break;
427 		case ROUTE_TABLEFILTER:
428 			m->m_len = sizeof(unsigned int);
429 			*mtod(m, unsigned int *) = rop->rop_rtableid;
430 			break;
431 		case ROUTE_PRIOFILTER:
432 			m->m_len = sizeof(unsigned int);
433 			*mtod(m, unsigned int *) = rop->rop_priority;
434 			break;
435 		case ROUTE_FLAGFILTER:
436 			m->m_len = sizeof(unsigned int);
437 			*mtod(m, unsigned int *) = rop->rop_flagfilter;
438 			break;
439 		default:
440 			error = ENOPROTOOPT;
441 			break;
442 		}
443 	}
444 	return (error);
445 }
446 
/*
 * Timeout handler armed by rtm_senddesync(): retry delivery of the
 * RTM_DESYNC message with the socket locked.  `xso' is the socket that
 * was registered with timeout_set_proc() in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*so = xso;
	int		 s = solock(so);

	rtm_senddesync(so);
	sounlock(so, s);
}
457 
458 void
459 rtm_senddesync(struct socket *so)
460 {
461 	struct rtpcb	*rop = sotortpcb(so);
462 	struct mbuf	*desync_mbuf;
463 
464 	soassertlocked(so);
465 
466 	/*
467 	 * Dying socket is disconnected by upper layer and there is
468 	 * no reason to send packet. Also we shouldn't reschedule
469 	 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
470 	 */
471 	if ((so->so_state & SS_ISCONNECTED) == 0 ||
472 	    (so->so_state & SS_CANTRCVMORE))
473 		return;
474 
475 	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
476 	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
477 		return;
478 
479 	/*
480 	 * If we fail to alloc memory or if sbappendaddr()
481 	 * fails, re-add timeout and try again.
482 	 */
483 	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
484 	if (desync_mbuf != NULL) {
485 		if (sbappendaddr(so, &so->so_rcv, &route_src,
486 		    desync_mbuf, NULL) != 0) {
487 			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
488 			sorwakeup(rop->rop_socket);
489 			return;
490 		}
491 		m_freem(desync_mbuf);
492 	}
493 	/* Re-add timeout to try sending msg again */
494 	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
495 }
496 
497 void
498 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
499 {
500 	struct socket *so;
501 	struct rtpcb *rop;
502 	struct rt_msghdr *rtm;
503 	struct mbuf *m = m0;
504 	struct srp_ref sr;
505 	int s;
506 
507 	/* ensure that we can access the rtm_type via mtod() */
508 	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
509 		m_freem(m);
510 		return;
511 	}
512 
513 	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
514 		/*
515 		 * If route socket is bound to an address family only send
516 		 * messages that match the address family. Address family
517 		 * agnostic messages are always sent.
518 		 */
519 		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
520 		    rop->rop_proto != sa_family)
521 			continue;
522 
523 
524 		so = rop->rop_socket;
525 		s = solock(so);
526 
527 		/*
528 		 * Check to see if we don't want our own messages and
529 		 * if we can receive anything.
530 		 */
531 		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
532 		    !(so->so_state & SS_ISCONNECTED) ||
533 		    (so->so_state & SS_CANTRCVMORE))
534 			goto next;
535 
536 		/* filter messages that the process does not want */
537 		rtm = mtod(m, struct rt_msghdr *);
538 		/* but RTM_DESYNC can't be filtered */
539 		if (rtm->rtm_type != RTM_DESYNC) {
540 			if (rop->rop_msgfilter != 0 &&
541 			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
542 				goto next;
543 			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
544 				goto next;
545 		}
546 		switch (rtm->rtm_type) {
547 		case RTM_IFANNOUNCE:
548 		case RTM_DESYNC:
549 			/* no tableid */
550 			break;
551 		case RTM_RESOLVE:
552 		case RTM_NEWADDR:
553 		case RTM_DELADDR:
554 		case RTM_IFINFO:
555 		case RTM_80211INFO:
556 		case RTM_BFD:
557 			/* check against rdomain id */
558 			if (rop->rop_rtableid != RTABLE_ANY &&
559 			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
560 				goto next;
561 			break;
562 		default:
563 			if (rop->rop_priority != 0 &&
564 			    rop->rop_priority < rtm->rtm_priority)
565 				goto next;
566 			/* check against rtable id */
567 			if (rop->rop_rtableid != RTABLE_ANY &&
568 			    rop->rop_rtableid != rtm->rtm_tableid)
569 				goto next;
570 			break;
571 		}
572 
573 		/*
574 		 * Check to see if the flush flag is set. If so, don't queue
575 		 * any more messages until the flag is cleared.
576 		 */
577 		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
578 			goto next;
579 
580 		rtm_sendup(so, m);
581 next:
582 		sounlock(so, s);
583 	}
584 	SRPL_LEAVE(&sr);
585 
586 	m_freem(m);
587 }
588 
589 int
590 rtm_sendup(struct socket *so, struct mbuf *m0)
591 {
592 	struct rtpcb *rop = sotortpcb(so);
593 	struct mbuf *m;
594 
595 	soassertlocked(so);
596 
597 	m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
598 	if (m == NULL)
599 		return (ENOMEM);
600 
601 	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
602 	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
603 		/* Flag socket as desync'ed and flush required */
604 		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
605 		rtm_senddesync(so);
606 		m_freem(m);
607 		return (ENOBUFS);
608 	}
609 
610 	sorwakeup(so);
611 	return (0);
612 }
613 
614 struct rt_msghdr *
615 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
616 {
617 	struct rt_msghdr	*rtm;
618 	struct rt_addrinfo	 info;
619 	struct sockaddr_rtlabel	 sa_rl;
620 	struct sockaddr_in6	 sa_mask;
621 #ifdef BFD
622 	struct sockaddr_bfd	 sa_bfd;
623 #endif
624 	struct ifnet		*ifp = NULL;
625 	int			 len;
626 
627 	bzero(&info, sizeof(info));
628 	info.rti_info[RTAX_DST] = rt_key(rt);
629 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
630 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
631 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
632 #ifdef BFD
633 	if (rt->rt_flags & RTF_BFD) {
634 		KERNEL_LOCK();
635 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
636 		KERNEL_UNLOCK();
637 	}
638 #endif
639 #ifdef MPLS
640 	if (rt->rt_flags & RTF_MPLS) {
641 		struct sockaddr_mpls	 sa_mpls;
642 
643 		bzero(&sa_mpls, sizeof(sa_mpls));
644 		sa_mpls.smpls_family = AF_MPLS;
645 		sa_mpls.smpls_len = sizeof(sa_mpls);
646 		sa_mpls.smpls_label = ((struct rt_mpls *)
647 		    rt->rt_llinfo)->mpls_label;
648 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
649 		info.rti_mpls = ((struct rt_mpls *)
650 		    rt->rt_llinfo)->mpls_operation;
651 	}
652 #endif
653 	ifp = if_get(rt->rt_ifidx);
654 	if (ifp != NULL) {
655 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
656 		info.rti_info[RTAX_IFA] =
657 		    rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family);
658 		if (info.rti_info[RTAX_IFA] == NULL)
659 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
660 		if (ifp->if_flags & IFF_POINTOPOINT)
661 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
662 	}
663 	if_put(ifp);
664 	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */
665 
666 	/* build new route message */
667 	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
668 	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);
669 
670 	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
671 	rtm->rtm_type = type;
672 	rtm->rtm_index = rt->rt_ifidx;
673 	rtm->rtm_tableid = tableid;
674 	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
675 	rtm->rtm_flags = rt->rt_flags;
676 	rtm->rtm_pid = curproc->p_p->ps_pid;
677 	rtm->rtm_seq = seq;
678 	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
679 	rtm->rtm_addrs = info.rti_addrs;
680 #ifdef MPLS
681 	rtm->rtm_mpls = info.rti_mpls;
682 #endif
683 	return rtm;
684 }
685 
686 int
687 route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
688     struct mbuf *control)
689 {
690 	struct rt_msghdr	*rtm = NULL;
691 	struct rtentry		*rt = NULL;
692 	struct rt_addrinfo	 info;
693 	struct ifnet		*ifp;
694 	int			 len, seq, useloopback, error = 0;
695 	u_int			 tableid;
696 	u_int8_t		 prio;
697 	u_char			 vers, type;
698 
699 	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
700 	    (m = m_pullup(m, sizeof(int32_t))) == 0))
701 		return (ENOBUFS);
702 	if ((m->m_flags & M_PKTHDR) == 0)
703 		panic("route_output");
704 
705 	useloopback = so->so_options & SO_USELOOPBACK;
706 
707 	/*
708 	 * The socket can't be closed concurrently because the file
709 	 * descriptor reference is still held.
710 	 */
711 
712 	sounlock(so, SL_LOCKED);
713 
714 	len = m->m_pkthdr.len;
715 	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
716 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
717 		error = EINVAL;
718 		goto fail;
719 	}
720 	vers = mtod(m, struct rt_msghdr *)->rtm_version;
721 	switch (vers) {
722 	case RTM_VERSION:
723 		if (len < sizeof(struct rt_msghdr)) {
724 			error = EINVAL;
725 			goto fail;
726 		}
727 		if (len > RTM_MAXSIZE) {
728 			error = EMSGSIZE;
729 			goto fail;
730 		}
731 		rtm = malloc(len, M_RTABLE, M_WAITOK);
732 		m_copydata(m, 0, len, rtm);
733 		break;
734 	default:
735 		error = EPROTONOSUPPORT;
736 		goto fail;
737 	}
738 
739 	/* Verify that the caller is sending an appropriate message early */
740 	switch (rtm->rtm_type) {
741 	case RTM_ADD:
742 	case RTM_DELETE:
743 	case RTM_GET:
744 	case RTM_CHANGE:
745 	case RTM_PROPOSAL:
746 	case RTM_SOURCE:
747 		break;
748 	default:
749 		error = EOPNOTSUPP;
750 		goto fail;
751 	}
752 	/*
753 	 * Verify that the header length is valid.
754 	 * All messages from userland start with a struct rt_msghdr.
755 	 */
756 	if (rtm->rtm_hdrlen == 0)	/* old client */
757 		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
758 	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
759 	    len < rtm->rtm_hdrlen) {
760 		error = EINVAL;
761 		goto fail;
762 	}
763 
764 	rtm->rtm_pid = curproc->p_p->ps_pid;
765 
766 	/*
767 	 * Verify that the caller has the appropriate privilege; RTM_GET
768 	 * is the only operation the non-superuser is allowed.
769 	 */
770 	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
771 		error = EACCES;
772 		goto fail;
773 	}
774 	tableid = rtm->rtm_tableid;
775 	if (!rtable_exists(tableid)) {
776 		if (rtm->rtm_type == RTM_ADD) {
777 			if ((error = rtable_add(tableid)) != 0)
778 				goto fail;
779 		} else {
780 			error = EINVAL;
781 			goto fail;
782 		}
783 	}
784 
785 	/* Do not let userland play with kernel-only flags. */
786 	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
787 		error = EINVAL;
788 		goto fail;
789 	}
790 
791 	/* make sure that kernel-only bits are not set */
792 	rtm->rtm_priority &= RTP_MASK;
793 	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
794 	rtm->rtm_fmask &= RTF_FMASK;
795 
796 	if (rtm->rtm_priority != 0) {
797 		if (rtm->rtm_priority > RTP_MAX ||
798 		    rtm->rtm_priority == RTP_LOCAL) {
799 			error = EINVAL;
800 			goto fail;
801 		}
802 		prio = rtm->rtm_priority;
803 	} else if (rtm->rtm_type != RTM_ADD)
804 		prio = RTP_ANY;
805 	else if (rtm->rtm_flags & RTF_STATIC)
806 		prio = 0;
807 	else
808 		prio = RTP_DEFAULT;
809 
810 	bzero(&info, sizeof(info));
811 	info.rti_addrs = rtm->rtm_addrs;
812 	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
813 	    len + (caddr_t)rtm, &info)) != 0)
814 		goto fail;
815 
816 	info.rti_flags = rtm->rtm_flags;
817 
818 	if (rtm->rtm_type != RTM_SOURCE &&
819 	    rtm->rtm_type != RTM_PROPOSAL &&
820 	    (info.rti_info[RTAX_DST] == NULL ||
821 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
822 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
823 	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
824 	    info.rti_info[RTAX_GENMASK] != NULL)) {
825 		error = EINVAL;
826 		goto fail;
827 	}
828 #ifdef MPLS
829 	info.rti_mpls = rtm->rtm_mpls;
830 #endif
831 
832 	if (info.rti_info[RTAX_GATEWAY] != NULL &&
833 	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
834 	    (info.rti_flags & RTF_CLONING) == 0) {
835 		info.rti_flags |= RTF_LLINFO;
836 	}
837 
838 	/*
839 	 * Validate RTM_PROPOSAL and pass it along or error out.
840 	 */
841 	if (rtm->rtm_type == RTM_PROPOSAL) {
842 		if (rtm_validate_proposal(&info) == -1) {
843 			error = EINVAL;
844 			goto fail;
845 		}
846 		/*
847 		 * If this is a solicitation proposal forward request to
848 		 * all interfaces. Most handlers will ignore it but at least
849 		 * umb(4) will send a response to this event.
850 		 */
851 		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
852 			NET_LOCK();
853 			TAILQ_FOREACH(ifp, &ifnet, if_list) {
854 				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
855 			}
856 			NET_UNLOCK();
857 		}
858 	} else if (rtm->rtm_type == RTM_SOURCE) {
859 		if (info.rti_info[RTAX_IFA] == NULL) {
860 			error = EINVAL;
861 			goto fail;
862 		}
863 		if ((error =
864 		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
865 			goto fail;
866 	} else {
867 		error = rtm_output(rtm, &rt, &info, prio, tableid);
868 		if (!error) {
869 			type = rtm->rtm_type;
870 			seq = rtm->rtm_seq;
871 			free(rtm, M_RTABLE, len);
872 			rtm = rtm_report(rt, type, seq, tableid);
873 			len = rtm->rtm_msglen;
874 		}
875 	}
876 
877 	rtfree(rt);
878 	if (error) {
879 		rtm->rtm_errno = error;
880 	} else {
881 		rtm->rtm_flags |= RTF_DONE;
882 	}
883 
884 	/*
885 	 * Check to see if we don't want our own messages.
886 	 */
887 	if (!useloopback) {
888 		if (rtptable.rtp_count == 0) {
889 			/* no other listener and no loopback of messages */
890 			goto fail;
891 		}
892 	}
893 	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
894 		m_freem(m);
895 		m = NULL;
896 	} else if (m->m_pkthdr.len > len)
897 		m_adj(m, len - m->m_pkthdr.len);
898 	free(rtm, M_RTABLE, len);
899 	if (m)
900 		route_input(m, so, info.rti_info[RTAX_DST] ?
901 		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
902 	solock(so);
903 
904 	return (error);
905 fail:
906 	free(rtm, M_RTABLE, len);
907 	m_freem(m);
908 	solock(so);
909 
910 	return (error);
911 }
912 
913 int
914 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
915     struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
916 {
917 	struct rtentry		*rt = *prt;
918 	struct ifnet		*ifp = NULL;
919 	int			 plen, newgate = 0, error = 0;
920 
921 	switch (rtm->rtm_type) {
922 	case RTM_ADD:
923 		if (info->rti_info[RTAX_GATEWAY] == NULL) {
924 			error = EINVAL;
925 			break;
926 		}
927 
928 		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
929 		if ((error = route_arp_conflict(rt, info))) {
930 			rtfree(rt);
931 			rt = NULL;
932 			break;
933 		}
934 
935 		/*
936 		 * We cannot go through a delete/create/insert cycle for
937 		 * cached route because this can lead to races in the
938 		 * receive path.  Instead we update the L2 cache.
939 		 */
940 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
941 			ifp = if_get(rt->rt_ifidx);
942 			if (ifp == NULL) {
943 				rtfree(rt);
944 				rt = NULL;
945 				error = ESRCH;
946 				break;
947 			}
948 
949 			goto change;
950 		}
951 
952 		rtfree(rt);
953 		rt = NULL;
954 
955 		NET_LOCK();
956 		if ((error = rtm_getifa(info, tableid)) != 0) {
957 			NET_UNLOCK();
958 			break;
959 		}
960 		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
961 		NET_UNLOCK();
962 		if (error == 0)
963 			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
964 			    &rt->rt_rmx);
965 		break;
966 	case RTM_DELETE:
967 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
968 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
969 		    prio);
970 		if (rt == NULL) {
971 			error = ESRCH;
972 			break;
973 		}
974 
975 		/*
976 		 * If we got multipath routes, we require users to specify
977 		 * a matching gateway.
978 		 */
979 		if (ISSET(rt->rt_flags, RTF_MPATH) &&
980 		    info->rti_info[RTAX_GATEWAY] == NULL) {
981 			error = ESRCH;
982 			break;
983 		}
984 
985 		ifp = if_get(rt->rt_ifidx);
986 		if (ifp == NULL) {
987 			rtfree(rt);
988 			rt = NULL;
989 			error = ESRCH;
990 			break;
991 		}
992 
993 		/*
994 		 * Invalidate the cache of automagically created and
995 		 * referenced L2 entries to make sure that ``rt_gwroute''
996 		 * pointer stays valid for other CPUs.
997 		 */
998 		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
999 			NET_LOCK();
1000 			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
1001 			/* Reset the MTU of the gateway route. */
1002 			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
1003 			    route_cleargateway, rt);
1004 			NET_UNLOCK();
1005 			break;
1006 		}
1007 
1008 		/*
1009 		 * Make sure that local routes are only modified by the
1010 		 * kernel.
1011 		 */
1012 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1013 			error = EINVAL;
1014 			break;
1015 		}
1016 
1017 		rtfree(rt);
1018 		rt = NULL;
1019 
1020 		NET_LOCK();
1021 		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
1022 		NET_UNLOCK();
1023 		break;
1024 	case RTM_CHANGE:
1025 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1026 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1027 		    prio);
1028 		/*
1029 		 * If we got multipath routes, we require users to specify
1030 		 * a matching gateway.
1031 		 */
1032 		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
1033 		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
1034 			rtfree(rt);
1035 			rt = NULL;
1036 		}
1037 
1038 		/*
1039 		 * If RTAX_GATEWAY is the argument we're trying to
1040 		 * change, try to find a compatible route.
1041 		 */
1042 		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
1043 			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1044 			    info->rti_info[RTAX_NETMASK], NULL, prio);
1045 			/* Ensure we don't pick a multipath one. */
1046 			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
1047 				rtfree(rt);
1048 				rt = NULL;
1049 			}
1050 		}
1051 
1052 		if (rt == NULL) {
1053 			error = ESRCH;
1054 			break;
1055 		}
1056 
1057 		/*
1058 		 * Make sure that local routes are only modified by the
1059 		 * kernel.
1060 		 */
1061 		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
1062 			error = EINVAL;
1063 			break;
1064 		}
1065 
1066 		ifp = if_get(rt->rt_ifidx);
1067 		if (ifp == NULL) {
1068 			rtfree(rt);
1069 			rt = NULL;
1070 			error = ESRCH;
1071 			break;
1072 		}
1073 
1074 		/*
1075 		 * RTM_CHANGE needs a perfect match.
1076 		 */
1077 		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
1078 		    info->rti_info[RTAX_NETMASK]);
1079 		if (rt_plen(rt) != plen) {
1080 			error = ESRCH;
1081 			break;
1082 		}
1083 
1084 		if (info->rti_info[RTAX_GATEWAY] != NULL)
1085 			if (rt->rt_gateway == NULL ||
1086 			    bcmp(rt->rt_gateway,
1087 			    info->rti_info[RTAX_GATEWAY],
1088 			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
1089 				newgate = 1;
1090 			}
1091 		/*
1092 		 * Check reachable gateway before changing the route.
1093 		 * New gateway could require new ifaddr, ifp;
1094 		 * flags may also be different; ifp may be specified
1095 		 * by ll sockaddr when protocol address is ambiguous.
1096 		 */
1097 		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
1098 		    info->rti_info[RTAX_IFA] != NULL) {
1099 			struct ifaddr	*ifa = NULL;
1100 
1101 			NET_LOCK();
1102 			if ((error = rtm_getifa(info, tableid)) != 0) {
1103 				NET_UNLOCK();
1104 				break;
1105 			}
1106 			ifa = info->rti_ifa;
1107 			if (rt->rt_ifa != ifa) {
1108 				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
1109 				ifafree(rt->rt_ifa);
1110 
1111 				ifa->ifa_refcnt++;
1112 				rt->rt_ifa = ifa;
1113 				rt->rt_ifidx = ifa->ifa_ifp->if_index;
1114 				/* recheck link state after ifp change */
1115 				rt_if_linkstate_change(rt, ifa->ifa_ifp,
1116 				    tableid);
1117 			}
1118 			NET_UNLOCK();
1119 		}
1120 change:
1121 		if (info->rti_info[RTAX_GATEWAY] != NULL) {
1122 			/* When updating the gateway, make sure it is valid. */
1123 			if (!newgate && rt->rt_gateway->sa_family !=
1124 			    info->rti_info[RTAX_GATEWAY]->sa_family) {
1125 				error = EINVAL;
1126 				break;
1127 			}
1128 
1129 			NET_LOCK();
1130 			error = rt_setgate(rt,
1131 			    info->rti_info[RTAX_GATEWAY], tableid);
1132 			NET_UNLOCK();
1133 			if (error)
1134 				break;
1135 		}
1136 #ifdef MPLS
1137 		if (rtm->rtm_flags & RTF_MPLS) {
1138 			NET_LOCK();
1139 			error = rt_mpls_set(rt,
1140 			    info->rti_info[RTAX_SRC], info->rti_mpls);
1141 			NET_UNLOCK();
1142 			if (error)
1143 				break;
1144 		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
1145 			NET_LOCK();
1146 			/* if gateway changed remove MPLS information */
1147 			rt_mpls_clear(rt);
1148 			NET_UNLOCK();
1149 		}
1150 #endif
1151 
1152 #ifdef BFD
1153 		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
1154 			KERNEL_LOCK();
1155 			error = bfdset(rt);
1156 			KERNEL_UNLOCK();
1157 			if (error)
1158 				break;
1159 		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
1160 		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
1161 			KERNEL_LOCK();
1162 			bfdclear(rt);
1163 			KERNEL_UNLOCK();
1164 		}
1165 #endif
1166 
1167 		NET_LOCK();
1168 		/* Hack to allow some flags to be toggled */
1169 		if (rtm->rtm_fmask) {
1170 			/* MPLS flag it is set by rt_mpls_set() */
1171 			rtm->rtm_fmask &= ~RTF_MPLS;
1172 			rtm->rtm_flags &= ~RTF_MPLS;
1173 			rt->rt_flags =
1174 			    (rt->rt_flags & ~rtm->rtm_fmask) |
1175 			    (rtm->rtm_flags & rtm->rtm_fmask);
1176 		}
1177 		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
1178 
1179 		ifp->if_rtrequest(ifp, RTM_ADD, rt);
1180 
1181 		if (info->rti_info[RTAX_LABEL] != NULL) {
1182 			char *rtlabel = ((struct sockaddr_rtlabel *)
1183 			    info->rti_info[RTAX_LABEL])->sr_label;
1184 			rtlabel_unref(rt->rt_labelid);
1185 			rt->rt_labelid = rtlabel_name2id(rtlabel);
1186 		}
1187 		if_group_routechange(info->rti_info[RTAX_DST],
1188 		    info->rti_info[RTAX_NETMASK]);
1189 		rt->rt_locks &= ~(rtm->rtm_inits);
1190 		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
1191 		NET_UNLOCK();
1192 		break;
1193 	case RTM_GET:
1194 		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
1195 		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
1196 		    prio);
1197 		if (rt == NULL)
1198 			error = ESRCH;
1199 		break;
1200 	}
1201 
1202 	if_put(ifp);
1203 	*prt = rt;
1204 	return (error);
1205 }
1206 
1207 struct ifaddr *
1208 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
1209     unsigned int rtableid)
1210 {
1211 	struct ifaddr	*ifa;
1212 
1213 	if ((flags & RTF_GATEWAY) == 0) {
1214 		/*
1215 		 * If we are adding a route to an interface,
1216 		 * and the interface is a pt to pt link
1217 		 * we should search for the destination
1218 		 * as our clue to the interface.  Otherwise
1219 		 * we can use the local address.
1220 		 */
1221 		ifa = NULL;
1222 		if (flags & RTF_HOST)
1223 			ifa = ifa_ifwithdstaddr(dst, rtableid);
1224 		if (ifa == NULL)
1225 			ifa = ifa_ifwithaddr(gateway, rtableid);
1226 	} else {
1227 		/*
1228 		 * If we are adding a route to a remote net
1229 		 * or host, the gateway may still be on the
1230 		 * other end of a pt to pt link.
1231 		 */
1232 		ifa = ifa_ifwithdstaddr(gateway, rtableid);
1233 	}
1234 	if (ifa == NULL) {
1235 		if (gateway->sa_family == AF_LINK) {
1236 			struct sockaddr_dl *sdl = satosdl(gateway);
1237 			struct ifnet *ifp = if_get(sdl->sdl_index);
1238 
1239 			if (ifp != NULL)
1240 				ifa = ifaof_ifpforaddr(dst, ifp);
1241 			if_put(ifp);
1242 		} else {
1243 			struct rtentry *rt;
1244 
1245 			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
1246 			if (rt != NULL)
1247 				ifa = rt->rt_ifa;
1248 			rtfree(rt);
1249 		}
1250 	}
1251 	if (ifa == NULL)
1252 		return (NULL);
1253 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
1254 		struct ifaddr	*oifa = ifa;
1255 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
1256 		if (ifa == NULL)
1257 			ifa = oifa;
1258 	}
1259 	return (ifa);
1260 }
1261 
1262 int
1263 rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
1264 {
1265 	struct ifnet	*ifp = NULL;
1266 
1267 	/*
1268 	 * The "returned" `ifa' is guaranteed to be alive only if
1269 	 * the NET_LOCK() is held.
1270 	 */
1271 	NET_ASSERT_LOCKED();
1272 
1273 	/*
1274 	 * ifp may be specified by sockaddr_dl when protocol address
1275 	 * is ambiguous
1276 	 */
1277 	if (info->rti_info[RTAX_IFP] != NULL) {
1278 		struct sockaddr_dl *sdl;
1279 
1280 		sdl = satosdl(info->rti_info[RTAX_IFP]);
1281 		ifp = if_get(sdl->sdl_index);
1282 	}
1283 
1284 #ifdef IPSEC
1285 	/*
1286 	 * If the destination is a PF_KEY address, we'll look
1287 	 * for the existence of a encap interface number or address
1288 	 * in the options list of the gateway. By default, we'll return
1289 	 * enc0.
1290 	 */
1291 	if (info->rti_info[RTAX_DST] &&
1292 	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
1293 		info->rti_ifa = enc_getifa(rtid, 0);
1294 #endif
1295 
1296 	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
1297 		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);
1298 
1299 	if (info->rti_ifa == NULL) {
1300 		struct sockaddr	*sa;
1301 
1302 		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
1303 			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
1304 				sa = info->rti_info[RTAX_DST];
1305 
1306 		if (sa != NULL && ifp != NULL)
1307 			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
1308 		else if (info->rti_info[RTAX_DST] != NULL &&
1309 		    info->rti_info[RTAX_GATEWAY] != NULL)
1310 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1311 			    info->rti_info[RTAX_DST],
1312 			    info->rti_info[RTAX_GATEWAY],
1313 			    rtid);
1314 		else if (sa != NULL)
1315 			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
1316 			    sa, sa, rtid);
1317 	}
1318 
1319 	if_put(ifp);
1320 
1321 	if (info->rti_ifa == NULL)
1322 		return (ENETUNREACH);
1323 
1324 	return (0);
1325 }
1326 
1327 int
1328 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
1329 {
1330 	struct rtentry *nhrt = arg;
1331 
1332 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
1333 	    !ISSET(rt->rt_locks, RTV_MTU))
1334 		rt->rt_mtu = 0;
1335 
1336 	return (0);
1337 }
1338 
1339 /*
1340  * Check if the user request to insert an ARP entry does not conflict
1341  * with existing ones.
1342  *
1343  * Only two entries are allowed for a given IP address: a private one
1344  * (priv) and a public one (pub).
1345  */
1346 int
1347 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info)
1348 {
1349 	int		 proxy = (info->rti_flags & RTF_ANNOUNCE);
1350 
1351 	if ((info->rti_flags & RTF_LLINFO) == 0 ||
1352 	    (info->rti_info[RTAX_DST]->sa_family != AF_INET))
1353 		return (0);
1354 
1355 	if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO))
1356 		return (0);
1357 
1358 	/* If the entry is cached, it can be updated. */
1359 	if (ISSET(rt->rt_flags, RTF_CACHED))
1360 		return (0);
1361 
1362 	/*
1363 	 * Same destination, not cached and both "priv" or "pub" conflict.
1364 	 * If a second entry exists, it always conflict.
1365 	 */
1366 	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
1367 	    ISSET(rt->rt_flags, RTF_MPATH))
1368 		return (EEXIST);
1369 
1370 	/* No conflict but an entry exist so we need to force mpath. */
1371 	info->rti_flags |= RTF_MPATH;
1372 	return (0);
1373 }
1374 
1375 void
1376 rtm_setmetrics(u_long which, const struct rt_metrics *in,
1377     struct rt_kmetrics *out)
1378 {
1379 	int64_t expire;
1380 
1381 	if (which & RTV_MTU)
1382 		out->rmx_mtu = in->rmx_mtu;
1383 	if (which & RTV_EXPIRE) {
1384 		expire = in->rmx_expire;
1385 		if (expire != 0) {
1386 			expire -= gettime();
1387 			expire += getuptime();
1388 		}
1389 
1390 		out->rmx_expire = expire;
1391 	}
1392 }
1393 
1394 void
1395 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
1396 {
1397 	int64_t expire;
1398 
1399 	expire = in->rmx_expire;
1400 	if (expire != 0) {
1401 		expire -= getuptime();
1402 		expire += gettime();
1403 	}
1404 
1405 	bzero(out, sizeof(*out));
1406 	out->rmx_locks = in->rmx_locks;
1407 	out->rmx_mtu = in->rmx_mtu;
1408 	out->rmx_expire = expire;
1409 	out->rmx_pksent = in->rmx_pksent;
1410 }
1411 
/*
 * ROUNDUP(a): for a > 0, round `a' up to the next multiple of
 * sizeof(long) (assuming sizeof(long) is a power of two); any other
 * value yields sizeof(long).
 * ADVANCE(x, n): move `x' forward by the rounded-up sa_len of the
 * sockaddr `n'.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
1415 
1416 int
1417 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1418 {
1419 	struct sockaddr	*sa;
1420 	int		 i;
1421 
1422 	/*
1423 	 * Parse address bits, split address storage in chunks, and
1424 	 * set info pointers.  Use sa_len for traversing the memory
1425 	 * and check that we stay within in the limit.
1426 	 */
1427 	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
1428 	for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) {
1429 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1430 			continue;
1431 		if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim)
1432 			return (EINVAL);
1433 		sa = (struct sockaddr *)cp;
1434 		if (cp + sa->sa_len > cplim)
1435 			return (EINVAL);
1436 		rtinfo->rti_info[i] = sa;
1437 		ADVANCE(cp, sa);
1438 	}
1439 	/*
1440 	 * Check that the address family is suitable for the route address
1441 	 * type.  Check that each address has a size that fits its family
1442 	 * and its length is within the size.  Strings within addresses must
1443 	 * be NUL terminated.
1444 	 */
1445 	for (i = 0; i < RTAX_MAX; i++) {
1446 		size_t len, maxlen, size;
1447 
1448 		sa = rtinfo->rti_info[i];
1449 		if (sa == NULL)
1450 			continue;
1451 		maxlen = size = 0;
1452 		switch (i) {
1453 		case RTAX_DST:
1454 		case RTAX_GATEWAY:
1455 		case RTAX_SRC:
1456 			switch (sa->sa_family) {
1457 			case AF_INET:
1458 				size = sizeof(struct sockaddr_in);
1459 				break;
1460 			case AF_LINK:
1461 				size = sizeof(struct sockaddr_dl);
1462 				break;
1463 #ifdef INET6
1464 			case AF_INET6:
1465 				size = sizeof(struct sockaddr_in6);
1466 				break;
1467 #endif
1468 #ifdef MPLS
1469 			case AF_MPLS:
1470 				size = sizeof(struct sockaddr_mpls);
1471 				break;
1472 #endif
1473 			}
1474 			break;
1475 		case RTAX_IFP:
1476 			if (sa->sa_family != AF_LINK)
1477 				return (EAFNOSUPPORT);
1478 			/*
1479 			 * XXX Should be sizeof(struct sockaddr_dl), but
1480 			 * route(8) has a bug and provides less memory.
1481 			 * arp(8) has another bug and uses sizeof pointer.
1482 			 */
1483 			size = 4;
1484 			break;
1485 		case RTAX_IFA:
1486 			switch (sa->sa_family) {
1487 			case AF_INET:
1488 				size = sizeof(struct sockaddr_in);
1489 				break;
1490 #ifdef INET6
1491 			case AF_INET6:
1492 				size = sizeof(struct sockaddr_in6);
1493 				break;
1494 #endif
1495 			default:
1496 				return (EAFNOSUPPORT);
1497 			}
1498 			break;
1499 		case RTAX_LABEL:
1500 			sa->sa_family = AF_UNSPEC;
1501 			maxlen = RTLABEL_LEN;
1502 			size = sizeof(struct sockaddr_rtlabel);
1503 			break;
1504 #ifdef BFD
1505 		case RTAX_BFD:
1506 			sa->sa_family = AF_UNSPEC;
1507 			size = sizeof(struct sockaddr_bfd);
1508 			break;
1509 #endif
1510 		case RTAX_DNS:
1511 			/* more validation in rtm_validate_proposal */
1512 			if (sa->sa_len > sizeof(struct sockaddr_rtdns))
1513 				return (EINVAL);
1514 			if (sa->sa_len < offsetof(struct sockaddr_rtdns,
1515 			    sr_dns))
1516 				return (EINVAL);
1517 			switch (sa->sa_family) {
1518 			case AF_INET:
1519 #ifdef INET6
1520 			case AF_INET6:
1521 #endif
1522 				break;
1523 			default:
1524 				return (EAFNOSUPPORT);
1525 			}
1526 			break;
1527 		case RTAX_STATIC:
1528 			sa->sa_family = AF_UNSPEC;
1529 			maxlen = RTSTATIC_LEN;
1530 			size = sizeof(struct sockaddr_rtstatic);
1531 			break;
1532 		case RTAX_SEARCH:
1533 			sa->sa_family = AF_UNSPEC;
1534 			maxlen = RTSEARCH_LEN;
1535 			size = sizeof(struct sockaddr_rtsearch);
1536 			break;
1537 		}
1538 		if (size) {
1539 			/* memory for the full struct must be provided */
1540 			if (sa->sa_len < size)
1541 				return (EINVAL);
1542 		}
1543 		if (maxlen) {
1544 			/* this should not happen */
1545 			if (2 + maxlen > size)
1546 				return (EINVAL);
1547 			/* strings must be NUL terminated within the struct */
1548 			len = strnlen(sa->sa_data, maxlen);
1549 			if (len >= maxlen || 2 + len >= sa->sa_len)
1550 				return (EINVAL);
1551 			break;
1552 		}
1553 	}
1554 	return (0);
1555 }
1556 
1557 struct mbuf *
1558 rtm_msg1(int type, struct rt_addrinfo *rtinfo)
1559 {
1560 	struct rt_msghdr	*rtm;
1561 	struct mbuf		*m;
1562 	int			 i;
1563 	struct sockaddr		*sa;
1564 	int			 len, dlen, hlen;
1565 
1566 	switch (type) {
1567 	case RTM_DELADDR:
1568 	case RTM_NEWADDR:
1569 		len = sizeof(struct ifa_msghdr);
1570 		break;
1571 	case RTM_IFINFO:
1572 		len = sizeof(struct if_msghdr);
1573 		break;
1574 	case RTM_IFANNOUNCE:
1575 		len = sizeof(struct if_announcemsghdr);
1576 		break;
1577 #ifdef BFD
1578 	case RTM_BFD:
1579 		len = sizeof(struct bfd_msghdr);
1580 		break;
1581 #endif
1582 	case RTM_80211INFO:
1583 		len = sizeof(struct if_ieee80211_msghdr);
1584 		break;
1585 	default:
1586 		len = sizeof(struct rt_msghdr);
1587 		break;
1588 	}
1589 	if (len > MCLBYTES)
1590 		panic("rtm_msg1");
1591 	m = m_gethdr(M_DONTWAIT, MT_DATA);
1592 	if (m && len > MHLEN) {
1593 		MCLGET(m, M_DONTWAIT);
1594 		if ((m->m_flags & M_EXT) == 0) {
1595 			m_free(m);
1596 			m = NULL;
1597 		}
1598 	}
1599 	if (m == NULL)
1600 		return (m);
1601 	m->m_pkthdr.len = m->m_len = hlen = len;
1602 	m->m_pkthdr.ph_ifidx = 0;
1603 	rtm = mtod(m, struct rt_msghdr *);
1604 	bzero(rtm, len);
1605 	for (i = 0; i < RTAX_MAX; i++) {
1606 		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
1607 			continue;
1608 		rtinfo->rti_addrs |= (1 << i);
1609 		dlen = ROUNDUP(sa->sa_len);
1610 		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
1611 			m_freem(m);
1612 			return (NULL);
1613 		}
1614 		len += dlen;
1615 	}
1616 	rtm->rtm_msglen = len;
1617 	rtm->rtm_hdrlen = hlen;
1618 	rtm->rtm_version = RTM_VERSION;
1619 	rtm->rtm_type = type;
1620 	return (m);
1621 }
1622 
1623 int
1624 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
1625     struct walkarg *w)
1626 {
1627 	int		i;
1628 	int		len, dlen, hlen, second_time = 0;
1629 	caddr_t		cp0;
1630 
1631 	rtinfo->rti_addrs = 0;
1632 again:
1633 	switch (type) {
1634 	case RTM_DELADDR:
1635 	case RTM_NEWADDR:
1636 		len = sizeof(struct ifa_msghdr);
1637 		break;
1638 	case RTM_IFINFO:
1639 		len = sizeof(struct if_msghdr);
1640 		break;
1641 	default:
1642 		len = sizeof(struct rt_msghdr);
1643 		break;
1644 	}
1645 	hlen = len;
1646 	if ((cp0 = cp) != NULL)
1647 		cp += len;
1648 	for (i = 0; i < RTAX_MAX; i++) {
1649 		struct sockaddr *sa;
1650 
1651 		if ((sa = rtinfo->rti_info[i]) == NULL)
1652 			continue;
1653 		rtinfo->rti_addrs |= (1 << i);
1654 		dlen = ROUNDUP(sa->sa_len);
1655 		if (cp) {
1656 			bcopy(sa, cp, (size_t)dlen);
1657 			cp += dlen;
1658 		}
1659 		len += dlen;
1660 	}
1661 	/* align message length to the next natural boundary */
1662 	len = ALIGN(len);
1663 	if (cp == 0 && w != NULL && !second_time) {
1664 		w->w_needed += len;
1665 		if (w->w_needed <= 0 && w->w_where) {
1666 			if (w->w_tmemsize < len) {
1667 				free(w->w_tmem, M_RTABLE, w->w_tmemsize);
1668 				w->w_tmem = malloc(len, M_RTABLE,
1669 				    M_NOWAIT | M_ZERO);
1670 				if (w->w_tmem)
1671 					w->w_tmemsize = len;
1672 			}
1673 			if (w->w_tmem) {
1674 				cp = w->w_tmem;
1675 				second_time = 1;
1676 				goto again;
1677 			} else
1678 				w->w_where = 0;
1679 		}
1680 	}
1681 	if (cp && w)		/* clear the message header */
1682 		bzero(cp0, hlen);
1683 
1684 	if (cp) {
1685 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
1686 
1687 		rtm->rtm_version = RTM_VERSION;
1688 		rtm->rtm_type = type;
1689 		rtm->rtm_msglen = len;
1690 		rtm->rtm_hdrlen = hlen;
1691 	}
1692 	return (len);
1693 }
1694 
1695 void
1696 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid)
1697 {
1698 	struct rt_addrinfo	 info;
1699 	struct ifnet		*ifp;
1700 	struct sockaddr_rtlabel	 sa_rl;
1701 	struct sockaddr_in6	 sa_mask;
1702 
1703 	memset(&info, 0, sizeof(info));
1704 	info.rti_info[RTAX_DST] = rt_key(rt);
1705 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1706 	if (!ISSET(rt->rt_flags, RTF_HOST))
1707 		info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1708 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1709 	ifp = if_get(rt->rt_ifidx);
1710 	if (ifp != NULL) {
1711 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1712 		info.rti_info[RTAX_IFA] =
1713 		    rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family);
1714 		if (info.rti_info[RTAX_IFA] == NULL)
1715 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1716 	}
1717 
1718 	rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error,
1719 	    rtableid);
1720 	if_put(ifp);
1721 }
1722 
1723 /*
1724  * This routine is called to generate a message from the routing
1725  * socket indicating that a redirect has occurred, a routing lookup
1726  * has failed, or that a protocol has detected timeouts to a particular
1727  * destination.
1728  */
1729 void
1730 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
1731     u_int ifidx, int error, u_int tableid)
1732 {
1733 	struct rt_msghdr	*rtm;
1734 	struct mbuf		*m;
1735 	struct sockaddr		*sa = rtinfo->rti_info[RTAX_DST];
1736 
1737 	if (rtptable.rtp_count == 0)
1738 		return;
1739 	m = rtm_msg1(type, rtinfo);
1740 	if (m == NULL)
1741 		return;
1742 	rtm = mtod(m, struct rt_msghdr *);
1743 	rtm->rtm_flags = RTF_DONE | flags;
1744 	rtm->rtm_priority = prio;
1745 	rtm->rtm_errno = error;
1746 	rtm->rtm_tableid = tableid;
1747 	rtm->rtm_addrs = rtinfo->rti_addrs;
1748 	rtm->rtm_index = ifidx;
1749 	route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC);
1750 }
1751 
1752 /*
1753  * This routine is called to generate a message from the routing
1754  * socket indicating that the status of a network interface has changed.
1755  */
1756 void
1757 rtm_ifchg(struct ifnet *ifp)
1758 {
1759 	struct rt_addrinfo	 info;
1760 	struct if_msghdr	*ifm;
1761 	struct mbuf		*m;
1762 
1763 	if (rtptable.rtp_count == 0)
1764 		return;
1765 	memset(&info, 0, sizeof(info));
1766 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1767 	m = rtm_msg1(RTM_IFINFO, &info);
1768 	if (m == NULL)
1769 		return;
1770 	ifm = mtod(m, struct if_msghdr *);
1771 	ifm->ifm_index = ifp->if_index;
1772 	ifm->ifm_tableid = ifp->if_rdomain;
1773 	ifm->ifm_flags = ifp->if_flags;
1774 	ifm->ifm_xflags = ifp->if_xflags;
1775 	if_getdata(ifp, &ifm->ifm_data);
1776 	ifm->ifm_addrs = info.rti_addrs;
1777 	route_input(m, NULL, AF_UNSPEC);
1778 }
1779 
1780 /*
1781  * This is called to generate messages from the routing socket
1782  * indicating a network interface has had addresses associated with it.
1783  * if we ever reverse the logic and replace messages TO the routing
1784  * socket indicate a request to configure interfaces, then it will
1785  * be unnecessary as the routing socket will automatically generate
1786  * copies of it.
1787  */
1788 void
1789 rtm_addr(int cmd, struct ifaddr *ifa)
1790 {
1791 	struct ifnet		*ifp = ifa->ifa_ifp;
1792 	struct mbuf		*m;
1793 	struct rt_addrinfo	 info;
1794 	struct ifa_msghdr	*ifam;
1795 
1796 	if (rtptable.rtp_count == 0)
1797 		return;
1798 
1799 	memset(&info, 0, sizeof(info));
1800 	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1801 	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1802 	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1803 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1804 	if ((m = rtm_msg1(cmd, &info)) == NULL)
1805 		return;
1806 	ifam = mtod(m, struct ifa_msghdr *);
1807 	ifam->ifam_index = ifp->if_index;
1808 	ifam->ifam_metric = ifa->ifa_metric;
1809 	ifam->ifam_flags = ifa->ifa_flags;
1810 	ifam->ifam_addrs = info.rti_addrs;
1811 	ifam->ifam_tableid = ifp->if_rdomain;
1812 
1813 	route_input(m, NULL,
1814 	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
1815 }
1816 
1817 /*
1818  * This is called to generate routing socket messages indicating
1819  * network interface arrival and departure.
1820  */
1821 void
1822 rtm_ifannounce(struct ifnet *ifp, int what)
1823 {
1824 	struct if_announcemsghdr	*ifan;
1825 	struct mbuf			*m;
1826 
1827 	if (rtptable.rtp_count == 0)
1828 		return;
1829 	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
1830 	if (m == NULL)
1831 		return;
1832 	ifan = mtod(m, struct if_announcemsghdr *);
1833 	ifan->ifan_index = ifp->if_index;
1834 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
1835 	ifan->ifan_what = what;
1836 	route_input(m, NULL, AF_UNSPEC);
1837 }
1838 
#ifdef BFD
/*
 * This is used to generate routing socket messages indicating
 * the state of a BFD session.
 *
 * The message carries the destination and interface address of the
 * session's route plus the sockaddr_bfd produced by bfd2sa().  Nothing
 * is done when rtptable.rtp_count is zero.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct rtentry		*rt;
	struct rt_addrinfo	 info;
	struct sockaddr_bfd	 sa_bfd;
	struct bfd_msghdr	*hdr;
	struct mbuf		*m;

	if (rtptable.rtp_count == 0)
		return;

	rt = bfd->bc_rt;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;

	if ((m = rtm_msg1(RTM_BFD, &info)) == NULL)
		return;
	hdr = mtod(m, struct bfd_msghdr *);
	hdr->bm_addrs = info.rti_addrs;

	KERNEL_ASSERT_LOCKED();
	bfd2sa(bfd->bc_rt, &sa_bfd);
	memcpy(&hdr->bm_sa, &sa_bfd, sizeof(sa_bfd));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */
1871 
1872 /*
1873  * This is used to generate routing socket messages indicating
1874  * the state of an ieee80211 interface.
1875  */
1876 void
1877 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie)
1878 {
1879 	struct if_ieee80211_msghdr	*ifim;
1880 	struct mbuf			*m;
1881 
1882 	if (rtptable.rtp_count == 0)
1883 		return;
1884 	m = rtm_msg1(RTM_80211INFO, NULL);
1885 	if (m == NULL)
1886 		return;
1887 	ifim = mtod(m, struct if_ieee80211_msghdr *);
1888 	ifim->ifim_index = ifp->if_index;
1889 	ifim->ifim_tableid = ifp->if_rdomain;
1890 
1891 	memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie));
1892 	route_input(m, NULL, AF_UNSPEC);
1893 }
1894 
1895 /*
1896  * This is used to generate routing socket messages indicating
1897  * the address selection proposal from an interface.
1898  */
1899 void
1900 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags,
1901     uint8_t prio)
1902 {
1903 	struct rt_msghdr	*rtm;
1904 	struct mbuf		*m;
1905 
1906 	m = rtm_msg1(RTM_PROPOSAL, rtinfo);
1907 	if (m == NULL)
1908 		return;
1909 	rtm = mtod(m, struct rt_msghdr *);
1910 	rtm->rtm_flags = RTF_DONE | flags;
1911 	rtm->rtm_priority = prio;
1912 	rtm->rtm_tableid = ifp->if_rdomain;
1913 	rtm->rtm_index = ifp->if_index;
1914 	rtm->rtm_addrs = rtinfo->rti_addrs;
1915 
1916 	route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family);
1917 }
1918 
1919 /*
1920  * This is used in dumping the kernel table via sysctl().
1921  */
1922 int
1923 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
1924 {
1925 	struct walkarg		*w = v;
1926 	int			 error = 0, size;
1927 	struct rt_addrinfo	 info;
1928 	struct ifnet		*ifp;
1929 #ifdef BFD
1930 	struct sockaddr_bfd	 sa_bfd;
1931 #endif
1932 	struct sockaddr_rtlabel	 sa_rl;
1933 	struct sockaddr_in6	 sa_mask;
1934 
1935 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1936 		return 0;
1937 	if (w->w_op == NET_RT_DUMP && w->w_arg) {
1938 		u_int8_t prio = w->w_arg & RTP_MASK;
1939 		if (w->w_arg < 0) {
1940 			prio = (-w->w_arg) & RTP_MASK;
1941 			/* Show all routes that are not this priority */
1942 			if (prio == (rt->rt_priority & RTP_MASK))
1943 				return 0;
1944 		} else {
1945 			if (prio != (rt->rt_priority & RTP_MASK) &&
1946 			    prio != RTP_ANY)
1947 				return 0;
1948 		}
1949 	}
1950 	bzero(&info, sizeof(info));
1951 	info.rti_info[RTAX_DST] = rt_key(rt);
1952 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1953 	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
1954 	ifp = if_get(rt->rt_ifidx);
1955 	if (ifp != NULL) {
1956 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
1957 		info.rti_info[RTAX_IFA] =
1958 		    rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family);
1959 		if (info.rti_info[RTAX_IFA] == NULL)
1960 			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1961 		if (ifp->if_flags & IFF_POINTOPOINT)
1962 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1963 	}
1964 	if_put(ifp);
1965 	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
1966 #ifdef BFD
1967 	if (rt->rt_flags & RTF_BFD) {
1968 		KERNEL_ASSERT_LOCKED();
1969 		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
1970 	}
1971 #endif
1972 #ifdef MPLS
1973 	if (rt->rt_flags & RTF_MPLS) {
1974 		struct sockaddr_mpls	 sa_mpls;
1975 
1976 		bzero(&sa_mpls, sizeof(sa_mpls));
1977 		sa_mpls.smpls_family = AF_MPLS;
1978 		sa_mpls.smpls_len = sizeof(sa_mpls);
1979 		sa_mpls.smpls_label = ((struct rt_mpls *)
1980 		    rt->rt_llinfo)->mpls_label;
1981 		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
1982 		info.rti_mpls = ((struct rt_mpls *)
1983 		    rt->rt_llinfo)->mpls_operation;
1984 	}
1985 #endif
1986 
1987 	size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
1988 	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
1989 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1990 
1991 		rtm->rtm_pid = curproc->p_p->ps_pid;
1992 		rtm->rtm_flags = rt->rt_flags;
1993 		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
1994 		rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1995 		/* Do not account the routing table's reference. */
1996 		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
1997 		rtm->rtm_index = rt->rt_ifidx;
1998 		rtm->rtm_addrs = info.rti_addrs;
1999 		rtm->rtm_tableid = id;
2000 #ifdef MPLS
2001 		rtm->rtm_mpls = info.rti_mpls;
2002 #endif
2003 		if ((error = copyout(rtm, w->w_where, size)) != 0)
2004 			w->w_where = NULL;
2005 		else
2006 			w->w_where += size;
2007 	}
2008 	return (error);
2009 }
2010 
2011 int
2012 sysctl_iflist(int af, struct walkarg *w)
2013 {
2014 	struct ifnet		*ifp;
2015 	struct ifaddr		*ifa;
2016 	struct rt_addrinfo	 info;
2017 	int			 len, error = 0;
2018 
2019 	bzero(&info, sizeof(info));
2020 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2021 		if (w->w_arg && w->w_arg != ifp->if_index)
2022 			continue;
2023 		/* Copy the link-layer address first */
2024 		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
2025 		len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
2026 		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
2027 			struct if_msghdr *ifm;
2028 
2029 			ifm = (struct if_msghdr *)w->w_tmem;
2030 			ifm->ifm_index = ifp->if_index;
2031 			ifm->ifm_tableid = ifp->if_rdomain;
2032 			ifm->ifm_flags = ifp->if_flags;
2033 			if_getdata(ifp, &ifm->ifm_data);
2034 			ifm->ifm_addrs = info.rti_addrs;
2035 			error = copyout(ifm, w->w_where, len);
2036 			if (error)
2037 				return (error);
2038 			w->w_where += len;
2039 		}
2040 		info.rti_info[RTAX_IFP] = NULL;
2041 		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
2042 			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
2043 			if (af && af != ifa->ifa_addr->sa_family)
2044 				continue;
2045 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2046 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
2047 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2048 			len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
2049 			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
2050 				struct ifa_msghdr *ifam;
2051 
2052 				ifam = (struct ifa_msghdr *)w->w_tmem;
2053 				ifam->ifam_index = ifa->ifa_ifp->if_index;
2054 				ifam->ifam_flags = ifa->ifa_flags;
2055 				ifam->ifam_metric = ifa->ifa_metric;
2056 				ifam->ifam_addrs = info.rti_addrs;
2057 				error = copyout(w->w_tmem, w->w_where, len);
2058 				if (error)
2059 					return (error);
2060 				w->w_where += len;
2061 			}
2062 		}
2063 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
2064 		    info.rti_info[RTAX_BRD] = NULL;
2065 	}
2066 	return (0);
2067 }
2068 
2069 int
2070 sysctl_ifnames(struct walkarg *w)
2071 {
2072 	struct if_nameindex_msg ifn;
2073 	struct ifnet *ifp;
2074 	int error = 0;
2075 
2076 	/* XXX ignore tableid for now */
2077 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
2078 		if (w->w_arg && w->w_arg != ifp->if_index)
2079 			continue;
2080 		w->w_needed += sizeof(ifn);
2081 		if (w->w_where && w->w_needed <= 0) {
2082 
2083 			memset(&ifn, 0, sizeof(ifn));
2084 			ifn.if_index = ifp->if_index;
2085 			strlcpy(ifn.if_name, ifp->if_xname,
2086 			    sizeof(ifn.if_name));
2087 			error = copyout(&ifn, w->w_where, sizeof(ifn));
2088 			if (error)
2089 				return (error);
2090 			w->w_where += sizeof(ifn);
2091 		}
2092 	}
2093 
2094 	return (0);
2095 }
2096 
2097 int
2098 sysctl_source(int af, u_int tableid, struct walkarg *w)
2099 {
2100 	struct sockaddr	*sa;
2101 	int		 size, error = 0;
2102 
2103 	sa = rtable_getsource(tableid, af);
2104 	if (sa) {
2105 		switch (sa->sa_family) {
2106 		case AF_INET:
2107 			size = sizeof(struct sockaddr_in);
2108 			break;
2109 #ifdef INET6
2110 		case AF_INET6:
2111 			size = sizeof(struct sockaddr_in6);
2112 			break;
2113 #endif
2114 		default:
2115 			return (0);
2116 		}
2117 		w->w_needed += size;
2118 		if (w->w_where && w->w_needed <= 0) {
2119 			if ((error = copyout(sa, w->w_where, size)))
2120 				return (error);
2121 			w->w_where += size;
2122 		}
2123 	}
2124 	return (0);
2125 }
2126 
2127 int
2128 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
2129     size_t newlen)
2130 {
2131 	int			 i, error = EINVAL;
2132 	u_char			 af;
2133 	struct walkarg		 w;
2134 	struct rt_tableinfo	 tableinfo;
2135 	u_int			 tableid = 0;
2136 
2137 	if (new)
2138 		return (EPERM);
2139 	if (namelen < 3 || namelen > 4)
2140 		return (EINVAL);
2141 	af = name[0];
2142 	bzero(&w, sizeof(w));
2143 	w.w_where = where;
2144 	w.w_given = *given;
2145 	w.w_needed = 0 - w.w_given;
2146 	w.w_op = name[1];
2147 	w.w_arg = name[2];
2148 
2149 	if (namelen == 4) {
2150 		tableid = name[3];
2151 		if (!rtable_exists(tableid))
2152 			return (ENOENT);
2153 	} else
2154 		tableid = curproc->p_p->ps_rtableid;
2155 
2156 	switch (w.w_op) {
2157 	case NET_RT_DUMP:
2158 	case NET_RT_FLAGS:
2159 		NET_LOCK();
2160 		for (i = 1; i <= AF_MAX; i++) {
2161 			if (af != 0 && af != i)
2162 				continue;
2163 
2164 			error = rtable_walk(tableid, i, NULL, sysctl_dumpentry,
2165 			    &w);
2166 			if (error == EAFNOSUPPORT)
2167 				error = 0;
2168 			if (error)
2169 				break;
2170 		}
2171 		NET_UNLOCK();
2172 		break;
2173 
2174 	case NET_RT_IFLIST:
2175 		NET_LOCK();
2176 		error = sysctl_iflist(af, &w);
2177 		NET_UNLOCK();
2178 		break;
2179 
2180 	case NET_RT_STATS:
2181 		return (sysctl_rtable_rtstat(where, given, new));
2182 	case NET_RT_TABLE:
2183 		tableid = w.w_arg;
2184 		if (!rtable_exists(tableid))
2185 			return (ENOENT);
2186 		memset(&tableinfo, 0, sizeof tableinfo);
2187 		tableinfo.rti_tableid = tableid;
2188 		tableinfo.rti_domainid = rtable_l2(tableid);
2189 		error = sysctl_rdstruct(where, given, new,
2190 		    &tableinfo, sizeof(tableinfo));
2191 		return (error);
2192 	case NET_RT_IFNAMES:
2193 		NET_LOCK();
2194 		error = sysctl_ifnames(&w);
2195 		NET_UNLOCK();
2196 		break;
2197 	case NET_RT_SOURCE:
2198 		tableid = w.w_arg;
2199 		if (!rtable_exists(tableid))
2200 			return (ENOENT);
2201 		NET_LOCK();
2202 		for (i = 1; i <= AF_MAX; i++) {
2203 			if (af != 0 && af != i)
2204 				continue;
2205 
2206 			error = sysctl_source(i, tableid, &w);
2207 			if (error == EAFNOSUPPORT)
2208 				error = 0;
2209 			if (error)
2210 				break;
2211 		}
2212 		NET_UNLOCK();
2213 		break;
2214 	}
2215 	free(w.w_tmem, M_RTABLE, w.w_tmemsize);
2216 	w.w_needed += w.w_given;
2217 	if (where) {
2218 		*given = w.w_where - (caddr_t)where;
2219 		if (*given < w.w_needed)
2220 			return (ENOMEM);
2221 	} else
2222 		*given = w.w_needed + w.w_needed / 10;
2223 
2224 	return (error);
2225 }
2226 
2227 int
2228 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp)
2229 {
2230 	extern struct cpumem *rtcounters;
2231 	uint64_t counters[rts_ncounters];
2232 	struct rtstat rtstat;
2233 	uint32_t *words = (uint32_t *)&rtstat;
2234 	int i;
2235 
2236 	CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t)));
2237 	memset(&rtstat, 0, sizeof rtstat);
2238 	counters_read(rtcounters, counters, nitems(counters));
2239 
2240 	for (i = 0; i < nitems(counters); i++)
2241 		words[i] = (uint32_t)counters[i];
2242 
2243 	return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat)));
2244 }
2245 
2246 int
2247 rtm_validate_proposal(struct rt_addrinfo *info)
2248 {
2249 	if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC |
2250 	    RTA_SEARCH)) {
2251 		return -1;
2252 	}
2253 
2254 	if (ISSET(info->rti_addrs, RTA_NETMASK)) {
2255 		struct sockaddr *sa = info->rti_info[RTAX_NETMASK];
2256 		if (sa == NULL)
2257 			return -1;
2258 		switch (sa->sa_family) {
2259 		case AF_INET:
2260 			if (sa->sa_len != sizeof(struct sockaddr_in))
2261 				return -1;
2262 			break;
2263 		case AF_INET6:
2264 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2265 				return -1;
2266 			break;
2267 		default:
2268 			return -1;
2269 		}
2270 	}
2271 
2272 	if (ISSET(info->rti_addrs, RTA_IFA)) {
2273 		struct sockaddr *sa = info->rti_info[RTAX_IFA];
2274 		if (sa == NULL)
2275 			return -1;
2276 		switch (sa->sa_family) {
2277 		case AF_INET:
2278 			if (sa->sa_len != sizeof(struct sockaddr_in))
2279 				return -1;
2280 			break;
2281 		case AF_INET6:
2282 			if (sa->sa_len != sizeof(struct sockaddr_in6))
2283 				return -1;
2284 			break;
2285 		default:
2286 			return -1;
2287 		}
2288 	}
2289 
2290 	if (ISSET(info->rti_addrs, RTA_DNS)) {
2291 		struct sockaddr_rtdns *rtdns =
2292 		    (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS];
2293 		if (rtdns == NULL)
2294 			return -1;
2295 		if (rtdns->sr_len > sizeof(*rtdns))
2296 			return -1;
2297 		if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns))
2298 			return -1;
2299 		switch (rtdns->sr_family) {
2300 		case AF_INET:
2301 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2302 			    sr_dns)) % sizeof(struct in_addr) != 0)
2303 				return -1;
2304 			break;
2305 #ifdef INET6
2306 		case AF_INET6:
2307 			if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns,
2308 			    sr_dns)) % sizeof(struct in6_addr) != 0)
2309 				return -1;
2310 			break;
2311 #endif
2312 		default:
2313 			return -1;
2314 		}
2315 	}
2316 
2317 	if (ISSET(info->rti_addrs, RTA_STATIC)) {
2318 		struct sockaddr_rtstatic *rtstatic =
2319 		    (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC];
2320 		if (rtstatic == NULL)
2321 			return -1;
2322 		if (rtstatic->sr_len > sizeof(*rtstatic))
2323 			return -1;
2324 		if (rtstatic->sr_len <=
2325 		    offsetof(struct sockaddr_rtstatic, sr_static))
2326 			return -1;
2327 	}
2328 
2329 	if (ISSET(info->rti_addrs, RTA_SEARCH)) {
2330 		struct sockaddr_rtsearch *rtsearch =
2331 		    (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH];
2332 		if (rtsearch == NULL)
2333 			return -1;
2334 		if (rtsearch->sr_len > sizeof(*rtsearch))
2335 			return -1;
2336 		if (rtsearch->sr_len <=
2337 		    offsetof(struct sockaddr_rtsearch, sr_search))
2338 			return -1;
2339 	}
2340 
2341 	return 0;
2342 }
2343 
2344 int
2345 rt_setsource(unsigned int rtableid, struct sockaddr *src)
2346 {
2347 	struct ifaddr	*ifa;
2348 	int		error;
2349 	/*
2350 	 * If source address is 0.0.0.0 or ::
2351 	 * use automatic source selection
2352 	 */
2353 	switch(src->sa_family) {
2354 	case AF_INET:
2355 		if(satosin(src)->sin_addr.s_addr == INADDR_ANY) {
2356 			rtable_setsource(rtableid, AF_INET, NULL);
2357 			return (0);
2358 		}
2359 		break;
2360 #ifdef INET6
2361 	case AF_INET6:
2362 		if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) {
2363 			rtable_setsource(rtableid, AF_INET6, NULL);
2364 			return (0);
2365 		}
2366 		break;
2367 #endif
2368 	default:
2369 		return (EAFNOSUPPORT);
2370 	}
2371 
2372 	KERNEL_LOCK();
2373 	/*
2374 	 * Check if source address is assigned to an interface in the
2375 	 * same rdomain
2376 	 */
2377 	if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) {
2378 		KERNEL_UNLOCK();
2379 		return (EINVAL);
2380 	}
2381 
2382 	error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr);
2383 	KERNEL_UNLOCK();
2384 
2385 	return (error);
2386 }
2387 
2388 /*
2389  * Definitions of protocols supported in the ROUTE domain.
2390  */
2391 
2392 const struct protosw routesw[] = {
2393 {
2394   .pr_type	= SOCK_RAW,
2395   .pr_domain	= &routedomain,
2396   .pr_flags	= PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
2397   .pr_output	= route_output,
2398   .pr_ctloutput	= route_ctloutput,
2399   .pr_usrreq	= route_usrreq,
2400   .pr_attach	= route_attach,
2401   .pr_detach	= route_detach,
2402   .pr_init	= route_prinit,
2403   .pr_sysctl	= sysctl_rtable
2404 }
2405 };
2406 
2407 const struct domain routedomain = {
2408   .dom_family = PF_ROUTE,
2409   .dom_name = "route",
2410   .dom_init = route_init,
2411   .dom_protosw = routesw,
2412   .dom_protoswNPROTOSW = &routesw[nitems(routesw)]
2413 };
2414