xref: /dragonfly/sys/net/rtsock.c (revision b3e108b2)
1 /*
2  * Copyright (c) 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
34  * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $
35  * $DragonFly: src/sys/net/rtsock.c,v 1.17 2004/12/21 02:54:14 hsu Exp $
36  */
37 
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/kernel.h>
42 #include <sys/sysctl.h>
43 #include <sys/proc.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/domain.h>
50 
51 #include <machine/stdarg.h>
52 
53 #include <net/if.h>
54 #include <net/route.h>
55 #include <net/raw_cb.h>
56 
57 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
58 
59 static struct route_cb {
60 	int	ip_count;
61 	int	ip6_count;
62 	int	ipx_count;
63 	int	ns_count;
64 	int	any_count;
65 } route_cb;
66 
67 static struct	sockaddr route_dst = { 2, PF_ROUTE, };
68 static struct	sockaddr route_src = { 2, PF_ROUTE, };
69 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
70 static struct	sockproto route_proto = { PF_ROUTE, };
71 
72 struct walkarg {
73 	int	w_tmemsize;
74 	int	w_op, w_arg;
75 	caddr_t	w_tmem;
76 	struct sysctl_req *w_req;
77 };
78 
79 static struct mbuf *
80 		rt_msg1 (int, struct rt_addrinfo *);
81 static int	rt_msg2 (int, struct rt_addrinfo *, caddr_t, struct walkarg *);
82 static int	rt_xaddrs (char *, char *, struct rt_addrinfo *);
83 static int	sysctl_dumpentry (struct radix_node *rn, void *vw);
84 static int	sysctl_iflist (int af, struct walkarg *w);
85 static int	route_output(struct mbuf *, struct socket *, ...);
86 static void	rt_setmetrics (u_long, struct rt_metrics *,
87 			       struct rt_metrics *);
88 
89 /*
90  * It really doesn't make any sense at all for this code to share much
91  * with raw_usrreq.c, since its functionality is so restricted.  XXX
92  */
93 static int
94 rts_abort(struct socket *so)
95 {
96 	int s, error;
97 
98 	s = splnet();
99 	error = raw_usrreqs.pru_abort(so);
100 	splx(s);
101 	return error;
102 }
103 
104 /* pru_accept is EOPNOTSUPP */
105 
106 static int
107 rts_attach(struct socket *so, int proto, struct pru_attach_info *ai)
108 {
109 	struct rawcb *rp;
110 	int s, error;
111 
112 	if (sotorawcb(so) != NULL)
113 		return EISCONN;	/* XXX panic? */
114 
115 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK|M_ZERO);
116 	if (rp == NULL)
117 		return ENOBUFS;
118 
119 	/*
120 	 * The splnet() is necessary to block protocols from sending
121 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
122 	 * this PCB is extant but incompletely initialized.
123 	 * Probably we should try to do more of this work beforehand and
124 	 * eliminate the spl.
125 	 */
126 	s = splnet();
127 	so->so_pcb = rp;
128 	error = raw_attach(so, proto, ai->sb_rlimit);
129 	rp = sotorawcb(so);
130 	if (error) {
131 		splx(s);
132 		free(rp, M_PCB);
133 		return error;
134 	}
135 	switch(rp->rcb_proto.sp_protocol) {
136 	case AF_INET:
137 		route_cb.ip_count++;
138 		break;
139 	case AF_INET6:
140 		route_cb.ip6_count++;
141 		break;
142 	case AF_IPX:
143 		route_cb.ipx_count++;
144 		break;
145 	case AF_NS:
146 		route_cb.ns_count++;
147 		break;
148 	}
149 	rp->rcb_faddr = &route_src;
150 	route_cb.any_count++;
151 	soisconnected(so);
152 	so->so_options |= SO_USELOOPBACK;
153 	splx(s);
154 	return 0;
155 }
156 
157 static int
158 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
159 {
160 	int s, error;
161 
162 	s = splnet();
163 	error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
164 	splx(s);
165 	return error;
166 }
167 
168 static int
169 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
170 {
171 	int s, error;
172 
173 	s = splnet();
174 	error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
175 	splx(s);
176 	return error;
177 }
178 
179 /* pru_connect2 is EOPNOTSUPP */
180 /* pru_control is EOPNOTSUPP */
181 
182 static int
183 rts_detach(struct socket *so)
184 {
185 	struct rawcb *rp = sotorawcb(so);
186 	int s, error;
187 
188 	s = splnet();
189 	if (rp != NULL) {
190 		switch(rp->rcb_proto.sp_protocol) {
191 		case AF_INET:
192 			route_cb.ip_count--;
193 			break;
194 		case AF_INET6:
195 			route_cb.ip6_count--;
196 			break;
197 		case AF_IPX:
198 			route_cb.ipx_count--;
199 			break;
200 		case AF_NS:
201 			route_cb.ns_count--;
202 			break;
203 		}
204 		route_cb.any_count--;
205 	}
206 	error = raw_usrreqs.pru_detach(so);
207 	splx(s);
208 	return error;
209 }
210 
211 static int
212 rts_disconnect(struct socket *so)
213 {
214 	int s, error;
215 
216 	s = splnet();
217 	error = raw_usrreqs.pru_disconnect(so);
218 	splx(s);
219 	return error;
220 }
221 
222 /* pru_listen is EOPNOTSUPP */
223 
224 static int
225 rts_peeraddr(struct socket *so, struct sockaddr **nam)
226 {
227 	int s, error;
228 
229 	s = splnet();
230 	error = raw_usrreqs.pru_peeraddr(so, nam);
231 	splx(s);
232 	return error;
233 }
234 
235 /* pru_rcvd is EOPNOTSUPP */
236 /* pru_rcvoob is EOPNOTSUPP */
237 
238 static int
239 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
240 	 struct mbuf *control, struct thread *td)
241 {
242 	int s, error;
243 
244 	s = splnet();
245 	error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
246 	splx(s);
247 	return error;
248 }
249 
250 /* pru_sense is null */
251 
252 static int
253 rts_shutdown(struct socket *so)
254 {
255 	int s, error;
256 
257 	s = splnet();
258 	error = raw_usrreqs.pru_shutdown(so);
259 	splx(s);
260 	return error;
261 }
262 
263 static int
264 rts_sockaddr(struct socket *so, struct sockaddr **nam)
265 {
266 	int s, error;
267 
268 	s = splnet();
269 	error = raw_usrreqs.pru_sockaddr(so, nam);
270 	splx(s);
271 	return error;
272 }
273 
274 static struct pr_usrreqs route_usrreqs = {
275 	rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
276 	pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
277 	pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
278 	rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
279 	sosend, soreceive, sopoll
280 };
281 
282 /*ARGSUSED*/
283 static int
284 route_output(struct mbuf *m, struct socket *so, ...)
285 {
286 	struct rt_msghdr *rtm = NULL;
287 	struct rtentry *rt = NULL;
288 	struct rtentry *saved_nrt = NULL;
289 	struct radix_node_head *rnh;
290 	struct ifnet *ifp = NULL;
291 	struct ifaddr *ifa = NULL;
292 	struct rawcb *rp = NULL;
293 	struct pr_output_info *oi;
294 	struct rt_addrinfo info;
295 	int len, error = 0;
296 	__va_list ap;
297 
298 	__va_start(ap, so);
299 	oi = __va_arg(ap, struct pr_output_info *);
300 	__va_end(ap);
301 
302 #define gotoerr(e) { error = e; goto flush;}
303 	if (m == NULL || ((m->m_len < sizeof(long)) &&
304 		       (m = m_pullup(m, sizeof(long))) == NULL))
305 		return (ENOBUFS);
306 	if (!(m->m_flags & M_PKTHDR))
307 		panic("route_output");
308 	len = m->m_pkthdr.len;
309 	if (len < sizeof(*rtm) ||
310 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
311 		info.sa_dst = NULL;
312 		gotoerr(EINVAL);
313 	}
314 	R_Malloc(rtm, struct rt_msghdr *, len);
315 	if (rtm == NULL) {
316 		info.sa_dst = NULL;
317 		gotoerr(ENOBUFS);
318 	}
319 	m_copydata(m, 0, len, (caddr_t)rtm);
320 	if (rtm->rtm_version != RTM_VERSION) {
321 		info.sa_dst = NULL;
322 		gotoerr(EPROTONOSUPPORT);
323 	}
324 	rtm->rtm_pid = oi->p_pid;
325 	bzero(&info, sizeof(info));
326 	info.rti_addrs = rtm->rtm_addrs;
327 	if (rt_xaddrs((char *)(rtm + 1), len + (char *)rtm, &info)) {
328 		info.sa_dst = NULL;
329 		gotoerr(EINVAL);
330 	}
331 	info.rti_flags = rtm->rtm_flags;
332 	if (info.sa_dst == NULL || info.sa_dst->sa_family >= AF_MAX ||
333 	    (info.sa_gateway != NULL && (info.sa_gateway->sa_family >= AF_MAX)))
334 		gotoerr(EINVAL);
335 
336 	if (info.sa_genmask != NULL) {
337 		struct radix_node *t;
338 		int klen;
339 
340 		t = rn_addmask((char *)info.sa_genmask, TRUE, 1);
341 		if (t != NULL &&
342 		    info.sa_genmask->sa_len >= (klen = *(u_char *)t->rn_key) &&
343 		    bcmp((char *)info.sa_genmask + 1, (char *)t->rn_key + 1,
344 		         klen - 1) == 0)
345 			info.sa_genmask = (struct sockaddr *)(t->rn_key);
346 		else
347 			gotoerr(ENOBUFS);
348 	}
349 
350 	/*
351 	 * Verify that the caller has the appropriate privilege; RTM_GET
352 	 * is the only operation the non-superuser is allowed.
353 	 */
354 	if (rtm->rtm_type != RTM_GET && suser_cred(so->so_cred, 0) != 0)
355 		gotoerr(EPERM);
356 
357 	switch (rtm->rtm_type) {
358 
359 	case RTM_ADD:
360 		if (info.sa_gateway == NULL)
361 			gotoerr(EINVAL);
362 		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
363 		if (error == 0 && saved_nrt != NULL) {
364 			rt_setmetrics(rtm->rtm_inits,
365 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
366 			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
367 			saved_nrt->rt_rmx.rmx_locks |=
368 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
369 			saved_nrt->rt_refcnt--;
370 			saved_nrt->rt_genmask = info.sa_genmask;
371 		}
372 		break;
373 
374 	case RTM_DELETE:
375 		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
376 		if (error == 0) {
377 			if ((rt = saved_nrt))
378 				rt->rt_refcnt++;
379 			goto report;
380 		}
381 		break;
382 
383 	case RTM_GET:
384 	case RTM_CHANGE:
385 	case RTM_LOCK:
386 		if ((rnh = rt_tables[info.sa_dst->sa_family]) == NULL) {
387 			gotoerr(EAFNOSUPPORT);
388 		} else if ((rt = (struct rtentry *) rnh->rnh_lookup(
389 		    (char *)info.sa_dst, (char *)info.sa_netmask, rnh)) != NULL)
390 			rt->rt_refcnt++;
391 		else
392 			gotoerr(ESRCH);
393 		switch(rtm->rtm_type) {
394 
395 		case RTM_GET:
396 		report:
397 			info.sa_dst = rt_key(rt);
398 			info.sa_gateway = rt->rt_gateway;
399 			info.sa_netmask = rt_mask(rt);
400 			info.sa_genmask = rt->rt_genmask;
401 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
402 				ifp = rt->rt_ifp;
403 				if (ifp) {
404 					info.sa_ifpaddr =
405 					    TAILQ_FIRST(&ifp->if_addrhead)->
406 						ifa_addr;
407 					info.sa_ifaaddr = rt->rt_ifa->ifa_addr;
408 					if (ifp->if_flags & IFF_POINTOPOINT)
409 						info.sa_bcastaddr =
410 						    rt->rt_ifa->ifa_dstaddr;
411 					rtm->rtm_index = ifp->if_index;
412 				} else {
413 					info.sa_ifpaddr = NULL;
414 					info.sa_ifaaddr = NULL;
415 			    }
416 			}
417 			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
418 			if (len > rtm->rtm_msglen) {
419 				struct rt_msghdr *new_rtm;
420 				R_Malloc(new_rtm, struct rt_msghdr *, len);
421 				if (new_rtm == NULL)
422 					gotoerr(ENOBUFS);
423 				bcopy(rtm, new_rtm, rtm->rtm_msglen);
424 				Free(rtm); rtm = new_rtm;
425 			}
426 			rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
427 			rtm->rtm_flags = rt->rt_flags;
428 			rtm->rtm_rmx = rt->rt_rmx;
429 			rtm->rtm_addrs = info.rti_addrs;
430 			break;
431 
432 		case RTM_CHANGE:
433 			/*
434 			 * new gateway could require new ifaddr, ifp;
435 			 * flags may also be different; ifp may be specified
436 			 * by ll sockaddr when protocol address is ambiguous
437 			 */
438 			if (((rt->rt_flags & RTF_GATEWAY) &&
439 			     info.sa_gateway != NULL) ||
440 			    info.sa_ifpaddr != NULL ||
441 			    (info.sa_ifaaddr != NULL &&
442 			     bcmp(info.sa_ifaaddr, rt->rt_ifa->ifa_addr,
443 				  info.sa_ifaaddr->sa_len) == 0)) {
444 				if ((error = rt_getifa(&info)) != 0)
445 					gotoerr(error);
446 			}
447 			if (info.sa_gateway != NULL &&
448 			    (error = rt_setgate(rt, rt_key(rt),
449 						info.sa_gateway)) != 0)
450 				gotoerr(error);
451 			if ((ifa = info.rti_ifa) != NULL) {
452 				struct ifaddr *oifa = rt->rt_ifa;
453 
454 				if (oifa != ifa) {
455 					if (oifa && oifa->ifa_rtrequest)
456 						oifa->ifa_rtrequest(RTM_DELETE,
457 								    rt, &info);
458 					IFAFREE(rt->rt_ifa);
459 					rt->rt_ifa = ifa;
460 					IFAREF(ifa);
461 					rt->rt_ifp = info.rti_ifp;
462 				}
463 			}
464 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
465 			    &rt->rt_rmx);
466 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
467 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
468 			if (info.sa_genmask != NULL)
469 				rt->rt_genmask = info.sa_genmask;
470 			/*
471 			 * Fall into
472 			 */
473 		case RTM_LOCK:
474 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
475 			rt->rt_rmx.rmx_locks |=
476 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
477 			break;
478 		}
479 		break;
480 
481 	default:
482 		gotoerr(EOPNOTSUPP);
483 	}
484 
485 flush:
486 	if (rtm) {
487 		if (error)
488 			rtm->rtm_errno = error;
489 		else
490 			rtm->rtm_flags |= RTF_DONE;
491 	}
492 	if (rt)
493 		rtfree(rt);
494 	/*
495 	 * Check to see if we don't want our own messages.
496 	 */
497 	if (!(so->so_options & SO_USELOOPBACK)) {
498 		if (route_cb.any_count <= 1) {
499 			if (rtm)
500 				Free(rtm);
501 			m_freem(m);
502 			return (error);
503 		}
504 		/* There is another listener, so construct message */
505 		rp = sotorawcb(so);
506 	}
507 	if (rtm) {
508 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
509 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
510 			m_freem(m);
511 			m = NULL;
512 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
513 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
514 		Free(rtm);
515 	}
516 	if (rp != NULL)
517 		rp->rcb_proto.sp_family = 0; /* Avoid us */
518 	if (info.sa_dst != NULL)
519 		route_proto.sp_protocol = info.sa_dst->sa_family;
520 	if (m != NULL)
521 		raw_input(m, &route_proto, &route_src, &route_dst);
522 	if (rp != NULL)
523 		rp->rcb_proto.sp_family = PF_ROUTE;
524 	return (error);
525 }
526 
527 static void
528 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
529 {
530 #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt;
531 	setmetric(RTV_RPIPE, rmx_recvpipe);
532 	setmetric(RTV_SPIPE, rmx_sendpipe);
533 	setmetric(RTV_SSTHRESH, rmx_ssthresh);
534 	setmetric(RTV_RTT, rmx_rtt);
535 	setmetric(RTV_RTTVAR, rmx_rttvar);
536 	setmetric(RTV_HOPCOUNT, rmx_hopcount);
537 	setmetric(RTV_MTU, rmx_mtu);
538 	setmetric(RTV_EXPIRE, rmx_expire);
539 #undef setmetric
540 }
541 
/*
 * ROUNDUP(a): round a positive length up to a multiple of sizeof(long);
 * a length of zero (or less) yields sizeof(long).
 * ADVANCE(x, n): step pointer x past sockaddr n, using its rounded-up
 * sa_len.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) ((x) += ROUNDUP((n)->sa_len))
545 
546 /*
547  * Extract the addresses of the passed sockaddrs.
548  * Do a little sanity checking so as to avoid bad memory references.
549  * This data is derived straight from userland.
550  */
551 static int
552 rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo)
553 {
554 	struct sockaddr *sa;
555 	int i;
556 
557 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
558 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
559 			continue;
560 		sa = (struct sockaddr *)cp;
561 		/*
562 		 * It won't fit.
563 		 */
564 		if ( (cp + sa->sa_len) > cplim ) {
565 			return (EINVAL);
566 		}
567 
568 		/*
569 		 * There are no more...  Quit now.
570 		 * If there are more bits, they are in error.
571 		 * I've seen this.  route(1) can evidently generate these.
572 		 * This causes kernel to core dump.
573 		 * For compatibility, if we see this, point to a safe address.
574 		 */
575 		if (sa->sa_len == 0) {
576 			rtinfo->rti_info[i] = &sa_zero;
577 			return (0); /* should be EINVAL but for compat */
578 		}
579 
580 		/* Accept the sockaddr. */
581 		rtinfo->rti_info[i] = sa;
582 		ADVANCE(cp, sa);
583 	}
584 	return (0);
585 }
586 
587 static struct mbuf *
588 rt_msg1(type, rtinfo)
589 	int type;
590 	struct rt_addrinfo *rtinfo;
591 {
592 	struct rt_msghdr *rtm;
593 	struct mbuf *m;
594 	int i;
595 	struct sockaddr *sa;
596 	int len, dlen;
597 
598 	switch (type) {
599 
600 	case RTM_DELADDR:
601 	case RTM_NEWADDR:
602 		len = sizeof(struct ifa_msghdr);
603 		break;
604 
605 	case RTM_DELMADDR:
606 	case RTM_NEWMADDR:
607 		len = sizeof(struct ifma_msghdr);
608 		break;
609 
610 	case RTM_IFINFO:
611 		len = sizeof(struct if_msghdr);
612 		break;
613 
614 	case RTM_IFANNOUNCE:
615 		len = sizeof(struct if_announcemsghdr);
616 		break;
617 
618 	default:
619 		len = sizeof(struct rt_msghdr);
620 	}
621 	if (len > MCLBYTES)
622 		panic("rt_msg1");
623 	m = m_gethdr(MB_DONTWAIT, MT_DATA);
624 	if (m && len > MHLEN) {
625 		MCLGET(m, MB_DONTWAIT);
626 		if (!(m->m_flags & M_EXT)) {
627 			m_free(m);
628 			m = NULL;
629 		}
630 	}
631 	if (m == NULL)
632 		return (m);
633 	m->m_pkthdr.len = m->m_len = len;
634 	m->m_pkthdr.rcvif = NULL;
635 	rtm = mtod(m, struct rt_msghdr *);
636 	bzero(rtm, len);
637 	for (i = 0; i < RTAX_MAX; i++) {
638 		if ((sa = rtinfo->rti_info[i]) == NULL)
639 			continue;
640 		rtinfo->rti_addrs |= (1 << i);
641 		dlen = ROUNDUP(sa->sa_len);
642 		m_copyback(m, len, dlen, (caddr_t)sa);
643 		len += dlen;
644 	}
645 	if (m->m_pkthdr.len != len) {
646 		m_freem(m);
647 		return (NULL);
648 	}
649 	rtm->rtm_msglen = len;
650 	rtm->rtm_version = RTM_VERSION;
651 	rtm->rtm_type = type;
652 	return (m);
653 }
654 
655 static int
656 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
657 {
658 	int i;
659 	int len, dlen;
660 	boolean_t second_time = FALSE;
661 	caddr_t cp0;
662 
663 	rtinfo->rti_addrs = NULL;
664 again:
665 	switch (type) {
666 
667 	case RTM_DELADDR:
668 	case RTM_NEWADDR:
669 		len = sizeof(struct ifa_msghdr);
670 		break;
671 
672 	case RTM_IFINFO:
673 		len = sizeof(struct if_msghdr);
674 		break;
675 
676 	default:
677 		len = sizeof(struct rt_msghdr);
678 	}
679 	cp0 = cp;
680 	if (cp != NULL)
681 		cp += len;
682 
683 	for (i = 0; i < RTAX_MAX; i++) {
684 		struct sockaddr *sa;
685 
686 		if ((sa = rtinfo->rti_info[i]) == NULL)
687 			continue;
688 		rtinfo->rti_addrs |= (1 << i);
689 		dlen = ROUNDUP(sa->sa_len);
690 		if (cp != NULL) {
691 			bcopy(sa, cp, dlen);
692 			cp += dlen;
693 		}
694 		len += dlen;
695 	}
696 	len = ALIGN(len);
697 	if (cp == NULL && w != NULL && !second_time) {
698 		struct walkarg *rw = w;
699 
700 		if (rw->w_req != NULL) {
701 			if (rw->w_tmemsize < len) {
702 				if (rw->w_tmem)
703 					free(rw->w_tmem, M_RTABLE);
704 				rw->w_tmem = malloc(len, M_RTABLE,
705 						    M_INTWAIT | M_NULLOK);
706 				if (rw->w_tmem)
707 					rw->w_tmemsize = len;
708 			}
709 			if (rw->w_tmem != NULL) {
710 				cp = rw->w_tmem;
711 				second_time = TRUE;
712 				goto again;
713 			}
714 		}
715 	}
716 	if (cp != NULL) {
717 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
718 
719 		rtm->rtm_version = RTM_VERSION;
720 		rtm->rtm_type = type;
721 		rtm->rtm_msglen = len;
722 	}
723 	return (len);
724 }
725 
726 /*
727  * This routine is called to generate a message from the routing
728  * socket indicating that a redirect has occurred, a routing lookup
729  * has failed, or that a protocol has detected timeouts to a particular
730  * destination.
731  */
732 void
733 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
734 {
735 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
736 	struct rt_msghdr *rtm;
737 	struct mbuf *m;
738 
739 	if (route_cb.any_count == 0)
740 		return;
741 	m = rt_msg1(type, rtinfo);
742 	if (m == NULL)
743 		return;
744 	rtm = mtod(m, struct rt_msghdr *);
745 	rtm->rtm_flags = RTF_DONE | flags;
746 	rtm->rtm_errno = error;
747 	rtm->rtm_addrs = rtinfo->rti_addrs;
748 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
749 	raw_input(m, &route_proto, &route_src, &route_dst);
750 }
751 
752 /*
753  * This routine is called to generate a message from the routing
754  * socket indicating that the status of a network interface has changed.
755  */
756 void
757 rt_ifmsg(ifp)
758 	struct ifnet *ifp;
759 {
760 	struct if_msghdr *ifm;
761 	struct mbuf *m;
762 	struct rt_addrinfo info;
763 
764 	if (route_cb.any_count == 0)
765 		return;
766 	bzero(&info, sizeof(info));
767 	m = rt_msg1(RTM_IFINFO, &info);
768 	if (m == NULL)
769 		return;
770 	ifm = mtod(m, struct if_msghdr *);
771 	ifm->ifm_index = ifp->if_index;
772 	ifm->ifm_flags = (u_short)ifp->if_flags;
773 	ifm->ifm_data = ifp->if_data;
774 	ifm->ifm_addrs = NULL;
775 	route_proto.sp_protocol = 0;
776 	raw_input(m, &route_proto, &route_src, &route_dst);
777 }
778 
779 static void
780 rt_ifamsg(int cmd, struct ifaddr *ifa)
781 {
782 	struct ifa_msghdr *ifam;
783 	struct rt_addrinfo info;
784 	struct mbuf *m;
785 	struct sockaddr *sa;
786 	struct ifnet *ifp = ifa->ifa_ifp;
787 
788 	bzero(&info, sizeof(info));
789 	info.sa_ifaaddr = sa = ifa->ifa_addr;
790 	info.sa_ifpaddr = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
791 	info.sa_netmask = ifa->ifa_netmask;
792 	info.sa_bcastaddr = ifa->ifa_dstaddr;
793 
794 	m = rt_msg1(cmd, &info);
795 	if (m == NULL)
796 		return;
797 
798 	ifam = mtod(m, struct ifa_msghdr *);
799 	ifam->ifam_index = ifp->if_index;
800 	ifam->ifam_metric = ifa->ifa_metric;
801 	ifam->ifam_flags = ifa->ifa_flags;
802 	ifam->ifam_addrs = info.rti_addrs;
803 
804 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
805 
806 	raw_input(m, &route_proto, &route_src, &route_dst);
807 }
808 
809 static void
810 rt_rtmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
811 {
812 	struct rt_msghdr *rtm;
813 	struct rt_addrinfo info;
814 	struct mbuf *m;
815 	struct sockaddr *sa;
816 	struct ifnet *ifp = ifa->ifa_ifp;
817 
818 	if (rt == NULL)
819 		return;
820 
821 	bzero(&info, sizeof(info));
822 	info.sa_netmask = rt_mask(rt);
823 	info.sa_dst = sa = rt_key(rt);
824 	info.sa_gateway = rt->rt_gateway;
825 
826 	m = rt_msg1(cmd, &info);
827 	if (m == NULL)
828 		return;
829 
830 	rtm = mtod(m, struct rt_msghdr *);
831 	rtm->rtm_index = ifp->if_index;
832 	rtm->rtm_flags |= rt->rt_flags;
833 	rtm->rtm_errno = error;
834 	rtm->rtm_addrs = info.rti_addrs;
835 
836 	route_proto.sp_protocol = sa ? sa->sa_family : 0;
837 
838 	raw_input(m, &route_proto, &route_src, &route_dst);
839 }
840 
841 /*
842  * This is called to generate messages from the routing socket
843  * indicating a network interface has had addresses associated with it.
844  * if we ever reverse the logic and replace messages TO the routing
845  * socket indicate a request to configure interfaces, then it will
846  * be unnecessary as the routing socket will automatically generate
847  * copies of it.
848  */
849 void
850 rt_newaddrmsg(cmd, ifa, error, rt)
851 	int cmd, error;
852 	struct ifaddr *ifa;
853 	struct rtentry *rt;
854 {
855 	if (route_cb.any_count == 0)
856 		return;
857 
858 	if (cmd == RTM_ADD) {
859 		rt_ifamsg(RTM_NEWADDR, ifa);
860 		rt_rtmsg(RTM_ADD, ifa, error, rt);
861 	} else {
862 		KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd));
863 		rt_rtmsg(RTM_DELETE, ifa, error, rt);
864 		rt_ifamsg(RTM_DELADDR, ifa);
865 	}
866 }
867 
868 /*
869  * This is the analogue to the rt_newaddrmsg which performs the same
870  * function but for multicast group memberhips.  This is easier since
871  * there is no route state to worry about.
872  */
873 void
874 rt_newmaddrmsg(cmd, ifma)
875 	int cmd;
876 	struct ifmultiaddr *ifma;
877 {
878 	struct rt_addrinfo info;
879 	struct mbuf *m = NULL;
880 	struct ifnet *ifp = ifma->ifma_ifp;
881 	struct ifma_msghdr *ifmam;
882 
883 	if (route_cb.any_count == 0)
884 		return;
885 
886 	bzero(&info, sizeof(info));
887 	info.sa_ifaaddr = ifma->ifma_addr;
888 	if (ifp != NULL && TAILQ_FIRST(&ifp->if_addrhead) != NULL)
889 		info.sa_ifpaddr = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
890 	else
891 		info.sa_ifpaddr = NULL;
892 	/*
893 	 * If a link-layer address is present, present it as a ``gateway''
894 	 * (similarly to how ARP entries, e.g., are presented).
895 	 */
896 	info.sa_gateway = ifma->ifma_lladdr;
897 
898 	m = rt_msg1(cmd, &info);
899 	if (m == NULL)
900 		return;
901 
902 	ifmam = mtod(m, struct ifma_msghdr *);
903 	ifmam->ifmam_index = ifp->if_index;
904 	ifmam->ifmam_addrs = info.rti_addrs;
905 	route_proto.sp_protocol = ifma->ifma_addr->sa_family;
906 
907 	raw_input(m, &route_proto, &route_src, &route_dst);
908 }
909 
910 /*
911  * This is called to generate routing socket messages indicating
912  * network interface arrival and departure.
913  */
914 void
915 rt_ifannouncemsg(ifp, what)
916 	struct ifnet *ifp;
917 	int what;
918 {
919 	struct if_announcemsghdr *ifan;
920 	struct mbuf *m;
921 	struct rt_addrinfo info;
922 
923 	if (route_cb.any_count == 0)
924 		return;
925 
926 	bzero(&info, sizeof(info));
927 
928 	m = rt_msg1(RTM_IFANNOUNCE, &info);
929 	if (m == NULL)
930 		return;
931 
932 	ifan = mtod(m, struct if_announcemsghdr *);
933 	ifan->ifan_index = ifp->if_index;
934 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
935 	ifan->ifan_what = what;
936 
937 	route_proto.sp_protocol = 0;
938 
939 	raw_input(m, &route_proto, &route_src, &route_dst);
940  }
941 
942 /*
943  * This is used in dumping the kernel table via sysctl().
944  */
945 int
946 sysctl_dumpentry(rn, vw)
947 	struct radix_node *rn;
948 	void *vw;
949 {
950 	struct walkarg *w = vw;
951 	struct rtentry *rt = (struct rtentry *)rn;
952 	int error = 0, size;
953 	struct rt_addrinfo info;
954 
955 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
956 		return 0;
957 
958 	bzero(&info, sizeof(info));
959 	info.sa_dst = rt_key(rt);
960 	info.sa_gateway = rt->rt_gateway;
961 	info.sa_netmask = rt_mask(rt);
962 	info.sa_genmask = rt->rt_genmask;
963 	if (rt->rt_ifp != NULL) {
964 		info.sa_ifpaddr =
965 		    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
966 		info.sa_ifaaddr = rt->rt_ifa->ifa_addr;
967 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
968 			info.sa_bcastaddr = rt->rt_ifa->ifa_dstaddr;
969 	}
970 	size = rt_msg2(RTM_GET, &info, NULL, w);
971 	if (w->w_req != NULL && w->w_tmem != NULL) {
972 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
973 
974 		rtm->rtm_flags = rt->rt_flags;
975 		rtm->rtm_use = rt->rt_use;
976 		rtm->rtm_rmx = rt->rt_rmx;
977 		rtm->rtm_index = rt->rt_ifp->if_index;
978 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
979 		rtm->rtm_addrs = info.rti_addrs;
980 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
981 		return (error);
982 	}
983 	return (error);
984 }
985 
986 int
987 sysctl_iflist(af, w)
988 	int	af;
989 	struct	walkarg *w;
990 {
991 	struct ifnet *ifp;
992 	struct ifaddr *ifa;
993 	struct	rt_addrinfo info;
994 	int	len, error = 0;
995 
996 	bzero(&info, sizeof(info));
997 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
998 		if (w->w_arg && w->w_arg != ifp->if_index)
999 			continue;
1000 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
1001 		info.sa_ifpaddr = ifa->ifa_addr;
1002 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1003 		info.sa_ifpaddr = NULL;
1004 		if (w->w_req != NULL && w->w_tmem != NULL) {
1005 			struct if_msghdr *ifm;
1006 
1007 			ifm = (struct if_msghdr *)w->w_tmem;
1008 			ifm->ifm_index = ifp->if_index;
1009 			ifm->ifm_flags = (u_short)ifp->if_flags;
1010 			ifm->ifm_data = ifp->if_data;
1011 			ifm->ifm_addrs = info.rti_addrs;
1012 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1013 			if (error)
1014 				return (error);
1015 		}
1016 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1017 			if (af && af != ifa->ifa_addr->sa_family)
1018 				continue;
1019 			if (curproc->p_ucred->cr_prison && prison_if(curthread, ifa->ifa_addr))
1020 				continue;
1021 			info.sa_ifaaddr = ifa->ifa_addr;
1022 			info.sa_netmask = ifa->ifa_netmask;
1023 			info.sa_bcastaddr = ifa->ifa_dstaddr;
1024 			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1025 			if (w->w_req && w->w_tmem) {
1026 				struct ifa_msghdr *ifam;
1027 
1028 				ifam = (struct ifa_msghdr *)w->w_tmem;
1029 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1030 				ifam->ifam_flags = ifa->ifa_flags;
1031 				ifam->ifam_metric = ifa->ifa_metric;
1032 				ifam->ifam_addrs = info.rti_addrs;
1033 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1034 				if (error)
1035 					return (error);
1036 			}
1037 		}
1038 		info.sa_netmask = info.sa_ifaaddr = info.sa_bcastaddr = NULL;
1039 	}
1040 	return (0);
1041 }
1042 
1043 static int
1044 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1045 {
1046 	int	*name = (int *)arg1;
1047 	u_int	namelen = arg2;
1048 	struct radix_node_head *rnh;
1049 	int	i, s, error = EINVAL;
1050 	u_char  af;
1051 	struct	walkarg w;
1052 
1053 	name ++;
1054 	namelen--;
1055 	if (req->newptr)
1056 		return (EPERM);
1057 	if (namelen != 3)
1058 		return (EINVAL);
1059 	af = name[0];
1060 	bzero(&w, sizeof(w));
1061 	w.w_op = name[1];
1062 	w.w_arg = name[2];
1063 	w.w_req = req;
1064 
1065 	s = splnet();
1066 	switch (w.w_op) {
1067 
1068 	case NET_RT_DUMP:
1069 	case NET_RT_FLAGS:
1070 		for (i = 1; i <= AF_MAX; i++)
1071 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
1072 			    (error = rnh->rnh_walktree(rnh,
1073 						       sysctl_dumpentry, &w)))
1074 				break;
1075 		break;
1076 
1077 	case NET_RT_IFLIST:
1078 		error = sysctl_iflist(af, &w);
1079 	}
1080 	splx(s);
1081 	if (w.w_tmem)
1082 		free(w.w_tmem, M_RTABLE);
1083 	return (error);
1084 }
1085 
1086 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1087 
1088 /*
1089  * Definitions of protocols supported in the ROUTE domain.
1090  */
1091 
1092 extern struct domain routedomain;		/* or at least forward */
1093 
1094 static struct protosw routesw[] = {
1095 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1096   0,		route_output,	raw_ctlinput,	0,
1097   cpu0_soport,
1098   raw_init,	0,		0,		0,
1099   &route_usrreqs
1100 }
1101 };
1102 
1103 static struct domain routedomain =
1104     { PF_ROUTE, "route", 0, 0, 0,
1105       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1106 
1107 DOMAIN_SET(route);
1108