xref: /dragonfly/sys/net/rtsock.c (revision 82ece171)
1 /*
2  * Copyright (c) 2004, 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 2004, 2005 Jeffrey M. Hsu.  All rights reserved.
35  *
36  * License terms: all terms for the DragonFly license above plus the following:
37  *
38  * 4. All advertising materials mentioning features or use of this software
39  *    must display the following acknowledgement:
40  *
41  *	This product includes software developed by Jeffrey M. Hsu
42  *	for the DragonFly Project.
43  *
44  *    This requirement may be waived with permission from Jeffrey Hsu.
45  *    Permission will be granted to any DragonFly user for free.
46  *    This requirement will sunset and may be removed on Jan 31, 2006,
47  *    after which the standard DragonFly license (as shown above) will
48  *    apply.
49  */
50 
51 /*
52  * Copyright (c) 1988, 1991, 1993
53  *	The Regents of the University of California.  All rights reserved.
54  *
55  * Redistribution and use in source and binary forms, with or without
56  * modification, are permitted provided that the following conditions
57  * are met:
58  * 1. Redistributions of source code must retain the above copyright
59  *    notice, this list of conditions and the following disclaimer.
60  * 2. Redistributions in binary form must reproduce the above copyright
61  *    notice, this list of conditions and the following disclaimer in the
62  *    documentation and/or other materials provided with the distribution.
63  * 3. All advertising materials mentioning features or use of this software
64  *    must display the following acknowledgement:
65  *	This product includes software developed by the University of
66  *	California, Berkeley and its contributors.
67  * 4. Neither the name of the University nor the names of its contributors
68  *    may be used to endorse or promote products derived from this software
69  *    without specific prior written permission.
70  *
71  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
72  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
73  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
74  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
75  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
76  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
77  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
78  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
79  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
80  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
81  * SUCH DAMAGE.
82  *
83  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
84  * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $
85  * $DragonFly: src/sys/net/rtsock.c,v 1.28 2005/06/14 19:47:30 joerg Exp $
86  */
87 
88 #include <sys/param.h>
89 #include <sys/systm.h>
90 #include <sys/kernel.h>
91 #include <sys/sysctl.h>
92 #include <sys/proc.h>
93 #include <sys/malloc.h>
94 #include <sys/mbuf.h>
95 #include <sys/protosw.h>
96 #include <sys/socket.h>
97 #include <sys/socketvar.h>
98 #include <sys/domain.h>
99 #include <sys/thread2.h>
100 
101 #include <net/if.h>
102 #include <net/route.h>
103 #include <net/raw_cb.h>
104 
/*
 * Malloc type; within this file it tags the temporary message buffers
 * allocated while handling routing-socket requests and sysctl dumps.
 */
MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");

/*
 * Counters of attached routing sockets.  rts_attach() increments and
 * rts_detach() decrements the counter matching the socket's protocol,
 * plus any_count for every socket regardless of protocol.
 */
static struct route_cb {
	int	ip_count;	/* sockets attached with protocol AF_INET */
	int	ip6_count;	/* sockets attached with protocol AF_INET6 */
	int	ipx_count;	/* sockets attached with protocol AF_IPX */
	int	ns_count;	/* sockets attached with protocol AF_NS */
	int	any_count;	/* every attached routing socket */
} route_cb;

/* Source address passed to raw_input() and installed as each PCB's rcb_faddr. */
static const struct sockaddr route_src = { 2, PF_ROUTE, };

/* State shared by the sysctl dump helpers below. */
struct walkarg {
	int	w_tmemsize;	/* size of the buffer w_tmem points to */
	int	w_op, w_arg;	/* requested operation and its argument */
	void	*w_tmem;	/* scratch buffer holding one message */
	struct sysctl_req *w_req;	/* request that receives output; may be NULL */
};

/* Forward declarations of the file-local helpers defined further down. */
static struct mbuf *
		rt_msg_mbuf (int, struct rt_addrinfo *);
static void	rt_msg_buffer (int, struct rt_addrinfo *, void *buf, int len);
static int	rt_msgsize (int type, struct rt_addrinfo *rtinfo);
static int	rt_xaddrs (char *, char *, struct rt_addrinfo *);
static int	sysctl_dumpentry (struct radix_node *rn, void *vw);
static int	sysctl_iflist (int af, struct walkarg *w);
static int	route_output(struct mbuf *, struct socket *, ...);
static void	rt_setmetrics (u_long, struct rt_metrics *,
			       struct rt_metrics *);
134 
135 /*
136  * It really doesn't make any sense at all for this code to share much
137  * with raw_usrreq.c, since its functionality is so restricted.  XXX
138  */
139 static int
140 rts_abort(struct socket *so)
141 {
142 	int error;
143 
144 	crit_enter();
145 	error = raw_usrreqs.pru_abort(so);
146 	crit_exit();
147 	return error;
148 }
149 
150 /* pru_accept is EOPNOTSUPP */
151 
152 static int
153 rts_attach(struct socket *so, int proto, struct pru_attach_info *ai)
154 {
155 	struct rawcb *rp;
156 	int error;
157 
158 	if (sotorawcb(so) != NULL)
159 		return EISCONN;	/* XXX panic? */
160 
161 	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
162 	if (rp == NULL)
163 		return ENOBUFS;
164 
165 	/*
166 	 * The critical section is necessary to block protocols from sending
167 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
168 	 * this PCB is extant but incompletely initialized.
169 	 * Probably we should try to do more of this work beforehand and
170 	 * eliminate the critical section.
171 	 */
172 	crit_enter();
173 	so->so_pcb = rp;
174 	error = raw_attach(so, proto, ai->sb_rlimit);
175 	rp = sotorawcb(so);
176 	if (error) {
177 		crit_exit();
178 		free(rp, M_PCB);
179 		return error;
180 	}
181 	switch(rp->rcb_proto.sp_protocol) {
182 	case AF_INET:
183 		route_cb.ip_count++;
184 		break;
185 	case AF_INET6:
186 		route_cb.ip6_count++;
187 		break;
188 	case AF_IPX:
189 		route_cb.ipx_count++;
190 		break;
191 	case AF_NS:
192 		route_cb.ns_count++;
193 		break;
194 	}
195 	rp->rcb_faddr = &route_src;
196 	route_cb.any_count++;
197 	soisconnected(so);
198 	so->so_options |= SO_USELOOPBACK;
199 	crit_exit();
200 	return 0;
201 }
202 
203 static int
204 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
205 {
206 	int error;
207 
208 	crit_enter();
209 	error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
210 	crit_exit();
211 	return error;
212 }
213 
214 static int
215 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
216 {
217 	int error;
218 
219 	crit_enter();
220 	error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
221 	crit_exit();
222 	return error;
223 }
224 
225 /* pru_connect2 is EOPNOTSUPP */
226 /* pru_control is EOPNOTSUPP */
227 
228 static int
229 rts_detach(struct socket *so)
230 {
231 	struct rawcb *rp = sotorawcb(so);
232 	int error;
233 
234 	crit_enter();
235 	if (rp != NULL) {
236 		switch(rp->rcb_proto.sp_protocol) {
237 		case AF_INET:
238 			route_cb.ip_count--;
239 			break;
240 		case AF_INET6:
241 			route_cb.ip6_count--;
242 			break;
243 		case AF_IPX:
244 			route_cb.ipx_count--;
245 			break;
246 		case AF_NS:
247 			route_cb.ns_count--;
248 			break;
249 		}
250 		route_cb.any_count--;
251 	}
252 	error = raw_usrreqs.pru_detach(so);
253 	crit_exit();
254 	return error;
255 }
256 
257 static int
258 rts_disconnect(struct socket *so)
259 {
260 	int error;
261 
262 	crit_enter();
263 	error = raw_usrreqs.pru_disconnect(so);
264 	crit_exit();
265 	return error;
266 }
267 
268 /* pru_listen is EOPNOTSUPP */
269 
270 static int
271 rts_peeraddr(struct socket *so, struct sockaddr **nam)
272 {
273 	int error;
274 
275 	crit_enter();
276 	error = raw_usrreqs.pru_peeraddr(so, nam);
277 	crit_exit();
278 	return error;
279 }
280 
281 /* pru_rcvd is EOPNOTSUPP */
282 /* pru_rcvoob is EOPNOTSUPP */
283 
284 static int
285 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
286 	 struct mbuf *control, struct thread *td)
287 {
288 	int error;
289 
290 	crit_enter();
291 	error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
292 	crit_exit();
293 	return error;
294 }
295 
296 /* pru_sense is null */
297 
298 static int
299 rts_shutdown(struct socket *so)
300 {
301 	int error;
302 
303 	crit_enter();
304 	error = raw_usrreqs.pru_shutdown(so);
305 	crit_exit();
306 	return error;
307 }
308 
309 static int
310 rts_sockaddr(struct socket *so, struct sockaddr **nam)
311 {
312 	int error;
313 
314 	crit_enter();
315 	error = raw_usrreqs.pru_sockaddr(so, nam);
316 	crit_exit();
317 	return error;
318 }
319 
320 static struct pr_usrreqs route_usrreqs = {
321 	rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
322 	pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
323 	pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
324 	rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
325 	sosend, soreceive, sopoll
326 };
327 
/*
 * Return the address family stored in a sockaddr, or 0 when no
 * sockaddr is supplied.
 */
static __inline sa_family_t
familyof(struct sockaddr *sa)
{
	if (sa == NULL)
		return (0);
	return (sa->sa_family);
}
333 
334 static void
335 rts_input(struct mbuf *m, sa_family_t family)
336 {
337 	static const struct sockaddr route_dst = { 2, PF_ROUTE, };
338 	struct sockproto route_proto = { PF_ROUTE, family };
339 
340 	raw_input(m, &route_proto, &route_src, &route_dst);
341 }
342 
343 static void *
344 reallocbuf(void *ptr, size_t len, size_t olen)
345 {
346 	void *newptr;
347 
348 	newptr = malloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
349 	if (newptr == NULL)
350 		return NULL;
351 	bcopy(ptr, newptr, olen);
352 	free(ptr, M_RTABLE);
353 	return (newptr);
354 }
355 
356 /*
357  * Internal helper routine for route_output().
358  */
359 static int
360 fillrtmsg(struct rt_msghdr **prtm, struct rtentry *rt,
361 	  struct rt_addrinfo *rtinfo)
362 {
363 	int msglen;
364 	struct rt_msghdr *rtm = *prtm;
365 
366 	/* Fill in rt_addrinfo for call to rt_msg_buffer(). */
367 	rtinfo->rti_dst = rt_key(rt);
368 	rtinfo->rti_gateway = rt->rt_gateway;
369 	rtinfo->rti_netmask = rt_mask(rt);		/* might be NULL */
370 	rtinfo->rti_genmask = rt->rt_genmask;		/* might be NULL */
371 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
372 		if (rt->rt_ifp != NULL) {
373 			rtinfo->rti_ifpaddr =
374 			    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
375 			rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr;
376 			if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
377 				rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
378 			rtm->rtm_index = rt->rt_ifp->if_index;
379 		} else {
380 			rtinfo->rti_ifpaddr = NULL;
381 			rtinfo->rti_ifaaddr = NULL;
382 	    }
383 	}
384 
385 	msglen = rt_msgsize(rtm->rtm_type, rtinfo);
386 	if (rtm->rtm_msglen < msglen) {
387 		rtm = reallocbuf(rtm, msglen, rtm->rtm_msglen);
388 		if (rtm == NULL)
389 			return (ENOBUFS);
390 		*prtm = rtm;
391 	}
392 	rt_msg_buffer(rtm->rtm_type, rtinfo, rtm, msglen);
393 
394 	rtm->rtm_flags = rt->rt_flags;
395 	rtm->rtm_rmx = rt->rt_rmx;
396 	rtm->rtm_addrs = rtinfo->rti_addrs;
397 
398 	return (0);
399 }
400 
401 /*ARGSUSED*/
402 static int
403 route_output(struct mbuf *m, struct socket *so, ...)
404 {
405 	struct rt_msghdr *rtm = NULL;
406 	struct rtentry *rt = NULL;
407 	struct rtentry *saved_nrt = NULL;
408 	struct radix_node_head *rnh;
409 	struct ifaddr *ifa = NULL;
410 	struct rawcb *rp = NULL;
411 	struct pr_output_info *oi;
412 	struct rt_addrinfo rtinfo;
413 	int len, error = 0;
414 	__va_list ap;
415 
416 	__va_start(ap, so);
417 	oi = __va_arg(ap, struct pr_output_info *);
418 	__va_end(ap);
419 
420 #define gotoerr(e) { error = e; goto flush;}
421 
422 	if (m == NULL ||
423 	    (m->m_len < sizeof(long) &&
424 	     (m = m_pullup(m, sizeof(long))) == NULL))
425 		return (ENOBUFS);
426 	if (!(m->m_flags & M_PKTHDR))
427 		panic("route_output");
428 	len = m->m_pkthdr.len;
429 	if (len < sizeof(struct rt_msghdr) ||
430 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
431 		rtinfo.rti_dst = NULL;
432 		gotoerr(EINVAL);
433 	}
434 	rtm = malloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
435 	if (rtm == NULL) {
436 		rtinfo.rti_dst = NULL;
437 		gotoerr(ENOBUFS);
438 	}
439 	m_copydata(m, 0, len, (caddr_t)rtm);
440 	if (rtm->rtm_version != RTM_VERSION) {
441 		rtinfo.rti_dst = NULL;
442 		gotoerr(EPROTONOSUPPORT);
443 	}
444 	rtm->rtm_pid = oi->p_pid;
445 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
446 	rtinfo.rti_addrs = rtm->rtm_addrs;
447 	if (rt_xaddrs((char *)(rtm + 1), (char *)rtm + len, &rtinfo) != 0) {
448 		rtinfo.rti_dst = NULL;
449 		gotoerr(EINVAL);
450 	}
451 	rtinfo.rti_flags = rtm->rtm_flags;
452 	if (rtinfo.rti_dst == NULL || rtinfo.rti_dst->sa_family >= AF_MAX ||
453 	    (rtinfo.rti_gateway && rtinfo.rti_gateway->sa_family >= AF_MAX))
454 		gotoerr(EINVAL);
455 
456 	if (rtinfo.rti_genmask != NULL) {
457 		struct radix_node *n;
458 
459 #define	clen(s)	(*(u_char *)(s))
460 		n = rn_addmask((char *)rtinfo.rti_genmask, TRUE, 1);
461 		if (n != NULL &&
462 		    rtinfo.rti_genmask->sa_len >= clen(n->rn_key) &&
463 		    bcmp((char *)rtinfo.rti_genmask + 1,
464 		         (char *)n->rn_key + 1, clen(n->rn_key) - 1) == 0)
465 			rtinfo.rti_genmask = (struct sockaddr *)n->rn_key;
466 		else
467 			gotoerr(ENOBUFS);
468 	}
469 
470 	/*
471 	 * Verify that the caller has the appropriate privilege; RTM_GET
472 	 * is the only operation the non-superuser is allowed.
473 	 */
474 	if (rtm->rtm_type != RTM_GET && suser_cred(so->so_cred, 0) != 0)
475 		gotoerr(EPERM);
476 
477 	switch (rtm->rtm_type) {
478 	case RTM_ADD:
479 		if (rtinfo.rti_gateway == NULL)
480 			gotoerr(EINVAL);
481 		error = rtrequest1(RTM_ADD, &rtinfo, &saved_nrt);
482 		if (error == 0 && saved_nrt != NULL) {
483 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
484 			    &saved_nrt->rt_rmx);
485 			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
486 			saved_nrt->rt_rmx.rmx_locks |=
487 			    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
488 			--saved_nrt->rt_refcnt;
489 			saved_nrt->rt_genmask = rtinfo.rti_genmask;
490 		}
491 		break;
492 	case RTM_DELETE:
493 		error = rtrequest1(RTM_DELETE, &rtinfo, &saved_nrt);
494 		if (error == 0) {
495 			if ((rt = saved_nrt))
496 				rt->rt_refcnt++;
497 			if (fillrtmsg(&rtm, rt, &rtinfo) != 0)
498 				gotoerr(ENOBUFS);
499 		}
500 		break;
501 	case RTM_GET:
502 	case RTM_CHANGE:
503 	case RTM_LOCK:
504 		if ((rnh = rt_tables[rtinfo.rti_dst->sa_family]) == NULL)
505 			gotoerr(EAFNOSUPPORT);
506 		rt = (struct rtentry *)
507 		    rnh->rnh_lookup((char *)rtinfo.rti_dst,
508 		    		    (char *)rtinfo.rti_netmask, rnh);
509 		if (rt == NULL)
510 			gotoerr(ESRCH);
511 		rt->rt_refcnt++;
512 
513 		switch(rtm->rtm_type) {
514 		case RTM_GET:
515 			if (fillrtmsg(&rtm, rt, &rtinfo) != 0)
516 				gotoerr(ENOBUFS);
517 			break;
518 		case RTM_CHANGE:
519 			/*
520 			 * new gateway could require new ifaddr, ifp;
521 			 * flags may also be different; ifp may be specified
522 			 * by ll sockaddr when protocol address is ambiguous
523 			 */
524 			if (((rt->rt_flags & RTF_GATEWAY) &&
525 			     rtinfo.rti_gateway != NULL) ||
526 			    rtinfo.rti_ifpaddr != NULL ||
527 			    (rtinfo.rti_ifaaddr != NULL &&
528 			     sa_equal(rtinfo.rti_ifaaddr,
529 			     	      rt->rt_ifa->ifa_addr))) {
530 				error = rt_getifa(&rtinfo);
531 				if (error != 0)
532 					gotoerr(error);
533 			}
534 			if (rtinfo.rti_gateway != NULL) {
535 				error = rt_setgate(rt, rt_key(rt),
536 						   rtinfo.rti_gateway);
537 				if (error != 0)
538 					gotoerr(error);
539 			}
540 			if ((ifa = rtinfo.rti_ifa) != NULL) {
541 				struct ifaddr *oifa = rt->rt_ifa;
542 
543 				if (oifa != ifa) {
544 					if (oifa && oifa->ifa_rtrequest)
545 						oifa->ifa_rtrequest(RTM_DELETE,
546 						    rt, &rtinfo);
547 					IFAFREE(rt->rt_ifa);
548 					IFAREF(ifa);
549 					rt->rt_ifa = ifa;
550 					rt->rt_ifp = rtinfo.rti_ifp;
551 				}
552 			}
553 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
554 				      &rt->rt_rmx);
555 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
556 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &rtinfo);
557 			if (rtinfo.rti_genmask != NULL)
558 				rt->rt_genmask = rtinfo.rti_genmask;
559 			/*
560 			 * Fall into
561 			 */
562 		case RTM_LOCK:
563 			rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
564 			rt->rt_rmx.rmx_locks |=
565 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
566 			break;
567 		}
568 
569 		break;
570 	default:
571 		gotoerr(EOPNOTSUPP);
572 	}
573 
574 flush:
575 	if (rtm != NULL) {
576 		if (error != 0)
577 			rtm->rtm_errno = error;
578 		else
579 			rtm->rtm_flags |= RTF_DONE;
580 	}
581 	if (rt != NULL)
582 		rtfree(rt);
583 	/*
584 	 * Check to see if we don't want our own messages.
585 	 */
586 	if (!(so->so_options & SO_USELOOPBACK)) {
587 		if (route_cb.any_count <= 1) {
588 			if (rtm != NULL)
589 				free(rtm, M_RTABLE);
590 			m_freem(m);
591 			return (error);
592 		}
593 		/* There is another listener, so construct message */
594 		rp = sotorawcb(so);
595 	}
596 	if (rtm != NULL) {
597 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
598 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
599 			m_freem(m);
600 			m = NULL;
601 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
602 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
603 		free(rtm, M_RTABLE);
604 	}
605 	if (rp != NULL)
606 		rp->rcb_proto.sp_family = 0; /* Avoid us */
607 	if (m != NULL)
608 		rts_input(m, familyof(rtinfo.rti_dst));
609 	if (rp != NULL)
610 		rp->rcb_proto.sp_family = PF_ROUTE;
611 	return (error);
612 }
613 
614 static void
615 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
616 {
617 #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt;
618 	setmetric(RTV_RPIPE, rmx_recvpipe);
619 	setmetric(RTV_SPIPE, rmx_sendpipe);
620 	setmetric(RTV_SSTHRESH, rmx_ssthresh);
621 	setmetric(RTV_RTT, rmx_rtt);
622 	setmetric(RTV_RTTVAR, rmx_rttvar);
623 	setmetric(RTV_HOPCOUNT, rmx_hopcount);
624 	setmetric(RTV_MTU, rmx_mtu);
625 	setmetric(RTV_EXPIRE, rmx_expire);
626 #undef setmetric
627 }
628 
/*
 * Round a length up to the next multiple of sizeof(long); lengths that
 * are not positive yield sizeof(long).
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) \
		 : sizeof(long))
631 
632 /*
633  * Extract the addresses of the passed sockaddrs.
634  * Do a little sanity checking so as to avoid bad memory references.
635  * This data is derived straight from userland.
636  */
637 static int
638 rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo)
639 {
640 	struct sockaddr *sa;
641 	int i;
642 
643 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
644 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
645 			continue;
646 		sa = (struct sockaddr *)cp;
647 		/*
648 		 * It won't fit.
649 		 */
650 		if ((cp + sa->sa_len) > cplim) {
651 			return (EINVAL);
652 		}
653 
654 		/*
655 		 * There are no more...  Quit now.
656 		 * If there are more bits, they are in error.
657 		 * I've seen this.  route(1) can evidently generate these.
658 		 * This causes kernel to core dump.
659 		 * For compatibility, if we see this, point to a safe address.
660 		 */
661 		if (sa->sa_len == 0) {
662 			static struct sockaddr sa_zero = {
663 				sizeof sa_zero, AF_INET,
664 			};
665 
666 			rtinfo->rti_info[i] = &sa_zero;
667 			return (0); /* should be EINVAL but for compat */
668 		}
669 
670 		/* Accept the sockaddr. */
671 		rtinfo->rti_info[i] = sa;
672 		cp += ROUNDUP(sa->sa_len);
673 	}
674 	return (0);
675 }
676 
677 static int
678 rt_msghdrsize(int type)
679 {
680 	switch (type) {
681 	case RTM_DELADDR:
682 	case RTM_NEWADDR:
683 		return sizeof(struct ifa_msghdr);
684 	case RTM_DELMADDR:
685 	case RTM_NEWMADDR:
686 		return sizeof(struct ifma_msghdr);
687 	case RTM_IFINFO:
688 		return sizeof(struct if_msghdr);
689 	case RTM_IFANNOUNCE:
690 		return sizeof(struct if_announcemsghdr);
691 	default:
692 		return sizeof(struct rt_msghdr);
693 	}
694 }
695 
696 static int
697 rt_msgsize(int type, struct rt_addrinfo *rtinfo)
698 {
699 	int len, i;
700 
701 	len = rt_msghdrsize(type);
702 	for (i = 0; i < RTAX_MAX; i++) {
703 		if (rtinfo->rti_info[i] != NULL)
704 			len += ROUNDUP(rtinfo->rti_info[i]->sa_len);
705 	}
706 	len = ALIGN(len);
707 	return len;
708 }
709 
710 /*
711  * Build a routing message in a buffer.
712  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
713  * to the end of the buffer after the message header.
714  *
715  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
716  * This side-effect can be avoided if we reorder the addrs bitmask field in all
717  * the route messages to line up so we can set it here instead of back in the
718  * calling routine.
719  */
720 static void
721 rt_msg_buffer(int type, struct rt_addrinfo *rtinfo, void *buf, int msglen)
722 {
723 	struct rt_msghdr *rtm;
724 	char *cp;
725 	int dlen, i;
726 
727 	rtm = (struct rt_msghdr *) buf;
728 	rtm->rtm_version = RTM_VERSION;
729 	rtm->rtm_type = type;
730 	rtm->rtm_msglen = msglen;
731 
732 	cp = (char *)buf + rt_msghdrsize(type);
733 	rtinfo->rti_addrs = 0;
734 	for (i = 0; i < RTAX_MAX; i++) {
735 		struct sockaddr *sa;
736 
737 		if ((sa = rtinfo->rti_info[i]) == NULL)
738 			continue;
739 		rtinfo->rti_addrs |= (1 << i);
740 		dlen = ROUNDUP(sa->sa_len);
741 		bcopy(sa, cp, dlen);
742 		cp += dlen;
743 	}
744 }
745 
746 /*
747  * Build a routing message in a mbuf chain.
748  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
749  * to the end of the mbuf after the message header.
750  *
751  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
752  * This side-effect can be avoided if we reorder the addrs bitmask field in all
753  * the route messages to line up so we can set it here instead of back in the
754  * calling routine.
755  */
756 static struct mbuf *
757 rt_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
758 {
759 	struct mbuf *m;
760 	struct rt_msghdr *rtm;
761 	int hlen, len;
762 	int i;
763 
764 	hlen = rt_msghdrsize(type);
765 	KASSERT(hlen <= MCLBYTES, ("rt_msg_mbuf: hlen %d doesn't fit", hlen));
766 
767 	m = m_getl(hlen, MB_DONTWAIT, MT_DATA, M_PKTHDR, NULL);
768 	if (m == NULL)
769 		return (NULL);
770 	m->m_pkthdr.len = m->m_len = hlen;
771 	m->m_pkthdr.rcvif = NULL;
772 	rtinfo->rti_addrs = 0;
773 	len = hlen;
774 	for (i = 0; i < RTAX_MAX; i++) {
775 		struct sockaddr *sa;
776 		int dlen;
777 
778 		if ((sa = rtinfo->rti_info[i]) == NULL)
779 			continue;
780 		rtinfo->rti_addrs |= (1 << i);
781 		dlen = ROUNDUP(sa->sa_len);
782 		m_copyback(m, len, dlen, (caddr_t)sa); /* can grow mbuf chain */
783 		len += dlen;
784 	}
785 	if (m->m_pkthdr.len != len) { /* one of the m_copyback() calls failed */
786 		m_freem(m);
787 		return (NULL);
788 	}
789 	rtm = mtod(m, struct rt_msghdr *);
790 	bzero(rtm, hlen);
791 	rtm->rtm_msglen = len;
792 	rtm->rtm_version = RTM_VERSION;
793 	rtm->rtm_type = type;
794 	return (m);
795 }
796 
797 /*
798  * This routine is called to generate a message from the routing
799  * socket indicating that a redirect has occurred, a routing lookup
800  * has failed, or that a protocol has detected timeouts to a particular
801  * destination.
802  */
803 void
804 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
805 {
806 	struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
807 	struct rt_msghdr *rtm;
808 	struct mbuf *m;
809 
810 	if (route_cb.any_count == 0)
811 		return;
812 	m = rt_msg_mbuf(type, rtinfo);
813 	if (m == NULL)
814 		return;
815 	rtm = mtod(m, struct rt_msghdr *);
816 	rtm->rtm_flags = RTF_DONE | flags;
817 	rtm->rtm_errno = error;
818 	rtm->rtm_addrs = rtinfo->rti_addrs;
819 	rts_input(m, familyof(dst));
820 }
821 
822 void
823 rt_dstmsg(int type, struct sockaddr *dst, int error)
824 {
825 	struct rt_msghdr *rtm;
826 	struct rt_addrinfo addrs;
827 	struct mbuf *m;
828 
829 	if (route_cb.any_count == 0)
830 		return;
831 	bzero(&addrs, sizeof(struct rt_addrinfo));
832 	addrs.rti_info[RTAX_DST] = dst;
833 	m = rt_msg_mbuf(type, &addrs);
834 	if (m == NULL)
835 		return;
836 	rtm = mtod(m, struct rt_msghdr *);
837 	rtm->rtm_flags = RTF_DONE;
838 	rtm->rtm_errno = error;
839 	rtm->rtm_addrs = addrs.rti_addrs;
840 	rts_input(m, familyof(dst));
841 }
842 
843 /*
844  * This routine is called to generate a message from the routing
845  * socket indicating that the status of a network interface has changed.
846  */
847 void
848 rt_ifmsg(struct ifnet *ifp)
849 {
850 	struct if_msghdr *ifm;
851 	struct mbuf *m;
852 	struct rt_addrinfo rtinfo;
853 
854 	if (route_cb.any_count == 0)
855 		return;
856 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
857 	m = rt_msg_mbuf(RTM_IFINFO, &rtinfo);
858 	if (m == NULL)
859 		return;
860 	ifm = mtod(m, struct if_msghdr *);
861 	ifm->ifm_index = ifp->if_index;
862 	ifm->ifm_flags = ifp->if_flags;
863 	ifm->ifm_data = ifp->if_data;
864 	ifm->ifm_addrs = 0;
865 	rts_input(m, 0);
866 }
867 
868 static void
869 rt_ifamsg(int cmd, struct ifaddr *ifa)
870 {
871 	struct ifa_msghdr *ifam;
872 	struct rt_addrinfo rtinfo;
873 	struct mbuf *m;
874 	struct ifnet *ifp = ifa->ifa_ifp;
875 
876 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
877 	rtinfo.rti_ifaaddr = ifa->ifa_addr;
878 	rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
879 	rtinfo.rti_netmask = ifa->ifa_netmask;
880 	rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
881 
882 	m = rt_msg_mbuf(cmd, &rtinfo);
883 	if (m == NULL)
884 		return;
885 
886 	ifam = mtod(m, struct ifa_msghdr *);
887 	ifam->ifam_index = ifp->if_index;
888 	ifam->ifam_metric = ifa->ifa_metric;
889 	ifam->ifam_flags = ifa->ifa_flags;
890 	ifam->ifam_addrs = rtinfo.rti_addrs;
891 
892 	rts_input(m, familyof(ifa->ifa_addr));
893 }
894 
895 void
896 rt_rtmsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int error)
897 {
898 	struct rt_msghdr *rtm;
899 	struct rt_addrinfo rtinfo;
900 	struct mbuf *m;
901 	struct sockaddr *dst;
902 
903 	if (rt == NULL)
904 		return;
905 
906 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
907 	rtinfo.rti_dst = dst = rt_key(rt);
908 	rtinfo.rti_gateway = rt->rt_gateway;
909 	rtinfo.rti_netmask = rt_mask(rt);
910 	if (ifp != NULL)
911 		rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
912 	rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr;
913 
914 	m = rt_msg_mbuf(cmd, &rtinfo);
915 	if (m == NULL)
916 		return;
917 
918 	rtm = mtod(m, struct rt_msghdr *);
919 	if (ifp != NULL)
920 		rtm->rtm_index = ifp->if_index;
921 	rtm->rtm_flags |= rt->rt_flags;
922 	rtm->rtm_errno = error;
923 	rtm->rtm_addrs = rtinfo.rti_addrs;
924 
925 	rts_input(m, familyof(dst));
926 }
927 
928 /*
929  * This is called to generate messages from the routing socket
930  * indicating a network interface has had addresses associated with it.
931  * if we ever reverse the logic and replace messages TO the routing
932  * socket indicate a request to configure interfaces, then it will
933  * be unnecessary as the routing socket will automatically generate
934  * copies of it.
935  */
936 void
937 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
938 {
939 	if (route_cb.any_count == 0)
940 		return;
941 
942 	if (cmd == RTM_ADD) {
943 		rt_ifamsg(RTM_NEWADDR, ifa);
944 		rt_rtmsg(RTM_ADD, rt, ifa->ifa_ifp, error);
945 	} else {
946 		KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd));
947 		rt_rtmsg(RTM_DELETE, rt, ifa->ifa_ifp, error);
948 		rt_ifamsg(RTM_DELADDR, ifa);
949 	}
950 }
951 
952 /*
953  * This is the analogue to the rt_newaddrmsg which performs the same
954  * function but for multicast group memberhips.  This is easier since
955  * there is no route state to worry about.
956  */
957 void
958 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
959 {
960 	struct rt_addrinfo rtinfo;
961 	struct mbuf *m = NULL;
962 	struct ifnet *ifp = ifma->ifma_ifp;
963 	struct ifma_msghdr *ifmam;
964 
965 	if (route_cb.any_count == 0)
966 		return;
967 
968 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
969 	rtinfo.rti_ifaaddr = ifma->ifma_addr;
970 	if (ifp != NULL && !TAILQ_EMPTY(&ifp->if_addrhead))
971 		rtinfo.rti_ifpaddr = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
972 	/*
973 	 * If a link-layer address is present, present it as a ``gateway''
974 	 * (similarly to how ARP entries, e.g., are presented).
975 	 */
976 	rtinfo.rti_gateway = ifma->ifma_lladdr;
977 
978 	m = rt_msg_mbuf(cmd, &rtinfo);
979 	if (m == NULL)
980 		return;
981 
982 	ifmam = mtod(m, struct ifma_msghdr *);
983 	ifmam->ifmam_index = ifp->if_index;
984 	ifmam->ifmam_addrs = rtinfo.rti_addrs;
985 
986 	rts_input(m, familyof(ifma->ifma_addr));
987 }
988 
989 /*
990  * This is called to generate routing socket messages indicating
991  * network interface arrival and departure.
992  */
993 void
994 rt_ifannouncemsg(struct ifnet *ifp, int what)
995 {
996 	struct rt_addrinfo addrinfo;
997 	struct mbuf *m;
998 	struct if_announcemsghdr *ifan;
999 
1000 	if (route_cb.any_count == 0)
1001 		return;
1002 
1003 	bzero(&addrinfo, sizeof addrinfo);
1004 	m = rt_msg_mbuf(RTM_IFANNOUNCE, &addrinfo);
1005 	if (m == NULL)
1006 		return;
1007 
1008 	ifan = mtod(m, struct if_announcemsghdr *);
1009 	ifan->ifan_index = ifp->if_index;
1010 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof ifan->ifan_name);
1011 	ifan->ifan_what = what;
1012 
1013 	rts_input(m, 0);
1014 }
1015 
1016 static int
1017 resizewalkarg(struct walkarg *w, int len)
1018 {
1019 	void *newptr;
1020 
1021 	newptr = malloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
1022 	if (newptr == NULL)
1023 		return (ENOMEM);
1024 	if (w->w_tmem != NULL)
1025 		free(w->w_tmem, M_RTABLE);
1026 	w->w_tmem = newptr;
1027 	w->w_tmemsize = len;
1028 	return (0);
1029 }
1030 
1031 /*
1032  * This is used in dumping the kernel table via sysctl().
1033  */
1034 int
1035 sysctl_dumpentry(struct radix_node *rn, void *vw)
1036 {
1037 	struct walkarg *w = vw;
1038 	struct rtentry *rt = (struct rtentry *)rn;
1039 	struct rt_addrinfo rtinfo;
1040 	int error, msglen;
1041 
1042 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1043 		return 0;
1044 
1045 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1046 	rtinfo.rti_dst = rt_key(rt);
1047 	rtinfo.rti_gateway = rt->rt_gateway;
1048 	rtinfo.rti_netmask = rt_mask(rt);
1049 	rtinfo.rti_genmask = rt->rt_genmask;
1050 	if (rt->rt_ifp != NULL) {
1051 		rtinfo.rti_ifpaddr =
1052 		    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
1053 		rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr;
1054 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1055 			rtinfo.rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
1056 	}
1057 	msglen = rt_msgsize(RTM_GET, &rtinfo);
1058 	if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0)
1059 		return (ENOMEM);
1060 	rt_msg_buffer(RTM_GET, &rtinfo, w->w_tmem, msglen);
1061 	if (w->w_req != NULL) {
1062 		struct rt_msghdr *rtm = w->w_tmem;
1063 
1064 		rtm->rtm_flags = rt->rt_flags;
1065 		rtm->rtm_use = rt->rt_use;
1066 		rtm->rtm_rmx = rt->rt_rmx;
1067 		rtm->rtm_index = rt->rt_ifp->if_index;
1068 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1069 		rtm->rtm_addrs = rtinfo.rti_addrs;
1070 		error = SYSCTL_OUT(w->w_req, rtm, msglen);
1071 		return (error);
1072 	}
1073 	return (0);
1074 }
1075 
1076 static int
1077 sysctl_iflist(int af, struct walkarg *w)
1078 {
1079 	struct ifnet *ifp;
1080 	struct ifaddr *ifa;
1081 	struct rt_addrinfo rtinfo;
1082 	int msglen, error;
1083 
1084 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1085 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1086 		if (w->w_arg && w->w_arg != ifp->if_index)
1087 			continue;
1088 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
1089 		rtinfo.rti_ifpaddr = ifa->ifa_addr;
1090 		msglen = rt_msgsize(RTM_IFINFO, &rtinfo);
1091 		if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0)
1092 			return (ENOMEM);
1093 		rt_msg_buffer(RTM_IFINFO, &rtinfo, w->w_tmem, msglen);
1094 		rtinfo.rti_ifpaddr = NULL;
1095 		if (w->w_req != NULL && w->w_tmem != NULL) {
1096 			struct if_msghdr *ifm = w->w_tmem;
1097 
1098 			ifm->ifm_index = ifp->if_index;
1099 			ifm->ifm_flags = ifp->if_flags;
1100 			ifm->ifm_data = ifp->if_data;
1101 			ifm->ifm_addrs = rtinfo.rti_addrs;
1102 			error = SYSCTL_OUT(w->w_req, ifm, msglen);
1103 			if (error)
1104 				return (error);
1105 		}
1106 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1107 			if (af && af != ifa->ifa_addr->sa_family)
1108 				continue;
1109 			if (curproc->p_ucred->cr_prison &&
1110 			    prison_if(curthread, ifa->ifa_addr))
1111 				continue;
1112 			rtinfo.rti_ifaaddr = ifa->ifa_addr;
1113 			rtinfo.rti_netmask = ifa->ifa_netmask;
1114 			rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
1115 			msglen = rt_msgsize(RTM_NEWADDR, &rtinfo);
1116 			if (w->w_tmemsize < msglen &&
1117 			    resizewalkarg(w, msglen) != 0)
1118 				return (ENOMEM);
1119 			rt_msg_buffer(RTM_NEWADDR, &rtinfo, w->w_tmem, msglen);
1120 			if (w->w_req != NULL) {
1121 				struct ifa_msghdr *ifam = w->w_tmem;
1122 
1123 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1124 				ifam->ifam_flags = ifa->ifa_flags;
1125 				ifam->ifam_metric = ifa->ifa_metric;
1126 				ifam->ifam_addrs = rtinfo.rti_addrs;
1127 				error = SYSCTL_OUT(w->w_req, w->w_tmem, msglen);
1128 				if (error)
1129 					return (error);
1130 			}
1131 		}
1132 		rtinfo.rti_netmask = NULL;
1133 		rtinfo.rti_ifaaddr = NULL;
1134 		rtinfo.rti_bcastaddr = NULL;
1135 	}
1136 	return (0);
1137 }
1138 
1139 static int
1140 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1141 {
1142 	int	*name = (int *)arg1;
1143 	u_int	namelen = arg2;
1144 	struct radix_node_head *rnh;
1145 	int	i, error = EINVAL;
1146 	u_char  af;
1147 	struct	walkarg w;
1148 
1149 	name ++;
1150 	namelen--;
1151 	if (req->newptr)
1152 		return (EPERM);
1153 	if (namelen != 3)
1154 		return (EINVAL);
1155 	af = name[0];
1156 	bzero(&w, sizeof w);
1157 	w.w_op = name[1];
1158 	w.w_arg = name[2];
1159 	w.w_req = req;
1160 
1161 	crit_enter();
1162 	switch (w.w_op) {
1163 
1164 	case NET_RT_DUMP:
1165 	case NET_RT_FLAGS:
1166 		for (i = 1; i <= AF_MAX; i++)
1167 			if ((rnh = rt_tables[i]) && (af == 0 || af == i) &&
1168 			    (error = rnh->rnh_walktree(rnh,
1169 						       sysctl_dumpentry, &w)))
1170 				break;
1171 		break;
1172 
1173 	case NET_RT_IFLIST:
1174 		error = sysctl_iflist(af, &w);
1175 	}
1176 	crit_exit();
1177 	if (w.w_tmem != NULL)
1178 		free(w.w_tmem, M_RTABLE);
1179 	return (error);
1180 }
1181 
/*
 * Declare the read-only (CTLFLAG_RD) "routetable" sysctl node, numbered
 * PF_ROUTE under _net, with sysctl_rtsock as its handler.
 */
SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1183 
1184 /*
1185  * Definitions of protocols supported in the ROUTE domain.
1186  */
1187 
1188 extern struct domain routedomain;		/* or at least forward */
1189 
1190 static struct protosw routesw[] = {
1191 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1192   0,		route_output,	raw_ctlinput,	0,
1193   cpu0_soport,
1194   raw_init,	0,		0,		0,
1195   &route_usrreqs
1196 }
1197 };
1198 
1199 static struct domain routedomain = {
1200 	PF_ROUTE, "route", NULL, NULL, NULL,
1201 	routesw, &routesw[(sizeof routesw)/(sizeof routesw[0])],
1202 };
1203 
1204 DOMAIN_SET(route);
1205