xref: /dragonfly/sys/net/rtsock.c (revision 9317c2d0)
1 /*
2  * Copyright (c) 2004, 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
62  * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $
63  */
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
69 #include <sys/proc.h>
70 #include <sys/priv.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/domain.h>
77 #include <sys/jail.h>
78 
79 #include <sys/thread2.h>
80 #include <sys/socketvar2.h>
81 
82 #include <net/if.h>
83 #include <net/if_var.h>
84 #include <net/route.h>
85 #include <net/raw_cb.h>
86 #include <net/netmsg2.h>
87 #include <net/netisr2.h>
88 
/*
 * Smallest sockaddr length accepted by the filter code in this file: the
 * sockaddr must extend at least through its sa_family member.
 * sa_family is after sa_len, rest is data.
 */
#define	_SA_MINSIZE	(offsetof(struct sockaddr, sa_family) + \
			 sizeof(((struct sockaddr *)0)->sa_family))

/* Malloc type used in this file for routing-message (rtm) buffers. */
MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
94 
95 static struct route_cb {
96 	int	ip_count;
97 	int	ip6_count;
98 	int	ns_count;
99 	int	any_count;
100 } route_cb;
101 
102 static const struct sockaddr route_src = { 2, PF_ROUTE, };
103 
/*
 * State handed to sysctl_iflist().
 * NOTE(review): the field roles below are inferred from their names; the
 * code that uses them is outside this part of the file -- confirm.
 */
struct walkarg {
	int	w_tmemsize;		/* presumably size of w_tmem */
	int	w_op;
	int	w_arg;
	void	*w_tmem;		/* presumably a scratch buffer */
	struct sysctl_req *w_req;
};
110 
/*
 * Upper bound used by the routing table dump code; may be overridden at
 * build time by defining it before this point.
 */
#ifndef RTTABLE_DUMP_MSGCNT_MAX
/* Should be large enough for dupkeys */
#define RTTABLE_DUMP_MSGCNT_MAX		64
#endif

/*
 * Walk state for the routing table dump (see sysctl_rttable()).
 * NOTE(review): field roles are inferred from their names; the code that
 * uses them is outside this part of the file -- confirm.
 */
struct rttable_walkarg {
	int	w_op;
	int	w_arg;
	int	w_bufsz;		/* presumably capacity of w_buf */
	void	*w_buf;

	int	w_buflen;		/* presumably bytes used in w_buf */

	const char *w_key;
	const char *w_mask;

	/* presumably backing storage for w_key / w_mask */
	struct sockaddr_storage w_key0;
	struct sockaddr_storage w_mask0;
};
130 
/*
 * Netmsg that carries a routing table walk request: the message header,
 * an address family and a pointer to the walk state.
 */
struct netmsg_rttable_walk {
	struct netmsg_base	base;
	int			af;
	struct rttable_walkarg	*w;
};
136 
/*
 * Routing socket control block: the generic raw control block followed by
 * the per-socket message filters.  rocb_rcb must remain the first member,
 * because this file casts between struct rawcb * and struct routecb *.
 */
struct routecb {
	struct rawcb	rocb_rcb;
	/* ROUTE_FILTER() bitmask of accepted rtm types; 0 means no filter */
	unsigned int	rocb_msgfilter;
	/* packed sockaddrs compared against the destination of RTM_MISS */
	char		*rocb_missfilter;
	/* size of rocb_missfilter in bytes; 0 means no filter */
	size_t		rocb_missfilterlen;
};
/* Fetch the routing control block attached to a socket. */
#define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
144 
145 static struct mbuf *
146 		rt_msg_mbuf (int, struct rt_addrinfo *);
147 static void	rt_msg_buffer (int, struct rt_addrinfo *, void *buf, int len);
148 static int	rt_msgsize(int type, const struct rt_addrinfo *rtinfo);
149 static int	rt_xaddrs (char *, char *, struct rt_addrinfo *);
150 static int	sysctl_rttable(int af, struct sysctl_req *req, int op, int arg);
151 static int	if_addrflags(const struct ifaddr *ifa);
152 static int	sysctl_iflist (int af, struct walkarg *w);
153 static int	route_output(struct mbuf *, struct socket *, ...);
154 static void	rt_setmetrics (u_long, struct rt_metrics *,
155 			       struct rt_metrics *);
156 
/*
 * It really doesn't make any sense at all for this code to share much
 * with raw_usrreq.c, since its functionality is so restricted.  XXX
 */
/*
 * pru_abort: forward the request to the generic raw socket handler while
 * inside a critical section.  As noted below, msg must not be touched
 * once the raw handler has been called.
 */
static void
rts_abort(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_abort(msg);
	/* msg invalid now */
	crit_exit();
}
169 
170 static int
171 rts_filter(struct mbuf *m, const struct sockproto *proto,
172 	const struct rawcb *rp)
173 {
174 	const struct routecb *rop = (const struct routecb *)rp;
175 	const struct rt_msghdr *rtm;
176 
177 	KKASSERT(m != NULL);
178 	KKASSERT(proto != NULL);
179 	KKASSERT(rp != NULL);
180 
181 	/* Wrong family for this socket. */
182 	if (proto->sp_family != PF_ROUTE)
183 		return ENOPROTOOPT;
184 
185 	/* If no filter set, just return. */
186 	if (rop->rocb_msgfilter == 0 && rop->rocb_missfilterlen == 0)
187 		return 0;
188 
189 	/* Ensure we can access rtm_type */
190 	if (m->m_len <
191 	    offsetof(struct rt_msghdr, rtm_type) + sizeof(rtm->rtm_type))
192 		return EINVAL;
193 
194 	rtm = mtod(m, const struct rt_msghdr *);
195 	/* If the rtm type is filtered out, return a positive. */
196 	if (rop->rocb_msgfilter != 0 &&
197 	    !(rop->rocb_msgfilter & ROUTE_FILTER(rtm->rtm_type)))
198 		return EEXIST;
199 
200 	if (rop->rocb_missfilterlen != 0 && rtm->rtm_type == RTM_MISS) {
201 		CTASSERT(RTAX_DST == 0);
202 		struct sockaddr *sa;
203 		struct sockaddr_storage ss;
204 		struct sockaddr *dst = (struct sockaddr *)&ss;
205 		char *cp = rop->rocb_missfilter;
206 		char *ep = cp + rop->rocb_missfilterlen;
207 
208 		/* Ensure we can access sa_len */
209 		if (m->m_pkthdr.len < sizeof(*rtm) + _SA_MINSIZE)
210 			return EINVAL;
211 		m_copydata(m, sizeof(*rtm) + offsetof(struct sockaddr, sa_len),
212 		    sizeof(ss.ss_len), (caddr_t)&ss);
213 		if (ss.ss_len < _SA_MINSIZE ||
214 		    ss.ss_len > sizeof(ss) ||
215 		    m->m_pkthdr.len < sizeof(*rtm) + ss.ss_len)
216 			return EINVAL;
217 		/* Copy out the destination sockaddr */
218 		m_copydata(m, sizeof(*rtm), ss.ss_len, (caddr_t)&ss);
219 
220 		/* Find a matching sockaddr in the filter */
221 		while (cp < ep) {
222 			sa = (struct sockaddr *)cp;
223 			if (sa->sa_len == dst->sa_len &&
224 			    memcmp(sa, dst, sa->sa_len) == 0)
225 				break;
226 			cp += RT_ROUNDUP(sa->sa_len);
227 		}
228 		if (cp == ep)
229 			return EEXIST;
230 	}
231 
232 	/* Passed the filter. */
233 	return 0;
234 }
235 
236 
237 /* pru_accept is EOPNOTSUPP */
238 
239 static void
240 rts_attach(netmsg_t msg)
241 {
242 	struct socket *so = msg->base.nm_so;
243 	struct pru_attach_info *ai = msg->attach.nm_ai;
244 	struct rawcb *rp;
245 	struct routecb *rop;
246 	int proto = msg->attach.nm_proto;
247 	int error;
248 
249 	crit_enter();
250 	if (sotorawcb(so) != NULL) {
251 		error = EISCONN;
252 		goto done;
253 	}
254 
255 	rop = kmalloc(sizeof *rop, M_PCB, M_WAITOK | M_ZERO);
256 	rp = &rop->rocb_rcb;
257 
258 	/*
259 	 * The critical section is necessary to block protocols from sending
260 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
261 	 * this PCB is extant but incompletely initialized.
262 	 * Probably we should try to do more of this work beforehand and
263 	 * eliminate the critical section.
264 	 */
265 	so->so_pcb = rp;
266 	soreference(so);	/* so_pcb assignment */
267 	error = raw_attach(so, proto, ai->sb_rlimit);
268 	rp = sotorawcb(so);
269 	if (error) {
270 		kfree(rop, M_PCB);
271 		goto done;
272 	}
273 	switch(rp->rcb_proto.sp_protocol) {
274 	case AF_INET:
275 		route_cb.ip_count++;
276 		break;
277 	case AF_INET6:
278 		route_cb.ip6_count++;
279 		break;
280 	}
281 	rp->rcb_faddr = &route_src;
282 	rp->rcb_filter = rts_filter;
283 	route_cb.any_count++;
284 	soisconnected(so);
285 	so->so_options |= SO_USELOOPBACK;
286 	error = 0;
287 done:
288 	crit_exit();
289 	lwkt_replymsg(&msg->lmsg, error);
290 }
291 
/*
 * pru_bind: forward the request to the generic raw socket handler while
 * inside a critical section.  msg must not be touched after the call.
 */
static void
rts_bind(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_bind(msg); /* xxx just EINVAL */
	/* msg invalid now */
	crit_exit();
}
300 
/*
 * pru_connect: forward the request to the generic raw socket handler while
 * inside a critical section.  msg must not be touched after the call.
 */
static void
rts_connect(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_connect(msg); /* XXX just EINVAL */
	/* msg invalid now */
	crit_exit();
}
309 
310 /* pru_connect2 is EOPNOTSUPP */
311 /* pru_control is EOPNOTSUPP */
312 
313 static void
314 rts_detach(netmsg_t msg)
315 {
316 	struct socket *so = msg->base.nm_so;
317 	struct rawcb *rp = sotorawcb(so);
318 	struct routecb *rop = (struct routecb *)rp;
319 
320 	crit_enter();
321 	if (rop->rocb_missfilterlen != 0)
322 		kfree(rop->rocb_missfilter, M_PCB);
323 	if (rp != NULL) {
324 		switch(rp->rcb_proto.sp_protocol) {
325 		case AF_INET:
326 			route_cb.ip_count--;
327 			break;
328 		case AF_INET6:
329 			route_cb.ip6_count--;
330 			break;
331 		}
332 		route_cb.any_count--;
333 	}
334 	raw_usrreqs.pru_detach(msg);
335 	/* msg invalid now */
336 	crit_exit();
337 }
338 
/*
 * pru_disconnect: forward the request to the generic raw socket handler
 * while inside a critical section.  msg must not be touched after the call.
 */
static void
rts_disconnect(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_disconnect(msg);
	/* msg invalid now */
	crit_exit();
}
347 
348 /* pru_listen is EOPNOTSUPP */
349 
/*
 * pru_peeraddr: forward the request to the generic raw socket handler
 * while inside a critical section.  msg must not be touched after the call.
 */
static void
rts_peeraddr(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_peeraddr(msg);
	/* msg invalid now */
	crit_exit();
}
358 
359 /* pru_rcvd is EOPNOTSUPP */
360 /* pru_rcvoob is EOPNOTSUPP */
361 
/*
 * pru_send: forward the request to the generic raw socket handler while
 * inside a critical section.  msg must not be touched after the call.
 */
static void
rts_send(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_send(msg);
	/* msg invalid now */
	crit_exit();
}
370 
371 /* pru_sense is null */
372 
/*
 * pru_shutdown: forward the request to the generic raw socket handler
 * while inside a critical section.  msg must not be touched after the call.
 */
static void
rts_shutdown(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_shutdown(msg);
	/* msg invalid now */
	crit_exit();
}
381 
/*
 * pru_sockaddr: forward the request to the generic raw socket handler
 * while inside a critical section.  msg must not be touched after the call.
 */
static void
rts_sockaddr(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_sockaddr(msg);
	/* msg invalid now */
	crit_exit();
}
390 
/*
 * User request dispatch table for routing sockets.  Requests with no
 * meaning for a routing socket are wired to pr_generic_notsupp; the rest
 * go to the rts_*() handlers above or to the generic socket routines.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort = rts_abort,
	.pru_accept = pr_generic_notsupp,
	.pru_attach = rts_attach,
	.pru_bind = rts_bind,
	.pru_connect = rts_connect,
	.pru_connect2 = pr_generic_notsupp,
	.pru_control = pr_generic_notsupp,
	.pru_detach = rts_detach,
	.pru_disconnect = rts_disconnect,
	.pru_listen = pr_generic_notsupp,
	.pru_peeraddr = rts_peeraddr,
	.pru_rcvd = pr_generic_notsupp,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = rts_send,
	.pru_sense = pru_sense_null,
	.pru_shutdown = rts_shutdown,
	.pru_sockaddr = rts_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive
};
412 
/*
 * Return the address family of sa, or 0 when sa is NULL.
 */
static __inline sa_family_t
familyof(struct sockaddr *sa)
{
	if (sa == NULL)
		return (0);
	return (sa->sa_family);
}
418 
419 /*
420  * Routing socket input function.  The packet must be serialized onto cpu 0.
421  * We use the cpu0_soport() netisr processing loop to handle it.
422  *
423  * This looks messy but it means that anyone, including interrupt code,
424  * can send a message to the routing socket.
425  */
426 static void
427 rts_input_handler(netmsg_t msg)
428 {
429 	static const struct sockaddr route_dst = { 2, PF_ROUTE, };
430 	struct sockproto route_proto;
431 	struct netmsg_packet *pmsg = &msg->packet;
432 	struct mbuf *m;
433 	sa_family_t family;
434 	struct rawcb *skip;
435 
436 	family = pmsg->base.lmsg.u.ms_result;
437 	route_proto.sp_family = PF_ROUTE;
438 	route_proto.sp_protocol = family;
439 
440 	m = pmsg->nm_packet;
441 	M_ASSERTPKTHDR(m);
442 
443 	skip = m->m_pkthdr.header;
444 	m->m_pkthdr.header = NULL;
445 
446 	raw_input(m, &route_proto, &route_src, &route_dst, skip);
447 }
448 
449 static void
450 rts_input_skip(struct mbuf *m, sa_family_t family, struct rawcb *skip)
451 {
452 	struct netmsg_packet *pmsg;
453 	lwkt_port_t port;
454 
455 	M_ASSERTPKTHDR(m);
456 
457 	port = netisr_cpuport(0);	/* XXX same as for routing socket */
458 	pmsg = &m->m_hdr.mh_netmsg;
459 	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
460 		    0, rts_input_handler);
461 	pmsg->nm_packet = m;
462 	pmsg->base.lmsg.u.ms_result = family;
463 	m->m_pkthdr.header = skip; /* XXX steal field in pkthdr */
464 	lwkt_sendmsg(port, &pmsg->base.lmsg);
465 }
466 
/*
 * Queue routing message m for delivery without skipping any control
 * block (skip == NULL).
 */
static __inline void
rts_input(struct mbuf *m, sa_family_t family)
{
	rts_input_skip(m, family, NULL);
}
472 
473 static void
474 route_ctloutput(netmsg_t msg)
475 {
476 	struct socket *so = msg->ctloutput.base.nm_so;
477 	struct sockopt *sopt = msg->ctloutput.nm_sopt;
478 	struct routecb *rop = sotoroutecb(so);
479 	int error;
480 	unsigned int msgfilter;
481 	unsigned char *cp, *ep;
482 	size_t len;
483 	struct sockaddr *sa;
484 
485 	if (sopt->sopt_level != AF_ROUTE) {
486 		error = EINVAL;
487 		goto out;
488 	}
489 
490 	error = 0;
491 
492 	switch (sopt->sopt_dir) {
493 	case SOPT_SET:
494 		switch (sopt->sopt_name) {
495 		case ROUTE_MSGFILTER:
496 			error = soopt_to_kbuf(sopt, &msgfilter,
497 			    sizeof(msgfilter), sizeof(msgfilter));
498 			if (error == 0)
499 				rop->rocb_msgfilter = msgfilter;
500 			break;
501 		case RO_MISSFILTER:
502 			/* Validate the data */
503 			len = 0;
504 			cp = sopt->sopt_val;
505 			ep = cp + sopt->sopt_valsize;
506 			while (cp < ep) {
507 				if (ep - cp <
508 				    offsetof(struct sockaddr, sa_len) +
509 				    sizeof(sa->sa_len))
510 					break;
511 				if (++len > RO_FILTSA_MAX) {
512 					error = ENOBUFS;
513 					break;
514 				}
515 				sa = (struct sockaddr *)cp;
516 				if (sa->sa_len < _SA_MINSIZE ||
517 				    sa->sa_len > sizeof(struct sockaddr_storage))
518 					break;
519 				cp += RT_ROUNDUP(sa->sa_len);
520 			}
521 			if (cp != ep) {
522 				if (error == 0)
523 					error = EINVAL;
524 				break;
525 			}
526 			if (rop->rocb_missfilterlen != 0)
527 				kfree(rop->rocb_missfilter, M_PCB);
528 			if (sopt->sopt_valsize != 0) {
529 				rop->rocb_missfilter =
530 				    kmalloc(sopt->sopt_valsize,
531 				            M_PCB, M_WAITOK | M_NULLOK);
532 				if (rop->rocb_missfilter == NULL) {
533 					rop->rocb_missfilterlen = 0;
534 					error = ENOBUFS;
535 					break;
536 				}
537 			} else
538 				rop->rocb_missfilter = NULL;
539 			rop->rocb_missfilterlen = sopt->sopt_valsize;
540 			if (rop->rocb_missfilterlen != 0)
541 				memcpy(rop->rocb_missfilter, sopt->sopt_val,
542 				    rop->rocb_missfilterlen);
543 			break;
544 		default:
545 			error = ENOPROTOOPT;
546 			break;
547 		}
548 		break;
549 	case SOPT_GET:
550 		switch (sopt->sopt_name) {
551 		case ROUTE_MSGFILTER:
552 			msgfilter = rop->rocb_msgfilter;
553 			soopt_from_kbuf(sopt, &msgfilter, sizeof(msgfilter));
554 			break;
555 		case RO_MISSFILTER:
556 			soopt_from_kbuf(sopt, rop->rocb_missfilter,
557 			    rop->rocb_missfilterlen);
558 			break;
559 		default:
560 			error = ENOPROTOOPT;
561 			break;
562 		}
563 	}
564 out:
565 	lwkt_replymsg(&msg->ctloutput.base.lmsg, error);
566 }
567 
568 
569 
570 static void *
571 reallocbuf_nofree(void *ptr, size_t len, size_t olen)
572 {
573 	void *newptr;
574 
575 	newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
576 	if (newptr == NULL)
577 		return NULL;
578 	bcopy(ptr, newptr, olen);
579 	if (olen < len)
580 		bzero((char *)newptr + olen, len - olen);
581 
582 	return (newptr);
583 }
584 
585 /*
586  * Internal helper routine for route_output().
587  */
588 static int
589 _fillrtmsg(struct rt_msghdr **prtm, struct rtentry *rt,
590 	   struct rt_addrinfo *rtinfo)
591 {
592 	int msglen;
593 	struct rt_msghdr *rtm = *prtm;
594 
595 	/* Fill in rt_addrinfo for call to rt_msg_buffer(). */
596 	rtinfo->rti_dst = rt_key(rt);
597 	rtinfo->rti_gateway = rt->rt_gateway;
598 	rtinfo->rti_netmask = rt_mask(rt);		/* might be NULL */
599 	rtinfo->rti_genmask = rt->rt_genmask;		/* might be NULL */
600 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
601 		if (rt->rt_ifp != NULL) {
602 			rtinfo->rti_ifpaddr =
603 			    TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])
604 			    ->ifa->ifa_addr;
605 			rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr;
606 			if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
607 				rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
608 			rtm->rtm_index = rt->rt_ifp->if_index;
609 		} else {
610 			rtinfo->rti_ifpaddr = NULL;
611 			rtinfo->rti_ifaaddr = NULL;
612 		}
613 	} else if (rt->rt_ifp != NULL) {
614 		rtm->rtm_index = rt->rt_ifp->if_index;
615 	}
616 
617 	msglen = rt_msgsize(rtm->rtm_type, rtinfo);
618 	if (rtm->rtm_msglen < msglen) {
619 		/* NOTE: Caller will free the old rtm accordingly */
620 		rtm = reallocbuf_nofree(rtm, msglen, rtm->rtm_msglen);
621 		if (rtm == NULL)
622 			return (ENOBUFS);
623 		*prtm = rtm;
624 	}
625 	rt_msg_buffer(rtm->rtm_type, rtinfo, rtm, msglen);
626 
627 	rtm->rtm_flags = rt->rt_flags;
628 	rtm->rtm_rmx = rt->rt_rmx;
629 	rtm->rtm_addrs = rtinfo->rti_addrs;
630 
631 	return (0);
632 }
633 
/*
 * Message buffers tracked across the RTM_DELETE and RTM_GET callbacks.
 */
struct rtm_arg {
	/* original request buffer; rtinfo points into it, so it stays alive */
	struct rt_msghdr	*bak_rtm;
	/* most recent reply buffer; equals bak_rtm until a regrow happens */
	struct rt_msghdr	*new_rtm;
};
638 
639 static int
640 fillrtmsg(struct rtm_arg *arg, struct rtentry *rt,
641 	  struct rt_addrinfo *rtinfo)
642 {
643 	struct rt_msghdr *rtm = arg->new_rtm;
644 	int error;
645 
646 	error = _fillrtmsg(&rtm, rt, rtinfo);
647 	if (!error) {
648 		if (arg->new_rtm != rtm) {
649 			/*
650 			 * _fillrtmsg() just allocated a new rtm;
651 			 * if the previously allocated rtm is not
652 			 * the backing rtm, it should be freed.
653 			 */
654 			if (arg->new_rtm != arg->bak_rtm)
655 				kfree(arg->new_rtm, M_RTABLE);
656 			arg->new_rtm = rtm;
657 		}
658 	}
659 	return error;
660 }
661 
/* Callbacks that route_output() hands to the global route request code. */
static void	route_output_add_callback(int cmd, int error,
		    struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg);
static void	route_output_delete_callback(int cmd, int error,
		    struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg);
static int	route_output_get_callback(int cmd,
		    struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg,
		    int found_cnt);
static int	route_output_change_callback(int cmd,
		    struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg,
		    int found_cnt);
static int	route_output_lock_callback(int cmd,
		    struct rt_addrinfo *rtinfo, struct rtentry *rt, void *arg,
		    int found_cnt);
672 
673 /*ARGSUSED*/
674 static int
675 route_output(struct mbuf *m, struct socket *so, ...)
676 {
677 	struct rtm_arg arg;
678 	struct rt_msghdr *rtm = NULL;
679 	struct rawcb *rp = NULL;
680 	struct pr_output_info *oi;
681 	struct rt_addrinfo rtinfo;
682 	sa_family_t family;
683 	int len, error = 0;
684 	__va_list ap;
685 
686 	M_ASSERTPKTHDR(m);
687 
688 	__va_start(ap, so);
689 	oi = __va_arg(ap, struct pr_output_info *);
690 	__va_end(ap);
691 
692 	family = familyof(NULL);
693 
694 #define gotoerr(e) { error = e; goto flush;}
695 
696 	if (m == NULL ||
697 	    (m->m_len < sizeof(long) &&
698 	     (m = m_pullup(m, sizeof(long))) == NULL))
699 		return (ENOBUFS);
700 	len = m->m_pkthdr.len;
701 	if (len < sizeof(struct rt_msghdr) ||
702 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
703 		gotoerr(EINVAL);
704 
705 	rtm = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
706 	if (rtm == NULL)
707 		gotoerr(ENOBUFS);
708 
709 	m_copydata(m, 0, len, (caddr_t)rtm);
710 	if (rtm->rtm_version != RTM_VERSION)
711 		gotoerr(EPROTONOSUPPORT);
712 
713 	rtm->rtm_pid = oi->p_pid;
714 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
715 	rtinfo.rti_addrs = rtm->rtm_addrs;
716 	if (rt_xaddrs((char *)(rtm + 1), (char *)rtm + len, &rtinfo) != 0)
717 		gotoerr(EINVAL);
718 
719 	rtinfo.rti_flags = rtm->rtm_flags;
720 	if (rtinfo.rti_dst == NULL || rtinfo.rti_dst->sa_family >= AF_MAX ||
721 	    (rtinfo.rti_gateway && rtinfo.rti_gateway->sa_family >= AF_MAX))
722 		gotoerr(EINVAL);
723 
724 	family = familyof(rtinfo.rti_dst);
725 
726 	/*
727 	 * Verify that the caller has the appropriate privilege; RTM_GET
728 	 * is the only operation the non-superuser is allowed.
729 	 */
730 	if (rtm->rtm_type != RTM_GET &&
731 	    priv_check_cred(so->so_cred, PRIV_ROOT, 0) != 0)
732 		gotoerr(EPERM);
733 
734 	if (rtinfo.rti_genmask != NULL) {
735 		error = rtmask_add_global(rtinfo.rti_genmask,
736 		    rtm->rtm_type != RTM_GET ?
737 		    RTREQ_PRIO_HIGH : RTREQ_PRIO_NORM);
738 		if (error)
739 			goto flush;
740 	}
741 
742 	switch (rtm->rtm_type) {
743 	case RTM_ADD:
744 		if (rtinfo.rti_gateway == NULL) {
745 			error = EINVAL;
746 		} else {
747 			error = rtrequest1_global(RTM_ADD, &rtinfo,
748 			    route_output_add_callback, rtm, RTREQ_PRIO_HIGH);
749 		}
750 		break;
751 	case RTM_DELETE:
752 		/*
753 		 * Backing rtm (bak_rtm) could _not_ be freed during
754 		 * rtrequest1_global or rtsearch_global, even if the
755 		 * callback reallocates the rtm due to its size changes,
756 		 * since rtinfo points to the backing rtm's memory area.
757 		 * After rtrequest1_global or rtsearch_global returns,
758 		 * it is safe to free the backing rtm, since rtinfo will
759 		 * not be used anymore.
760 		 *
761 		 * new_rtm will be used to save the new rtm allocated
762 		 * by rtrequest1_global or rtsearch_global.
763 		 */
764 		arg.bak_rtm = rtm;
765 		arg.new_rtm = rtm;
766 		error = rtrequest1_global(RTM_DELETE, &rtinfo,
767 		    route_output_delete_callback, &arg, RTREQ_PRIO_HIGH);
768 		rtm = arg.new_rtm;
769 		if (rtm != arg.bak_rtm)
770 			kfree(arg.bak_rtm, M_RTABLE);
771 		break;
772 	case RTM_GET:
773 		/* See the comment in RTM_DELETE */
774 		arg.bak_rtm = rtm;
775 		arg.new_rtm = rtm;
776 		error = rtsearch_global(RTM_GET, &rtinfo,
777 		    route_output_get_callback, &arg, RTS_NOEXACTMATCH,
778 		    RTREQ_PRIO_NORM);
779 		rtm = arg.new_rtm;
780 		if (rtm != arg.bak_rtm)
781 			kfree(arg.bak_rtm, M_RTABLE);
782 		break;
783 	case RTM_CHANGE:
784 		error = rtsearch_global(RTM_CHANGE, &rtinfo,
785 		    route_output_change_callback, rtm, RTS_EXACTMATCH,
786 		    RTREQ_PRIO_HIGH);
787 		break;
788 	case RTM_LOCK:
789 		error = rtsearch_global(RTM_LOCK, &rtinfo,
790 		    route_output_lock_callback, rtm, RTS_EXACTMATCH,
791 		    RTREQ_PRIO_HIGH);
792 		break;
793 	default:
794 		error = EOPNOTSUPP;
795 		break;
796 	}
797 flush:
798 	if (rtm != NULL) {
799 		if (error != 0)
800 			rtm->rtm_errno = error;
801 		else
802 			rtm->rtm_flags |= RTF_DONE;
803 	}
804 
805 	/*
806 	 * Check to see if we don't want our own messages.
807 	 */
808 	if (!(so->so_options & SO_USELOOPBACK)) {
809 		if (route_cb.any_count <= 1) {
810 			if (rtm != NULL)
811 				kfree(rtm, M_RTABLE);
812 			m_freem(m);
813 			return (error);
814 		}
815 		/* There is another listener, so construct message */
816 		rp = sotorawcb(so);
817 	}
818 	if (rtm != NULL) {
819 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
820 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
821 			m_freem(m);
822 			m = NULL;
823 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
824 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
825 		kfree(rtm, M_RTABLE);
826 	}
827 	if (m != NULL)
828 		rts_input_skip(m, family, rp);
829 	return (error);
830 }
831 
832 static void
833 route_output_add_callback(int cmd, int error, struct rt_addrinfo *rtinfo,
834 			  struct rtentry *rt, void *arg)
835 {
836 	struct rt_msghdr *rtm = arg;
837 
838 	if (error == 0 && rt != NULL) {
839 		rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
840 		    &rt->rt_rmx);
841 		rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
842 		rt->rt_rmx.rmx_locks |=
843 		    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
844 		if (rtinfo->rti_genmask != NULL) {
845 			rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask);
846 			if (rt->rt_genmask == NULL) {
847 				/*
848 				 * This should not happen, since we
849 				 * have already installed genmask
850 				 * on each CPU before we reach here.
851 				 */
852 				panic("genmask is gone!?");
853 			}
854 		} else {
855 			rt->rt_genmask = NULL;
856 		}
857 		rtm->rtm_index = rt->rt_ifp->if_index;
858 	}
859 }
860 
861 static void
862 route_output_delete_callback(int cmd, int error, struct rt_addrinfo *rtinfo,
863 			  struct rtentry *rt, void *arg)
864 {
865 	if (error == 0 && rt) {
866 		++rt->rt_refcnt;
867 		if (fillrtmsg(arg, rt, rtinfo) != 0) {
868 			error = ENOBUFS;
869 			/* XXX no way to return the error */
870 		}
871 		--rt->rt_refcnt;
872 	}
873 	if (rt && rt->rt_refcnt == 0) {
874 		++rt->rt_refcnt;
875 		rtfree(rt);
876 	}
877 }
878 
879 static int
880 route_output_get_callback(int cmd, struct rt_addrinfo *rtinfo,
881 			  struct rtentry *rt, void *arg, int found_cnt)
882 {
883 	int error, found = 0;
884 
885 	if (((rtinfo->rti_flags ^ rt->rt_flags) & RTF_HOST) == 0)
886 		found = 1;
887 
888 	error = fillrtmsg(arg, rt, rtinfo);
889 	if (!error && found) {
890 		/* Got the exact match, we could return now! */
891 		error = EJUSTRETURN;
892 	}
893 	return error;
894 }
895 
896 static int
897 route_output_change_callback(int cmd, struct rt_addrinfo *rtinfo,
898 			     struct rtentry *rt, void *arg, int found_cnt)
899 {
900 	struct rt_msghdr *rtm = arg;
901 	struct ifaddr *ifa;
902 	int error = 0;
903 
904 	/*
905 	 * new gateway could require new ifaddr, ifp;
906 	 * flags may also be different; ifp may be specified
907 	 * by ll sockaddr when protocol address is ambiguous
908 	 */
909 	if (((rt->rt_flags & RTF_GATEWAY) && rtinfo->rti_gateway != NULL) ||
910 	    rtinfo->rti_ifpaddr != NULL ||
911 	    (rtinfo->rti_ifaaddr != NULL &&
912 	     !sa_equal(rtinfo->rti_ifaaddr, rt->rt_ifa->ifa_addr))) {
913 		error = rt_getifa(rtinfo);
914 		if (error != 0)
915 			goto done;
916 	}
917 	if (rtinfo->rti_gateway != NULL) {
918 		/*
919 		 * We only need to generate rtmsg upon the
920 		 * first route to be changed.
921 		 */
922 		error = rt_setgate(rt, rt_key(rt), rtinfo->rti_gateway);
923 		if (error != 0)
924 			goto done;
925 	}
926 	if ((ifa = rtinfo->rti_ifa) != NULL) {
927 		struct ifaddr *oifa = rt->rt_ifa;
928 
929 		if (oifa != ifa) {
930 			if (oifa && oifa->ifa_rtrequest)
931 				oifa->ifa_rtrequest(RTM_DELETE, rt);
932 			IFAFREE(rt->rt_ifa);
933 			IFAREF(ifa);
934 			rt->rt_ifa = ifa;
935 			rt->rt_ifp = rtinfo->rti_ifp;
936 		}
937 	}
938 	rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
939 	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
940 		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt);
941 	if (rtinfo->rti_genmask != NULL) {
942 		rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask);
943 		if (rt->rt_genmask == NULL) {
944 			/*
945 			 * This should not happen, since we
946 			 * have already installed genmask
947 			 * on each CPU before we reach here.
948 			 */
949 			panic("genmask is gone!?");
950 		}
951 	}
952 	rtm->rtm_index = rt->rt_ifp->if_index;
953 	if (found_cnt == 1)
954 		rt_rtmsg(RTM_CHANGE, rt, rt->rt_ifp, 0);
955 done:
956 	return error;
957 }
958 
959 static int
960 route_output_lock_callback(int cmd, struct rt_addrinfo *rtinfo,
961 			   struct rtentry *rt, void *arg,
962 			   int found_cnt __unused)
963 {
964 	struct rt_msghdr *rtm = arg;
965 
966 	rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
967 	rt->rt_rmx.rmx_locks |=
968 		(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
969 	return 0;
970 }
971 
972 static void
973 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
974 {
975 #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt;
976 	setmetric(RTV_RPIPE, rmx_recvpipe);
977 	setmetric(RTV_SPIPE, rmx_sendpipe);
978 	setmetric(RTV_SSTHRESH, rmx_ssthresh);
979 	setmetric(RTV_RTT, rmx_rtt);
980 	setmetric(RTV_RTTVAR, rmx_rttvar);
981 	setmetric(RTV_HOPCOUNT, rmx_hopcount);
982 	setmetric(RTV_MTU, rmx_mtu);
983 	setmetric(RTV_EXPIRE, rmx_expire);
984 	setmetric(RTV_MSL, rmx_msl);
985 	setmetric(RTV_IWMAXSEGS, rmx_iwmaxsegs);
986 	setmetric(RTV_IWCAPSEGS, rmx_iwcapsegs);
987 #undef setmetric
988 }
989 
990 /*
991  * Extract the addresses of the passed sockaddrs.
992  * Do a little sanity checking so as to avoid bad memory references.
993  * This data is derived straight from userland.
994  */
995 static int
996 rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo)
997 {
998 	struct sockaddr *sa;
999 	int i;
1000 
1001 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1002 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1003 			continue;
1004 		sa = (struct sockaddr *)cp;
1005 		/*
1006 		 * It won't fit.
1007 		 */
1008 		if ((cp + sa->sa_len) > cplim) {
1009 			return (EINVAL);
1010 		}
1011 
1012 		/*
1013 		 * There are no more...  Quit now.
1014 		 * If there are more bits, they are in error.
1015 		 * I've seen this.  route(1) can evidently generate these.
1016 		 * This causes kernel to core dump.
1017 		 * For compatibility, if we see this, point to a safe address.
1018 		 */
1019 		if (sa->sa_len == 0) {
1020 			static struct sockaddr sa_zero = {
1021 				sizeof sa_zero, AF_INET,
1022 			};
1023 
1024 			rtinfo->rti_info[i] = &sa_zero;
1025 			kprintf("rtsock: received more addr bits than sockaddrs.\n");
1026 			return (0); /* should be EINVAL but for compat */
1027 		}
1028 
1029 		/* Accept the sockaddr. */
1030 		rtinfo->rti_info[i] = sa;
1031 		cp += RT_ROUNDUP(sa->sa_len);
1032 	}
1033 	return (0);
1034 }
1035 
1036 static int
1037 rt_msghdrsize(int type)
1038 {
1039 	switch (type) {
1040 	case RTM_DELADDR:
1041 	case RTM_NEWADDR:
1042 		return sizeof(struct ifa_msghdr);
1043 	case RTM_DELMADDR:
1044 	case RTM_NEWMADDR:
1045 		return sizeof(struct ifma_msghdr);
1046 	case RTM_IFINFO:
1047 		return sizeof(struct if_msghdr);
1048 	case RTM_IFANNOUNCE:
1049 	case RTM_IEEE80211:
1050 		return sizeof(struct if_announcemsghdr);
1051 	default:
1052 		return sizeof(struct rt_msghdr);
1053 	}
1054 }
1055 
1056 static int
1057 rt_msgsize(int type, const struct rt_addrinfo *rtinfo)
1058 {
1059 	int len, i;
1060 
1061 	len = rt_msghdrsize(type);
1062 	for (i = 0; i < RTAX_MAX; i++) {
1063 		if (rtinfo->rti_info[i] != NULL)
1064 			len += RT_ROUNDUP(rtinfo->rti_info[i]->sa_len);
1065 	}
1066 	len = ALIGN(len);
1067 	return len;
1068 }
1069 
1070 /*
1071  * Build a routing message in a buffer.
1072  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
1073  * to the end of the buffer after the message header.
1074  *
1075  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
1076  * This side-effect can be avoided if we reorder the addrs bitmask field in all
1077  * the route messages to line up so we can set it here instead of back in the
1078  * calling routine.
1079  *
1080  * NOTE! The buffer may already contain a partially filled-out rtm via
1081  *	 _fillrtmsg().
1082  */
1083 static void
1084 rt_msg_buffer(int type, struct rt_addrinfo *rtinfo, void *buf, int msglen)
1085 {
1086 	struct rt_msghdr *rtm;
1087 	char *cp;
1088 	int dlen, i;
1089 
1090 	rtm = (struct rt_msghdr *) buf;
1091 	rtm->rtm_version = RTM_VERSION;
1092 	rtm->rtm_type = type;
1093 	rtm->rtm_msglen = msglen;
1094 
1095 	cp = (char *)buf + rt_msghdrsize(type);
1096 	rtinfo->rti_addrs = 0;
1097 	for (i = 0; i < RTAX_MAX; i++) {
1098 		struct sockaddr *sa;
1099 
1100 		if ((sa = rtinfo->rti_info[i]) == NULL)
1101 			continue;
1102 		rtinfo->rti_addrs |= (1 << i);
1103 		dlen = RT_ROUNDUP(sa->sa_len);
1104 		bcopy(sa, cp, dlen);
1105 		cp += dlen;
1106 	}
1107 }
1108 
1109 /*
1110  * Build a routing message in a mbuf chain.
1111  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
1112  * to the end of the mbuf after the message header.
1113  *
1114  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
1115  * This side-effect can be avoided if we reorder the addrs bitmask field in all
1116  * the route messages to line up so we can set it here instead of back in the
1117  * calling routine.
1118  */
1119 static struct mbuf *
1120 rt_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1121 {
1122 	struct mbuf *m;
1123 	struct rt_msghdr *rtm;
1124 	int hlen, len;
1125 	int i;
1126 
1127 	hlen = rt_msghdrsize(type);
1128 	KASSERT(hlen <= MCLBYTES, ("rt_msg_mbuf: hlen %d doesn't fit", hlen));
1129 
1130 	m = m_getl(hlen, M_NOWAIT, MT_DATA, M_PKTHDR, NULL);
1131 	if (m == NULL)
1132 		return (NULL);
1133 	mbuftrackid(m, 32);
1134 	m->m_pkthdr.len = m->m_len = hlen;
1135 	m->m_pkthdr.rcvif = NULL;
1136 	rtinfo->rti_addrs = 0;
1137 	len = hlen;
1138 	for (i = 0; i < RTAX_MAX; i++) {
1139 		struct sockaddr *sa;
1140 		int dlen;
1141 
1142 		if ((sa = rtinfo->rti_info[i]) == NULL)
1143 			continue;
1144 		rtinfo->rti_addrs |= (1 << i);
1145 		dlen = RT_ROUNDUP(sa->sa_len);
1146 		m_copyback(m, len, dlen, (caddr_t)sa); /* can grow mbuf chain */
1147 		len += dlen;
1148 	}
1149 	if (m->m_pkthdr.len != len) { /* one of the m_copyback() calls failed */
1150 		m_freem(m);
1151 		return (NULL);
1152 	}
1153 	rtm = mtod(m, struct rt_msghdr *);
1154 	bzero(rtm, hlen);
1155 	rtm->rtm_msglen = len;
1156 	rtm->rtm_version = RTM_VERSION;
1157 	rtm->rtm_type = type;
1158 	return (m);
1159 }
1160 
1161 /*
1162  * This routine is called to generate a message from the routing
1163  * socket indicating that a redirect has occurred, a routing lookup
1164  * has failed, or that a protocol has detected timeouts to a particular
1165  * destination.
1166  */
1167 void
1168 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1169 {
1170 	struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
1171 	struct rt_msghdr *rtm;
1172 	struct mbuf *m;
1173 
1174 	if (route_cb.any_count == 0)
1175 		return;
1176 	m = rt_msg_mbuf(type, rtinfo);
1177 	if (m == NULL)
1178 		return;
1179 	rtm = mtod(m, struct rt_msghdr *);
1180 	rtm->rtm_flags = RTF_DONE | flags;
1181 	rtm->rtm_errno = error;
1182 	rtm->rtm_addrs = rtinfo->rti_addrs;
1183 	rts_input(m, familyof(dst));
1184 }
1185 
1186 void
1187 rt_dstmsg(int type, struct sockaddr *dst, int error)
1188 {
1189 	struct rt_msghdr *rtm;
1190 	struct rt_addrinfo addrs;
1191 	struct mbuf *m;
1192 
1193 	if (route_cb.any_count == 0)
1194 		return;
1195 	bzero(&addrs, sizeof(struct rt_addrinfo));
1196 	addrs.rti_info[RTAX_DST] = dst;
1197 	m = rt_msg_mbuf(type, &addrs);
1198 	if (m == NULL)
1199 		return;
1200 	rtm = mtod(m, struct rt_msghdr *);
1201 	rtm->rtm_flags = RTF_DONE;
1202 	rtm->rtm_errno = error;
1203 	rtm->rtm_addrs = addrs.rti_addrs;
1204 	rts_input(m, familyof(dst));
1205 }
1206 
1207 /*
1208  * This routine is called to generate a message from the routing
1209  * socket indicating that the status of a network interface has changed.
1210  */
1211 void
1212 rt_ifmsg(struct ifnet *ifp)
1213 {
1214 	struct if_msghdr *ifm;
1215 	struct mbuf *m;
1216 	struct rt_addrinfo rtinfo;
1217 
1218 	if (route_cb.any_count == 0)
1219 		return;
1220 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1221 	m = rt_msg_mbuf(RTM_IFINFO, &rtinfo);
1222 	if (m == NULL)
1223 		return;
1224 	ifm = mtod(m, struct if_msghdr *);
1225 	ifm->ifm_index = ifp->if_index;
1226 	ifm->ifm_flags = ifp->if_flags;
1227 	ifm->ifm_data = ifp->if_data;
1228 	ifm->ifm_addrs = 0;
1229 	rts_input(m, 0);
1230 }
1231 
1232 static void
1233 rt_ifamsg(int cmd, struct ifaddr *ifa)
1234 {
1235 	struct ifa_msghdr *ifam;
1236 	struct rt_addrinfo rtinfo;
1237 	struct mbuf *m;
1238 	struct ifnet *ifp = ifa->ifa_ifp;
1239 
1240 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1241 	rtinfo.rti_ifaaddr = ifa->ifa_addr;
1242 	rtinfo.rti_ifpaddr =
1243 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1244 	rtinfo.rti_netmask = ifa->ifa_netmask;
1245 	rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
1246 
1247 	m = rt_msg_mbuf(cmd, &rtinfo);
1248 	if (m == NULL)
1249 		return;
1250 
1251 	ifam = mtod(m, struct ifa_msghdr *);
1252 	ifam->ifam_index = ifp->if_index;
1253 	ifam->ifam_flags = ifa->ifa_flags;
1254 	ifam->ifam_addrs = rtinfo.rti_addrs;
1255 	ifam->ifam_addrflags = if_addrflags(ifa);
1256 	ifam->ifam_metric = ifa->ifa_metric;
1257 
1258 	rts_input(m, familyof(ifa->ifa_addr));
1259 }
1260 
1261 void
1262 rt_rtmsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int error)
1263 {
1264 	struct rt_msghdr *rtm;
1265 	struct rt_addrinfo rtinfo;
1266 	struct mbuf *m;
1267 	struct sockaddr *dst;
1268 
1269 	if (rt == NULL)
1270 		return;
1271 
1272 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1273 	rtinfo.rti_dst = dst = rt_key(rt);
1274 	rtinfo.rti_gateway = rt->rt_gateway;
1275 	rtinfo.rti_netmask = rt_mask(rt);
1276 	if (ifp != NULL) {
1277 		rtinfo.rti_ifpaddr =
1278 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1279 	}
1280 	if (rt->rt_ifa != NULL)
1281 		rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr;
1282 
1283 	m = rt_msg_mbuf(cmd, &rtinfo);
1284 	if (m == NULL)
1285 		return;
1286 
1287 	rtm = mtod(m, struct rt_msghdr *);
1288 	if (ifp != NULL)
1289 		rtm->rtm_index = ifp->if_index;
1290 	rtm->rtm_flags |= rt->rt_flags;
1291 	rtm->rtm_errno = error;
1292 	rtm->rtm_addrs = rtinfo.rti_addrs;
1293 
1294 	rts_input(m, familyof(dst));
1295 }
1296 
1297 /*
1298  * This is called to generate messages from the routing socket
1299  * indicating a network interface has had addresses associated with it.
1300  * if we ever reverse the logic and replace messages TO the routing
1301  * socket indicate a request to configure interfaces, then it will
1302  * be unnecessary as the routing socket will automatically generate
1303  * copies of it.
1304  */
1305 void
1306 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1307 {
1308 	if (route_cb.any_count == 0)
1309 		return;
1310 
1311 	if (cmd == RTM_ADD) {
1312 		rt_ifamsg(RTM_NEWADDR, ifa);
1313 		rt_rtmsg(RTM_ADD, rt, ifa->ifa_ifp, error);
1314 	} else {
1315 		KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd));
1316 		rt_rtmsg(RTM_DELETE, rt, ifa->ifa_ifp, error);
1317 		rt_ifamsg(RTM_DELADDR, ifa);
1318 	}
1319 }
1320 
1321 /*
1322  * This is the analogue to the rt_newaddrmsg which performs the same
1323  * function but for multicast group memberhips.  This is easier since
1324  * there is no route state to worry about.
1325  */
1326 void
1327 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1328 {
1329 	struct rt_addrinfo rtinfo;
1330 	struct mbuf *m = NULL;
1331 	struct ifnet *ifp = ifma->ifma_ifp;
1332 	struct ifma_msghdr *ifmam;
1333 
1334 	if (route_cb.any_count == 0)
1335 		return;
1336 
1337 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1338 	rtinfo.rti_ifaaddr = ifma->ifma_addr;
1339 	if (ifp != NULL && !TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
1340 		rtinfo.rti_ifpaddr =
1341 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1342 	}
1343 	/*
1344 	 * If a link-layer address is present, present it as a ``gateway''
1345 	 * (similarly to how ARP entries, e.g., are presented).
1346 	 */
1347 	rtinfo.rti_gateway = ifma->ifma_lladdr;
1348 
1349 	m = rt_msg_mbuf(cmd, &rtinfo);
1350 	if (m == NULL)
1351 		return;
1352 
1353 	ifmam = mtod(m, struct ifma_msghdr *);
1354 	ifmam->ifmam_index = ifp->if_index;
1355 	ifmam->ifmam_addrs = rtinfo.rti_addrs;
1356 
1357 	rts_input(m, familyof(ifma->ifma_addr));
1358 }
1359 
1360 static struct mbuf *
1361 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1362 		     struct rt_addrinfo *info)
1363 {
1364 	struct if_announcemsghdr *ifan;
1365 	struct mbuf *m;
1366 
1367 	if (route_cb.any_count == 0)
1368 		return NULL;
1369 
1370 	bzero(info, sizeof(*info));
1371 	m = rt_msg_mbuf(type, info);
1372 	if (m == NULL)
1373 		return NULL;
1374 
1375 	ifan = mtod(m, struct if_announcemsghdr *);
1376 	ifan->ifan_index = ifp->if_index;
1377 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof ifan->ifan_name);
1378 	ifan->ifan_what = what;
1379 	return m;
1380 }
1381 
1382 /*
1383  * This is called to generate routing socket messages indicating
1384  * IEEE80211 wireless events.
1385  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1386  */
1387 void
1388 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1389 {
1390 	struct rt_addrinfo info;
1391 	struct mbuf *m;
1392 
1393 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1394 	if (m == NULL)
1395 		return;
1396 
1397 	/*
1398 	 * Append the ieee80211 data.  Try to stick it in the
1399 	 * mbuf containing the ifannounce msg; otherwise allocate
1400 	 * a new mbuf and append.
1401 	 *
1402 	 * NB: we assume m is a single mbuf.
1403 	 */
1404 	if (data_len > M_TRAILINGSPACE(m)) {
1405 		/* XXX use m_getb(data_len, M_NOWAIT, MT_DATA, 0); */
1406 		struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1407 		if (n == NULL) {
1408 			m_freem(m);
1409 			return;
1410 		}
1411 		KKASSERT(data_len <= M_TRAILINGSPACE(n));
1412 		bcopy(data, mtod(n, void *), data_len);
1413 		n->m_len = data_len;
1414 		m->m_next = n;
1415 	} else if (data_len > 0) {
1416 		bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1417 		m->m_len += data_len;
1418 	}
1419 	mbuftrackid(m, 33);
1420 	if (m->m_flags & M_PKTHDR)
1421 		m->m_pkthdr.len += data_len;
1422 	mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1423 	rts_input(m, 0);
1424 }
1425 
1426 /*
1427  * This is called to generate routing socket messages indicating
1428  * network interface arrival and departure.
1429  */
1430 void
1431 rt_ifannouncemsg(struct ifnet *ifp, int what)
1432 {
1433 	struct rt_addrinfo addrinfo;
1434 	struct mbuf *m;
1435 
1436 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &addrinfo);
1437 	if (m != NULL)
1438 		rts_input(m, 0);
1439 }
1440 
1441 static int
1442 resizewalkarg(struct walkarg *w, int len)
1443 {
1444 	void *newptr;
1445 
1446 	newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
1447 	if (newptr == NULL)
1448 		return (ENOMEM);
1449 	if (w->w_tmem != NULL)
1450 		kfree(w->w_tmem, M_RTABLE);
1451 	w->w_tmem = newptr;
1452 	w->w_tmemsize = len;
1453 	bzero(newptr, len);
1454 
1455 	return (0);
1456 }
1457 
1458 static void
1459 ifnet_compute_stats(struct ifnet *ifp)
1460 {
1461 	IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
1462 	IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
1463 	IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
1464 	IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
1465 	IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
1466 	IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
1467 	IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
1468 	IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
1469 	IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
1470 	IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
1471 	IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
1472 }
1473 
1474 static int
1475 if_addrflags(const struct ifaddr *ifa)
1476 {
1477 	switch (ifa->ifa_addr->sa_family) {
1478 #ifdef INET6
1479 	case AF_INET6:
1480 		return ((const struct in6_ifaddr *)ifa)->ia6_flags;
1481 #endif
1482 	default:
1483 		return 0;
1484 	}
1485 }
1486 
1487 static int
1488 sysctl_iflist(int af, struct walkarg *w)
1489 {
1490 	struct ifnet *ifp;
1491 	struct rt_addrinfo rtinfo;
1492 	int msglen, error;
1493 
1494 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1495 
1496 	ifnet_lock();
1497 	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
1498 		struct ifaddr_container *ifac, *ifac_mark;
1499 		struct ifaddr_marker mark;
1500 		struct ifaddrhead *head;
1501 		struct ifaddr *ifa;
1502 
1503 		if (w->w_arg && w->w_arg != ifp->if_index)
1504 			continue;
1505 		head = &ifp->if_addrheads[mycpuid];
1506 		/*
1507 		 * There is no need to reference the first ifaddr
1508 		 * even if the following resizewalkarg() blocks,
1509 		 * since the first ifaddr will not be destroyed
1510 		 * when the ifnet lock is held.
1511 		 */
1512 		ifac = TAILQ_FIRST(head);
1513 		ifa = ifac->ifa;
1514 		rtinfo.rti_ifpaddr = ifa->ifa_addr;
1515 		msglen = rt_msgsize(RTM_IFINFO, &rtinfo);
1516 		if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0) {
1517 			ifnet_unlock();
1518 			return (ENOMEM);
1519 		}
1520 		rt_msg_buffer(RTM_IFINFO, &rtinfo, w->w_tmem, msglen);
1521 		rtinfo.rti_ifpaddr = NULL;
1522 		if (w->w_req != NULL && w->w_tmem != NULL) {
1523 			struct if_msghdr *ifm = w->w_tmem;
1524 
1525 			ifm->ifm_index = ifp->if_index;
1526 			ifm->ifm_flags = ifp->if_flags;
1527 			ifnet_compute_stats(ifp);
1528 			ifm->ifm_data = ifp->if_data;
1529 			ifm->ifm_addrs = rtinfo.rti_addrs;
1530 			error = SYSCTL_OUT(w->w_req, ifm, msglen);
1531 			if (error) {
1532 				ifnet_unlock();
1533 				return (error);
1534 			}
1535 		}
1536 		/*
1537 		 * Add a marker, since SYSCTL_OUT() could block and during
1538 		 * that period the list could be changed.
1539 		 */
1540 		ifa_marker_init(&mark, ifp);
1541 		ifac_mark = &mark.ifac;
1542 		TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);
1543 		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
1544 			TAILQ_REMOVE(head, ifac_mark, ifa_link);
1545 			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);
1546 
1547 			ifa = ifac->ifa;
1548 
1549 			/* Ignore marker */
1550 			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1551 				continue;
1552 
1553 			if (af && af != ifa->ifa_addr->sa_family)
1554 				continue;
1555 			if (curproc->p_ucred->cr_prison &&
1556 			    prison_if(curproc->p_ucred, ifa->ifa_addr))
1557 				continue;
1558 			rtinfo.rti_ifaaddr = ifa->ifa_addr;
1559 			rtinfo.rti_netmask = ifa->ifa_netmask;
1560 			rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
1561 			msglen = rt_msgsize(RTM_NEWADDR, &rtinfo);
1562 			/*
1563 			 * Keep a reference on this ifaddr, so that it will
1564 			 * not be destroyed if the following resizewalkarg()
1565 			 * blocks.
1566 			 */
1567 			IFAREF(ifa);
1568 			if (w->w_tmemsize < msglen &&
1569 			    resizewalkarg(w, msglen) != 0) {
1570 				IFAFREE(ifa);
1571 				TAILQ_REMOVE(head, ifac_mark, ifa_link);
1572 				ifnet_unlock();
1573 				return (ENOMEM);
1574 			}
1575 			rt_msg_buffer(RTM_NEWADDR, &rtinfo, w->w_tmem, msglen);
1576 			if (w->w_req != NULL) {
1577 				struct ifa_msghdr *ifam = w->w_tmem;
1578 
1579 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1580 				ifam->ifam_flags = ifa->ifa_flags;
1581 				ifam->ifam_addrs = rtinfo.rti_addrs;
1582 				ifam->ifam_addrflags = if_addrflags(ifa);
1583 				ifam->ifam_metric = ifa->ifa_metric;
1584 				error = SYSCTL_OUT(w->w_req, w->w_tmem, msglen);
1585 				if (error) {
1586 					IFAFREE(ifa);
1587 					TAILQ_REMOVE(head, ifac_mark, ifa_link);
1588 					ifnet_unlock();
1589 					return (error);
1590 				}
1591 			}
1592 			IFAFREE(ifa);
1593 		}
1594 		TAILQ_REMOVE(head, ifac_mark, ifa_link);
1595 		rtinfo.rti_netmask = NULL;
1596 		rtinfo.rti_ifaaddr = NULL;
1597 		rtinfo.rti_bcastaddr = NULL;
1598 	}
1599 	ifnet_unlock();
1600 	return (0);
1601 }
1602 
1603 static int
1604 rttable_walkarg_create(struct rttable_walkarg *w, int op, int arg)
1605 {
1606 	struct rt_addrinfo rtinfo;
1607 	struct sockaddr_storage ss;
1608 	int i, msglen;
1609 
1610 	memset(w, 0, sizeof(*w));
1611 	w->w_op = op;
1612 	w->w_arg = arg;
1613 
1614 	memset(&ss, 0, sizeof(ss));
1615 	ss.ss_len = sizeof(ss);
1616 
1617 	memset(&rtinfo, 0, sizeof(rtinfo));
1618 	for (i = 0; i < RTAX_MAX; ++i)
1619 		rtinfo.rti_info[i] = (struct sockaddr *)&ss;
1620 	msglen = rt_msgsize(RTM_GET, &rtinfo);
1621 
1622 	w->w_bufsz = msglen * RTTABLE_DUMP_MSGCNT_MAX;
1623 	w->w_buf = kmalloc(w->w_bufsz, M_TEMP, M_WAITOK | M_NULLOK);
1624 	if (w->w_buf == NULL)
1625 		return ENOMEM;
1626 	return 0;
1627 }
1628 
1629 static void
1630 rttable_walkarg_destroy(struct rttable_walkarg *w)
1631 {
1632 	kfree(w->w_buf, M_TEMP);
1633 }
1634 
1635 static void
1636 rttable_entry_rtinfo(struct rt_addrinfo *rtinfo, struct radix_node *rn)
1637 {
1638 	struct rtentry *rt = (struct rtentry *)rn;
1639 
1640 	bzero(rtinfo, sizeof(*rtinfo));
1641 	rtinfo->rti_dst = rt_key(rt);
1642 	rtinfo->rti_gateway = rt->rt_gateway;
1643 	rtinfo->rti_netmask = rt_mask(rt);
1644 	rtinfo->rti_genmask = rt->rt_genmask;
1645 	if (rt->rt_ifp != NULL) {
1646 		rtinfo->rti_ifpaddr =
1647 		TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1648 		rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr;
1649 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1650 			rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
1651 	}
1652 }
1653 
1654 static int
1655 rttable_walk_entry(struct radix_node *rn, void *xw)
1656 {
1657 	struct rttable_walkarg *w = xw;
1658 	struct rtentry *rt = (struct rtentry *)rn;
1659 	struct rt_addrinfo rtinfo;
1660 	struct rt_msghdr *rtm;
1661 	boolean_t save = FALSE;
1662 	int msglen, w_bufleft;
1663 	void *ptr;
1664 
1665 	rttable_entry_rtinfo(&rtinfo, rn);
1666 	msglen = rt_msgsize(RTM_GET, &rtinfo);
1667 
1668 	w_bufleft = w->w_bufsz - w->w_buflen;
1669 
1670 	if (rn->rn_dupedkey != NULL) {
1671 		struct radix_node *rn1 = rn;
1672 		int total_msglen = msglen;
1673 
1674 		/*
1675 		 * Make sure that we have enough space left for all
1676 		 * dupedkeys, since rn_walktree_at always starts
1677 		 * from the first dupedkey.
1678 		 */
1679 		while ((rn1 = rn1->rn_dupedkey) != NULL) {
1680 			struct rt_addrinfo rtinfo1;
1681 			int msglen1;
1682 
1683 			if (rn1->rn_flags & RNF_ROOT)
1684 				continue;
1685 
1686 			rttable_entry_rtinfo(&rtinfo1, rn1);
1687 			msglen1 = rt_msgsize(RTM_GET, &rtinfo1);
1688 			total_msglen += msglen1;
1689 		}
1690 
1691 		if (total_msglen > w_bufleft) {
1692 			if (total_msglen > w->w_bufsz) {
1693 				static int logged = 0;
1694 
1695 				if (!logged) {
1696 					kprintf("buffer is too small for "
1697 					    "all dupedkeys, increase "
1698 					    "RTTABLE_DUMP_MSGCNT_MAX\n");
1699 					logged = 1;
1700 				}
1701 				return ENOMEM;
1702 			}
1703 			save = TRUE;
1704 		}
1705 	} else if (msglen > w_bufleft) {
1706 		save = TRUE;
1707 	}
1708 
1709 	if (save) {
1710 		/*
1711 		 * Not enough buffer left; remember the position
1712 		 * to start from upon next round.
1713 		 */
1714 		KASSERT(msglen <= w->w_bufsz, ("msg too long %d", msglen));
1715 
1716 		KASSERT(rtinfo.rti_dst->sa_len <= sizeof(w->w_key0),
1717 		    ("key too long %d", rtinfo.rti_dst->sa_len));
1718 		memset(&w->w_key0, 0, sizeof(w->w_key0));
1719 		memcpy(&w->w_key0, rtinfo.rti_dst, rtinfo.rti_dst->sa_len);
1720 		w->w_key = (const char *)&w->w_key0;
1721 
1722 		if (rtinfo.rti_netmask != NULL) {
1723 			KASSERT(
1724 			    rtinfo.rti_netmask->sa_len <= sizeof(w->w_mask0),
1725 			    ("mask too long %d", rtinfo.rti_netmask->sa_len));
1726 			memset(&w->w_mask0, 0, sizeof(w->w_mask0));
1727 			memcpy(&w->w_mask0, rtinfo.rti_netmask,
1728 			    rtinfo.rti_netmask->sa_len);
1729 			w->w_mask = (const char *)&w->w_mask0;
1730 		} else {
1731 			w->w_mask = NULL;
1732 		}
1733 		return EJUSTRETURN;
1734 	}
1735 
1736 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1737 		return 0;
1738 
1739 	ptr = ((uint8_t *)w->w_buf) + w->w_buflen;
1740 	rt_msg_buffer(RTM_GET, &rtinfo, ptr, msglen);
1741 
1742 	rtm = (struct rt_msghdr *)ptr;
1743 	rtm->rtm_flags = rt->rt_flags;
1744 	rtm->rtm_use = rt->rt_use;
1745 	rtm->rtm_rmx = rt->rt_rmx;
1746 	rtm->rtm_index = rt->rt_ifp->if_index;
1747 	rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1748 	rtm->rtm_addrs = rtinfo.rti_addrs;
1749 
1750 	w->w_buflen += msglen;
1751 
1752 	return 0;
1753 }
1754 
1755 static void
1756 rttable_walk_dispatch(netmsg_t msg)
1757 {
1758 	struct netmsg_rttable_walk *nmsg = (struct netmsg_rttable_walk *)msg;
1759 	struct radix_node_head *rnh = rt_tables[mycpuid][nmsg->af];
1760 	struct rttable_walkarg *w = nmsg->w;
1761 	int error;
1762 
1763 	error = rnh->rnh_walktree_at(rnh, w->w_key, w->w_mask,
1764 	    rttable_walk_entry, w);
1765 	lwkt_replymsg(&nmsg->base.lmsg, error);
1766 }
1767 
1768 static int
1769 sysctl_rttable(int af, struct sysctl_req *req, int op, int arg)
1770 {
1771 	struct rttable_walkarg w;
1772 	int error, i;
1773 
1774 	error = rttable_walkarg_create(&w, op, arg);
1775 	if (error)
1776 		return error;
1777 
1778 	error = EINVAL;
1779 	for (i = 1; i <= AF_MAX; i++) {
1780 		if (rt_tables[mycpuid][i] != NULL && (af == 0 || af == i)) {
1781 			w.w_key = NULL;
1782 			w.w_mask = NULL;
1783 			for (;;) {
1784 				struct netmsg_rttable_walk nmsg;
1785 
1786 				netmsg_init(&nmsg.base, NULL,
1787 				    &curthread->td_msgport, 0,
1788 				    rttable_walk_dispatch);
1789 				nmsg.af = i;
1790 				nmsg.w = &w;
1791 
1792 				w.w_buflen = 0;
1793 
1794 				error = lwkt_domsg(netisr_cpuport(mycpuid),
1795 				    &nmsg.base.lmsg, 0);
1796 				if (error && error != EJUSTRETURN)
1797 					goto done;
1798 
1799 				if (req != NULL && w.w_buflen > 0) {
1800 					int error1;
1801 
1802 					error1 = SYSCTL_OUT(req, w.w_buf,
1803 					    w.w_buflen);
1804 					if (error1) {
1805 						error = error1;
1806 						goto done;
1807 					}
1808 				}
1809 				if (error == 0) /* done */
1810 					break;
1811 			}
1812 		}
1813 	}
1814 done:
1815 	rttable_walkarg_destroy(&w);
1816 	return error;
1817 }
1818 
1819 static int
1820 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1821 {
1822 	int	*name = (int *)arg1;
1823 	u_int	namelen = arg2;
1824 	int	error = EINVAL;
1825 	int	origcpu, cpu;
1826 	u_char  af;
1827 	struct	walkarg w;
1828 
1829 	name ++;
1830 	namelen--;
1831 	if (req->newptr)
1832 		return (EPERM);
1833 	if (namelen != 3 && namelen != 4)
1834 		return (EINVAL);
1835 	af = name[0];
1836 	bzero(&w, sizeof w);
1837 	w.w_op = name[1];
1838 	w.w_arg = name[2];
1839 	w.w_req = req;
1840 
1841 	/*
1842 	 * Optional third argument specifies cpu, used primarily for
1843 	 * debugging the route table.
1844 	 */
1845 	if (namelen == 4) {
1846 		if (name[3] < 0 || name[3] >= netisr_ncpus)
1847 			return (EINVAL);
1848 		cpu = name[3];
1849 	} else {
1850 		/*
1851 		 * Target cpu is not specified, use cpu0 then, so that
1852 		 * the result set will be relatively stable.
1853 		 */
1854 		cpu = 0;
1855 	}
1856 	origcpu = mycpuid;
1857 	lwkt_migratecpu(cpu);
1858 
1859 	switch (w.w_op) {
1860 	case NET_RT_DUMP:
1861 	case NET_RT_FLAGS:
1862 		error = sysctl_rttable(af, w.w_req, w.w_op, w.w_arg);
1863 		break;
1864 
1865 	case NET_RT_IFLIST:
1866 		error = sysctl_iflist(af, &w);
1867 		break;
1868 	}
1869 	if (w.w_tmem != NULL)
1870 		kfree(w.w_tmem, M_RTABLE);
1871 
1872 	lwkt_migratecpu(origcpu);
1873 	return (error);
1874 }
1875 
1876 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1877 
1878 /*
1879  * Definitions of protocols supported in the ROUTE domain.
1880  */
1881 
1882 static struct domain routedomain;		/* or at least forward */
1883 
1884 static struct protosw routesw[] = {
1885     {
1886 	.pr_type = SOCK_RAW,
1887 	.pr_domain = &routedomain,
1888 	.pr_protocol = 0,
1889 	.pr_flags = PR_ATOMIC|PR_ADDR,
1890 	.pr_input = NULL,
1891 	.pr_output = route_output,
1892 	.pr_ctlinput = raw_ctlinput,
1893 	.pr_ctloutput = route_ctloutput,
1894 	.pr_ctlport = cpu0_ctlport,
1895 
1896 	.pr_init = raw_init,
1897 	.pr_usrreqs = &route_usrreqs
1898     }
1899 };
1900 
1901 static struct domain routedomain = {
1902 	.dom_family		= AF_ROUTE,
1903 	.dom_name		= "route",
1904 	.dom_init		= NULL,
1905 	.dom_externalize	= NULL,
1906 	.dom_dispose		= NULL,
1907 	.dom_protosw		= routesw,
1908 	.dom_protoswNPROTOSW	= &routesw[(sizeof routesw)/(sizeof routesw[0])],
1909 	.dom_next		= SLIST_ENTRY_INITIALIZER,
1910 	.dom_rtattach		= NULL,
1911 	.dom_rtoffset		= 0,
1912 	.dom_maxrtkey		= 0,
1913 	.dom_ifattach		= NULL,
1914 	.dom_ifdetach		= NULL
1915 };
1916 
1917 DOMAIN_SET(route);
1918 
1919