xref: /dragonfly/sys/net/rtsock.c (revision 72a6624f)
1 /*
2  * Copyright (c) 2004, 2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1988, 1991, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
62  * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $
63  */
64 
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
69 #include <sys/proc.h>
70 #include <sys/priv.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/protosw.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/domain.h>
77 #include <sys/jail.h>
78 
79 #include <sys/thread2.h>
80 #include <sys/socketvar2.h>
81 
82 #include <net/if.h>
83 #include <net/if_var.h>
84 #include <net/route.h>
85 #include <net/raw_cb.h>
86 #include <net/netmsg2.h>
87 #include <net/netisr2.h>
88 
/*
 * Smallest sockaddr length that still covers both sa_len and sa_family.
 * sa_family is after sa_len, rest is data.
 */
#define	_SA_MINSIZE	(offsetof(struct sockaddr, sa_family) + \
			 sizeof(((struct sockaddr *)0)->sa_family))

/* Malloc type used for the routing message buffers allocated in this file. */
MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
94 
/*
 * Counts of attached routing sockets.  rts_attach() and rts_detach()
 * adjust ip_count / ip6_count according to the socket's protocol
 * (AF_INET / AF_INET6) and any_count for every routing socket.
 */
static struct route_cb {
	int	ip_count;	/* sockets attached with protocol AF_INET */
	int	ip6_count;	/* sockets attached with protocol AF_INET6 */
	int	any_count;	/* all attached routing sockets */
} route_cb;

/*
 * Two-byte sockaddr (sa_len = 2, sa_family = PF_ROUTE) used as the
 * foreign address of every routing socket and as the source address
 * handed to raw_input().
 */
static const struct sockaddr route_src = { 2, PF_ROUTE, };
102 
/*
 * Walker state passed to sysctl_iflist().
 * NOTE(review): field meanings below are inferred from their names; the
 * consumers are outside this part of the file -- confirm before relying
 * on them.
 */
struct walkarg {
	int	w_tmemsize;		/* presumably size of w_tmem */
	int	w_op, w_arg;		/* presumably sysctl operation and argument */
	void	*w_tmem;		/* presumably a temporary message buffer */
	struct sysctl_req *w_req;	/* sysctl request being serviced */
};
109 
/*
 * Upper bound related to route table dumps; overridable at build time.
 */
#ifndef RTTABLE_DUMP_MSGCNT_MAX
/* Should be large enough for dupkeys */
#define RTTABLE_DUMP_MSGCNT_MAX		64
#endif

/*
 * State for a route table walk.
 * NOTE(review): the code that uses these fields is outside this part of
 * the file; the per-field notes are inferred from names only.
 */
struct rttable_walkarg {
	int	w_op;		/* presumably the sysctl operation */
	int	w_arg;		/* presumably the sysctl argument */
	int	w_bufsz;	/* presumably the capacity of w_buf */
	void	*w_buf;		/* output buffer */

	int	w_buflen;	/* presumably bytes currently used in w_buf */

	const char *w_key;	/* presumably points at w_key0 when set */
	const char *w_mask;	/* presumably points at w_mask0 when set */

	struct sockaddr_storage w_key0;		/* storage for a key */
	struct sockaddr_storage w_mask0;	/* storage for a mask */
};
129 
/*
 * Netmsg wrapper carrying an address family and a pointer to the
 * rttable_walkarg state for a route table walk.
 */
struct netmsg_rttable_walk {
	struct netmsg_base	base;	/* message header */
	int			af;	/* address family */
	struct rttable_walkarg	*w;	/* walk state */
};
135 
/*
 * Routing socket control block.  The embedded rawcb must stay the first
 * member: this file casts between struct rawcb * and struct routecb *.
 */
struct routecb {
	struct rawcb	rocb_rcb;		/* generic raw socket pcb */
	unsigned int	rocb_msgfilter;		/* ROUTE_MSGFILTER bitmask, 0 = off */
	char		*rocb_missfilter;	/* RO_MISSFILTER sockaddr list */
	size_t		rocb_missfilterlen;	/* bytes in rocb_missfilter, 0 = off */
};
/* Convert a socket to its routing control block via so_pcb. */
#define	sotoroutecb(so)	((struct routecb *)(so)->so_pcb)
143 
/* Forward declarations for helpers defined later in this file. */
static struct mbuf *
		rt_msg_mbuf (int, struct rt_addrinfo *);
static void	rt_msg_buffer (int, struct rt_addrinfo *, void *buf, int len);
static int	rt_msgsize(int type, const struct rt_addrinfo *rtinfo);
static int	rt_xaddrs (char *, char *, struct rt_addrinfo *);
static int	sysctl_rttable(int af, struct sysctl_req *req, int op, int arg);
static int	if_addrflags(const struct ifaddr *ifa);
static int	sysctl_iflist (int af, struct walkarg *w);
static int	route_output(struct mbuf *, struct socket *, ...);
static void	rt_setmetrics (u_long, struct rt_metrics *,
			       struct rt_metrics *);
155 
/*
 * It really doesn't make any sense at all for this code to share much
 * with raw_usrreq.c, since its functionality is so restricted.  XXX
 */

/*
 * pru_abort handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.  The message must not
 * be touched after the call.
 */
static void
rts_abort(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_abort(msg);
	/* msg invalid now */
	crit_exit();
}
168 
169 static int
170 rts_filter(struct mbuf *m, const struct sockproto *proto,
171 	const struct rawcb *rp)
172 {
173 	const struct routecb *rop = (const struct routecb *)rp;
174 	const struct rt_msghdr *rtm;
175 
176 	KKASSERT(m != NULL);
177 	KKASSERT(proto != NULL);
178 	KKASSERT(rp != NULL);
179 
180 	/* Wrong family for this socket. */
181 	if (proto->sp_family != PF_ROUTE)
182 		return ENOPROTOOPT;
183 
184 	/* If no filter set, just return. */
185 	if (rop->rocb_msgfilter == 0 && rop->rocb_missfilterlen == 0)
186 		return 0;
187 
188 	/* Ensure we can access rtm_type */
189 	if (m->m_len <
190 	    offsetof(struct rt_msghdr, rtm_type) + sizeof(rtm->rtm_type))
191 		return EINVAL;
192 
193 	rtm = mtod(m, const struct rt_msghdr *);
194 	/* If the rtm type is filtered out, return a positive. */
195 	if (rop->rocb_msgfilter != 0 &&
196 	    !(rop->rocb_msgfilter & ROUTE_FILTER(rtm->rtm_type)))
197 		return EEXIST;
198 
199 	if (rop->rocb_missfilterlen != 0 && rtm->rtm_type == RTM_MISS) {
200 		CTASSERT(RTAX_DST == 0);
201 		struct sockaddr *sa;
202 		struct sockaddr_storage ss;
203 		struct sockaddr *dst = (struct sockaddr *)&ss;
204 		char *cp = rop->rocb_missfilter;
205 		char *ep = cp + rop->rocb_missfilterlen;
206 
207 		/* Ensure we can access sa_len */
208 		if (m->m_pkthdr.len < sizeof(*rtm) + _SA_MINSIZE)
209 			return EINVAL;
210 		m_copydata(m, sizeof(*rtm) + offsetof(struct sockaddr, sa_len),
211 		    sizeof(ss.ss_len), (caddr_t)&ss);
212 		if (ss.ss_len < _SA_MINSIZE ||
213 		    ss.ss_len > sizeof(ss) ||
214 		    m->m_pkthdr.len < sizeof(*rtm) + ss.ss_len)
215 			return EINVAL;
216 		/* Copy out the destination sockaddr */
217 		m_copydata(m, sizeof(*rtm), ss.ss_len, (caddr_t)&ss);
218 
219 		/* Find a matching sockaddr in the filter */
220 		while (cp < ep) {
221 			sa = (struct sockaddr *)cp;
222 			if (sa->sa_len == dst->sa_len &&
223 			    memcmp(sa, dst, sa->sa_len) == 0)
224 				break;
225 			cp += RT_ROUNDUP(sa->sa_len);
226 		}
227 		if (cp == ep)
228 			return EEXIST;
229 	}
230 
231 	/* Passed the filter. */
232 	return 0;
233 }
234 
235 
236 /* pru_accept is EOPNOTSUPP */
237 
/*
 * pru_attach handler: allocate a routecb for the socket, attach it via
 * raw_attach(), account for it in route_cb, install the routing filter
 * and mark the socket connected with SO_USELOOPBACK set.  The result is
 * returned to the sender through lwkt_replymsg().
 */
static void
rts_attach(netmsg_t msg)
{
	struct socket *so = msg->base.nm_so;
	struct pru_attach_info *ai = msg->attach.nm_ai;
	struct rawcb *rp;
	struct routecb *rop;
	int proto = msg->attach.nm_proto;
	int error;

	crit_enter();
	/* A pcb is already attached to this socket. */
	if (sotorawcb(so) != NULL) {
		error = EISCONN;
		goto done;
	}

	rop = kmalloc(sizeof *rop, M_PCB, M_WAITOK | M_ZERO);
	rp = &rop->rocb_rcb;

	/*
	 * The critical section is necessary to block protocols from sending
	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
	 * this PCB is extant but incompletely initialized.
	 * Probably we should try to do more of this work beforehand and
	 * eliminate the critical section.
	 */
	so->so_pcb = rp;
	soreference(so);	/* so_pcb assignment */
	error = raw_attach(so, proto, ai->sb_rlimit);
	rp = sotorawcb(so);
	if (error) {
		/*
		 * NOTE(review): so->so_pcb still points at rop and the
		 * reference taken above is not dropped on this path --
		 * confirm whether the caller cleans these up.
		 */
		kfree(rop, M_PCB);
		goto done;
	}
	/* Per-family listener accounting. */
	switch(rp->rcb_proto.sp_protocol) {
	case AF_INET:
		route_cb.ip_count++;
		break;
	case AF_INET6:
		route_cb.ip6_count++;
		break;
	}
	rp->rcb_faddr = &route_src;
	rp->rcb_filter = rts_filter;
	route_cb.any_count++;
	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;
	error = 0;
done:
	crit_exit();
	lwkt_replymsg(&msg->lmsg, error);
}
290 
/*
 * pru_bind handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_bind(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_bind(msg); /* xxx just EINVAL */
	/* msg invalid now */
	crit_exit();
}
299 
/*
 * pru_connect handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_connect(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_connect(msg); /* XXX just EINVAL */
	/* msg invalid now */
	crit_exit();
}
308 
309 /* pru_connect2 is EOPNOTSUPP */
310 /* pru_control is EOPNOTSUPP */
311 
312 static void
313 rts_detach(netmsg_t msg)
314 {
315 	struct socket *so = msg->base.nm_so;
316 	struct rawcb *rp = sotorawcb(so);
317 	struct routecb *rop = (struct routecb *)rp;
318 
319 	crit_enter();
320 	if (rop->rocb_missfilterlen != 0)
321 		kfree(rop->rocb_missfilter, M_PCB);
322 	if (rp != NULL) {
323 		switch(rp->rcb_proto.sp_protocol) {
324 		case AF_INET:
325 			route_cb.ip_count--;
326 			break;
327 		case AF_INET6:
328 			route_cb.ip6_count--;
329 			break;
330 		}
331 		route_cb.any_count--;
332 	}
333 	raw_usrreqs.pru_detach(msg);
334 	/* msg invalid now */
335 	crit_exit();
336 }
337 
/*
 * pru_disconnect handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_disconnect(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_disconnect(msg);
	/* msg invalid now */
	crit_exit();
}
346 
347 /* pru_listen is EOPNOTSUPP */
348 
/*
 * pru_peeraddr handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_peeraddr(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_peeraddr(msg);
	/* msg invalid now */
	crit_exit();
}
357 
358 /* pru_rcvd is EOPNOTSUPP */
359 /* pru_rcvoob is EOPNOTSUPP */
360 
/*
 * pru_send handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_send(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_send(msg);
	/* msg invalid now */
	crit_exit();
}
369 
370 /* pru_sense is null */
371 
/*
 * pru_shutdown handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_shutdown(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_shutdown(msg);
	/* msg invalid now */
	crit_exit();
}
380 
/*
 * pru_sockaddr handler: forwards the message to the generic raw socket
 * implementation from inside a critical section.
 */
static void
rts_sockaddr(netmsg_t msg)
{
	crit_enter();
	raw_usrreqs.pru_sockaddr(msg);
	/* msg invalid now */
	crit_exit();
}
389 
/*
 * User request dispatch table for routing sockets.  Requests that have
 * an rts_*() wrapper in this file go through it; the remaining slots use
 * the generic "not supported" handler, the null sense handler, or the
 * stock sosend/soreceive routines.
 */
static struct pr_usrreqs route_usrreqs = {
	.pru_abort = rts_abort,
	.pru_accept = pr_generic_notsupp,
	.pru_attach = rts_attach,
	.pru_bind = rts_bind,
	.pru_connect = rts_connect,
	.pru_connect2 = pr_generic_notsupp,
	.pru_control = pr_generic_notsupp,
	.pru_detach = rts_detach,
	.pru_disconnect = rts_disconnect,
	.pru_listen = pr_generic_notsupp,
	.pru_peeraddr = rts_peeraddr,
	.pru_rcvd = pr_generic_notsupp,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = rts_send,
	.pru_sense = pru_sense_null,
	.pru_shutdown = rts_shutdown,
	.pru_sockaddr = rts_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive
};
411 
/*
 * Return the address family of a sockaddr, or 0 when no sockaddr is
 * supplied.
 */
static __inline sa_family_t
familyof(struct sockaddr *sa)
{
	if (sa == NULL)
		return (0);
	return (sa->sa_family);
}
417 
418 /*
419  * Routing socket input function.  The packet must be serialized onto cpu 0.
420  * We use the cpu0_soport() netisr processing loop to handle it.
421  *
422  * This looks messy but it means that anyone, including interrupt code,
423  * can send a message to the routing socket.
424  */
425 static void
426 rts_input_handler(netmsg_t msg)
427 {
428 	static const struct sockaddr route_dst = { 2, PF_ROUTE, };
429 	struct sockproto route_proto;
430 	struct netmsg_packet *pmsg = &msg->packet;
431 	struct mbuf *m;
432 	sa_family_t family;
433 	struct rawcb *skip;
434 
435 	family = pmsg->base.lmsg.u.ms_result;
436 	route_proto.sp_family = PF_ROUTE;
437 	route_proto.sp_protocol = family;
438 
439 	m = pmsg->nm_packet;
440 	M_ASSERTPKTHDR(m);
441 
442 	skip = m->m_pkthdr.header;
443 	m->m_pkthdr.header = NULL;
444 
445 	raw_input(m, &route_proto, &route_src, &route_dst, skip);
446 }
447 
448 static void
449 rts_input_skip(struct mbuf *m, sa_family_t family, struct rawcb *skip)
450 {
451 	struct netmsg_packet *pmsg;
452 	lwkt_port_t port;
453 
454 	M_ASSERTPKTHDR(m);
455 
456 	port = netisr_cpuport(0);	/* XXX same as for routing socket */
457 	pmsg = &m->m_hdr.mh_netmsg;
458 	netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport,
459 		    0, rts_input_handler);
460 	pmsg->nm_packet = m;
461 	pmsg->base.lmsg.u.ms_result = family;
462 	m->m_pkthdr.header = skip; /* XXX steal field in pkthdr */
463 	lwkt_sendmsg(port, &pmsg->base.lmsg);
464 }
465 
/*
 * Queue a routing message for delivery without excluding any socket:
 * same as rts_input_skip() with a NULL skip pcb.
 */
static __inline void
rts_input(struct mbuf *m, sa_family_t family)
{
	rts_input_skip(m, family, NULL);
}
471 
/*
 * Socket option handler for routing sockets (level AF_ROUTE).
 *
 * ROUTE_MSGFILTER gets/sets the message type bitmask consulted by
 * rts_filter().  RO_MISSFILTER gets/sets the packed list of sockaddrs
 * that rts_filter() matches RTM_MISS destinations against.
 *
 * The result is returned through lwkt_replymsg(): EINVAL for a wrong
 * level or a malformed filter list, ENOBUFS when the list has more than
 * RO_FILTSA_MAX entries or cannot be allocated, ENOPROTOOPT for an
 * unknown option name, 0 otherwise.
 */
static void
route_ctloutput(netmsg_t msg)
{
	struct socket *so = msg->ctloutput.base.nm_so;
	struct sockopt *sopt = msg->ctloutput.nm_sopt;
	struct routecb *rop = sotoroutecb(so);
	int error;
	unsigned int msgfilter;
	unsigned char *cp, *ep;
	size_t len;
	struct sockaddr *sa;

	if (sopt->sopt_level != AF_ROUTE) {
		error = EINVAL;
		goto out;
	}

	error = 0;

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
		case ROUTE_MSGFILTER:
			error = soopt_to_kbuf(sopt, &msgfilter,
			    sizeof(msgfilter), sizeof(msgfilter));
			if (error == 0)
				rop->rocb_msgfilter = msgfilter;
			break;
		case RO_MISSFILTER:
			/* Validate the data */
			len = 0;	/* counts sockaddrs, not bytes */
			cp = sopt->sopt_val;
			ep = cp + sopt->sopt_valsize;
			while (cp < ep) {
				/* Not enough room left for sa_len. */
				if (ep - cp <
				    offsetof(struct sockaddr, sa_len) +
				    sizeof(sa->sa_len))
					break;
				if (++len > RO_FILTSA_MAX) {
					error = ENOBUFS;
					break;
				}
				sa = (struct sockaddr *)cp;
				if (sa->sa_len < _SA_MINSIZE ||
				    sa->sa_len > sizeof(struct sockaddr_storage))
					break;
				cp += RT_ROUNDUP(sa->sa_len);
			}
			/*
			 * The walk must end exactly at the end of the
			 * buffer; anything else means a bad entry.
			 */
			if (cp != ep) {
				if (error == 0)
					error = EINVAL;
				break;
			}
			/* Drop the previous filter before installing. */
			if (rop->rocb_missfilterlen != 0)
				kfree(rop->rocb_missfilter, M_PCB);
			if (sopt->sopt_valsize != 0) {
				rop->rocb_missfilter =
				    kmalloc(sopt->sopt_valsize,
				            M_PCB, M_WAITOK | M_NULLOK);
				if (rop->rocb_missfilter == NULL) {
					rop->rocb_missfilterlen = 0;
					error = ENOBUFS;
					break;
				}
			} else
				rop->rocb_missfilter = NULL;
			rop->rocb_missfilterlen = sopt->sopt_valsize;
			if (rop->rocb_missfilterlen != 0)
				memcpy(rop->rocb_missfilter, sopt->sopt_val,
				    rop->rocb_missfilterlen);
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case ROUTE_MSGFILTER:
			msgfilter = rop->rocb_msgfilter;
			soopt_from_kbuf(sopt, &msgfilter, sizeof(msgfilter));
			break;
		case RO_MISSFILTER:
			soopt_from_kbuf(sopt, rop->rocb_missfilter,
			    rop->rocb_missfilterlen);
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
	}
out:
	lwkt_replymsg(&msg->ctloutput.base.lmsg, error);
}
566 
567 
568 
569 static void *
570 reallocbuf_nofree(void *ptr, size_t len, size_t olen)
571 {
572 	void *newptr;
573 
574 	newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
575 	if (newptr == NULL)
576 		return NULL;
577 	bcopy(ptr, newptr, olen);
578 	if (olen < len)
579 		bzero((char *)newptr + olen, len - olen);
580 
581 	return (newptr);
582 }
583 
584 /*
585  * Internal helper routine for route_output().
586  */
587 static int
588 _fillrtmsg(struct rt_msghdr **prtm, struct rtentry *rt,
589 	   struct rt_addrinfo *rtinfo)
590 {
591 	int msglen;
592 	struct rt_msghdr *rtm = *prtm;
593 
594 	/* Fill in rt_addrinfo for call to rt_msg_buffer(). */
595 	rtinfo->rti_dst = rt_key(rt);
596 	rtinfo->rti_gateway = rt->rt_gateway;
597 	rtinfo->rti_netmask = rt_mask(rt);		/* might be NULL */
598 	rtinfo->rti_genmask = rt->rt_genmask;		/* might be NULL */
599 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
600 		if (rt->rt_ifp != NULL) {
601 			rtinfo->rti_ifpaddr =
602 			    TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])
603 			    ->ifa->ifa_addr;
604 			rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr;
605 			if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
606 				rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
607 			rtm->rtm_index = rt->rt_ifp->if_index;
608 		} else {
609 			rtinfo->rti_ifpaddr = NULL;
610 			rtinfo->rti_ifaaddr = NULL;
611 		}
612 	} else if (rt->rt_ifp != NULL) {
613 		rtm->rtm_index = rt->rt_ifp->if_index;
614 	}
615 
616 	msglen = rt_msgsize(rtm->rtm_type, rtinfo);
617 	if (rtm->rtm_msglen < msglen) {
618 		/* NOTE: Caller will free the old rtm accordingly */
619 		rtm = reallocbuf_nofree(rtm, msglen, rtm->rtm_msglen);
620 		if (rtm == NULL)
621 			return (ENOBUFS);
622 		*prtm = rtm;
623 	}
624 	rt_msg_buffer(rtm->rtm_type, rtinfo, rtm, msglen);
625 
626 	rtm->rtm_flags = rt->rt_flags;
627 	rtm->rtm_rmx = rt->rt_rmx;
628 	rtm->rtm_addrs = rtinfo->rti_addrs;
629 
630 	return (0);
631 }
632 
/*
 * Message pair handed to the RTM_DELETE / RTM_GET callbacks.
 * bak_rtm is the request buffer that rtinfo points into and must stay
 * allocated while the callbacks run; new_rtm is the current reply
 * buffer, which starts out equal to bak_rtm and is replaced when the
 * reply has to grow.
 */
struct rtm_arg {
	struct rt_msghdr	*bak_rtm;
	struct rt_msghdr	*new_rtm;
};
637 
638 static int
639 fillrtmsg(struct rtm_arg *arg, struct rtentry *rt,
640 	  struct rt_addrinfo *rtinfo)
641 {
642 	struct rt_msghdr *rtm = arg->new_rtm;
643 	int error;
644 
645 	error = _fillrtmsg(&rtm, rt, rtinfo);
646 	if (!error) {
647 		if (arg->new_rtm != rtm) {
648 			/*
649 			 * _fillrtmsg() just allocated a new rtm;
650 			 * if the previously allocated rtm is not
651 			 * the backing rtm, it should be freed.
652 			 */
653 			if (arg->new_rtm != arg->bak_rtm)
654 				kfree(arg->new_rtm, M_RTABLE);
655 			arg->new_rtm = rtm;
656 		}
657 	}
658 	return error;
659 }
660 
/*
 * Per-operation callbacks used by route_output(): the add/delete
 * callbacks are passed to rtrequest1_global(), the get/change/lock
 * callbacks to rtsearch_global().
 */
static void route_output_add_callback(int, int, struct rt_addrinfo *,
					struct rtentry *, void *);
static void route_output_delete_callback(int, int, struct rt_addrinfo *,
					struct rtentry *, void *);
static int route_output_get_callback(int, struct rt_addrinfo *,
				     struct rtentry *, void *, int);
static int route_output_change_callback(int, struct rt_addrinfo *,
					struct rtentry *, void *, int);
static int route_output_lock_callback(int, struct rt_addrinfo *,
				      struct rtentry *, void *, int);
671 
/*
 * Process a routing message written to a routing socket.
 *
 * The message in mbuf chain m is copied into a flat buffer, validated,
 * and dispatched by rtm_type (RTM_ADD, RTM_DELETE, RTM_GET, RTM_CHANGE,
 * RTM_LOCK).  The possibly rewritten message, carrying either rtm_errno
 * or RTF_DONE, is then copied back into m and queued to the routing
 * sockets through rts_input_skip().
 *
 * The variadic argument is a struct pr_output_info * supplying the
 * sender's pid.  Returns 0 or an errno value.
 */
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so, ...)
{
	struct rtm_arg arg;
	struct rt_msghdr *rtm = NULL;
	struct rawcb *rp = NULL;
	struct pr_output_info *oi;
	struct rt_addrinfo rtinfo;
	sa_family_t family;
	int len, error = 0;
	__va_list ap;

	M_ASSERTPKTHDR(m);

	__va_start(ap, so);
	oi = __va_arg(ap, struct pr_output_info *);
	__va_end(ap);

	/* Family 0 until a destination has been parsed. */
	family = familyof(NULL);

#define gotoerr(e) { error = e; goto flush;}

	if (m == NULL ||
	    (m->m_len < sizeof(long) &&
	     (m = m_pullup(m, sizeof(long))) == NULL))
		return (ENOBUFS);
	/* The packet length must match the length claimed in the header. */
	len = m->m_pkthdr.len;
	if (len < sizeof(struct rt_msghdr) ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
		gotoerr(EINVAL);

	rtm = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
	if (rtm == NULL)
		gotoerr(ENOBUFS);

	/* Work on a contiguous copy of the message. */
	m_copydata(m, 0, len, (caddr_t)rtm);
	if (rtm->rtm_version != RTM_VERSION)
		gotoerr(EPROTONOSUPPORT);

	rtm->rtm_pid = oi->p_pid;
	bzero(&rtinfo, sizeof(struct rt_addrinfo));
	rtinfo.rti_addrs = rtm->rtm_addrs;
	/* rtinfo's sockaddr pointers reference memory inside rtm. */
	if (rt_xaddrs((char *)(rtm + 1), (char *)rtm + len, &rtinfo) != 0)
		gotoerr(EINVAL);

	rtinfo.rti_flags = rtm->rtm_flags;
	if (rtinfo.rti_dst == NULL || rtinfo.rti_dst->sa_family >= AF_MAX ||
	    (rtinfo.rti_gateway && rtinfo.rti_gateway->sa_family >= AF_MAX))
		gotoerr(EINVAL);

	family = familyof(rtinfo.rti_dst);

	/*
	 * Verify that the caller has the appropriate privilege; RTM_GET
	 * is the only operation the non-superuser is allowed.
	 */
	if (rtm->rtm_type != RTM_GET &&
	    priv_check_cred(so->so_cred, PRIV_ROOT, 0) != 0)
		gotoerr(EPERM);

	if (rtinfo.rti_genmask != NULL) {
		error = rtmask_add_global(rtinfo.rti_genmask,
		    rtm->rtm_type != RTM_GET ?
		    RTREQ_PRIO_HIGH : RTREQ_PRIO_NORM);
		if (error)
			goto flush;
	}

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (rtinfo.rti_gateway == NULL) {
			error = EINVAL;
		} else {
			error = rtrequest1_global(RTM_ADD, &rtinfo,
			    route_output_add_callback, rtm, RTREQ_PRIO_HIGH);
		}
		break;
	case RTM_DELETE:
		/*
		 * Backing rtm (bak_rtm) could _not_ be freed during
		 * rtrequest1_global or rtsearch_global, even if the
		 * callback reallocates the rtm due to its size changes,
		 * since rtinfo points to the backing rtm's memory area.
		 * After rtrequest1_global or rtsearch_global returns,
		 * it is safe to free the backing rtm, since rtinfo will
		 * not be used anymore.
		 *
		 * new_rtm will be used to save the new rtm allocated
		 * by rtrequest1_global or rtsearch_global.
		 */
		arg.bak_rtm = rtm;
		arg.new_rtm = rtm;
		error = rtrequest1_global(RTM_DELETE, &rtinfo,
		    route_output_delete_callback, &arg, RTREQ_PRIO_HIGH);
		rtm = arg.new_rtm;
		if (rtm != arg.bak_rtm)
			kfree(arg.bak_rtm, M_RTABLE);
		break;
	case RTM_GET:
		/* See the comment in RTM_DELETE */
		arg.bak_rtm = rtm;
		arg.new_rtm = rtm;
		error = rtsearch_global(RTM_GET, &rtinfo,
		    route_output_get_callback, &arg, RTS_NOEXACTMATCH,
		    RTREQ_PRIO_NORM);
		rtm = arg.new_rtm;
		if (rtm != arg.bak_rtm)
			kfree(arg.bak_rtm, M_RTABLE);
		break;
	case RTM_CHANGE:
		error = rtsearch_global(RTM_CHANGE, &rtinfo,
		    route_output_change_callback, rtm, RTS_EXACTMATCH,
		    RTREQ_PRIO_HIGH);
		break;
	case RTM_LOCK:
		error = rtsearch_global(RTM_LOCK, &rtinfo,
		    route_output_lock_callback, rtm, RTS_EXACTMATCH,
		    RTREQ_PRIO_HIGH);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
flush:
	/* Record the outcome in the reply header. */
	if (rtm != NULL) {
		if (error != 0)
			rtm->rtm_errno = error;
		else
			rtm->rtm_flags |= RTF_DONE;
	}

	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!(so->so_options & SO_USELOOPBACK)) {
		if (route_cb.any_count <= 1) {
			/* The sender is the only listener: nothing to send. */
			if (rtm != NULL)
				kfree(rtm, M_RTABLE);
			m_freem(m);
			return (error);
		}
		/* There is another listener, so construct message */
		rp = sotorawcb(so);
	}
	if (rtm != NULL) {
		/* Write the reply over the request and fix up the length. */
		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
		if (m->m_pkthdr.len < rtm->rtm_msglen) {
			m_freem(m);
			m = NULL;
		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
		kfree(rtm, M_RTABLE);
	}
	/* rp is non-NULL only when the sender opted out of loopback. */
	if (m != NULL)
		rts_input_skip(m, family, rp);
	return (error);
}
830 
831 static void
832 route_output_add_callback(int cmd, int error, struct rt_addrinfo *rtinfo,
833 			  struct rtentry *rt, void *arg)
834 {
835 	struct rt_msghdr *rtm = arg;
836 
837 	if (error == 0 && rt != NULL) {
838 		rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
839 		    &rt->rt_rmx);
840 		rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
841 		rt->rt_rmx.rmx_locks |=
842 		    (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
843 		if (rtinfo->rti_genmask != NULL) {
844 			rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask);
845 			if (rt->rt_genmask == NULL) {
846 				/*
847 				 * This should not happen, since we
848 				 * have already installed genmask
849 				 * on each CPU before we reach here.
850 				 */
851 				panic("genmask is gone!?");
852 			}
853 		} else {
854 			rt->rt_genmask = NULL;
855 		}
856 		rtm->rtm_index = rt->rt_ifp->if_index;
857 	}
858 }
859 
860 static void
861 route_output_delete_callback(int cmd, int error, struct rt_addrinfo *rtinfo,
862 			  struct rtentry *rt, void *arg)
863 {
864 	if (error == 0 && rt) {
865 		++rt->rt_refcnt;
866 		if (fillrtmsg(arg, rt, rtinfo) != 0) {
867 			error = ENOBUFS;
868 			/* XXX no way to return the error */
869 		}
870 		--rt->rt_refcnt;
871 	}
872 	if (rt && rt->rt_refcnt == 0) {
873 		++rt->rt_refcnt;
874 		rtfree(rt);
875 	}
876 }
877 
878 static int
879 route_output_get_callback(int cmd, struct rt_addrinfo *rtinfo,
880 			  struct rtentry *rt, void *arg, int found_cnt)
881 {
882 	int error, found = 0;
883 
884 	if (((rtinfo->rti_flags ^ rt->rt_flags) & RTF_HOST) == 0)
885 		found = 1;
886 
887 	error = fillrtmsg(arg, rt, rtinfo);
888 	if (!error && found) {
889 		/* Got the exact match, we could return now! */
890 		error = EJUSTRETURN;
891 	}
892 	return error;
893 }
894 
/*
 * RTM_CHANGE search callback.  Apply the requested gateway, interface
 * address, metrics and genmask changes to route rt.  arg is the request
 * struct rt_msghdr, whose rtm_index is updated with the route's
 * interface index.  Returns 0 or the error from rt_getifa() /
 * rt_setgate().
 */
static int
route_output_change_callback(int cmd, struct rt_addrinfo *rtinfo,
			     struct rtentry *rt, void *arg, int found_cnt)
{
	struct rt_msghdr *rtm = arg;
	struct ifaddr *ifa;
	int error = 0;

	/*
	 * new gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((rt->rt_flags & RTF_GATEWAY) && rtinfo->rti_gateway != NULL) ||
	    rtinfo->rti_ifpaddr != NULL ||
	    (rtinfo->rti_ifaaddr != NULL &&
	     !sa_equal(rtinfo->rti_ifaaddr, rt->rt_ifa->ifa_addr))) {
		error = rt_getifa(rtinfo);
		if (error != 0)
			goto done;
	}
	if (rtinfo->rti_gateway != NULL) {
		/*
		 * We only need to generate rtmsg upon the
		 * first route to be changed.
		 */
		error = rt_setgate(rt, rt_key(rt), rtinfo->rti_gateway);
		if (error != 0)
			goto done;
	}
	if ((ifa = rtinfo->rti_ifa) != NULL) {
		struct ifaddr *oifa = rt->rt_ifa;

		/* Switch the route over to the newly selected ifaddr. */
		if (oifa != ifa) {
			if (oifa && oifa->ifa_rtrequest)
				oifa->ifa_rtrequest(RTM_DELETE, rt);
			IFAFREE(rt->rt_ifa);
			IFAREF(ifa);
			rt->rt_ifa = ifa;
			rt->rt_ifp = rtinfo->rti_ifp;
		}
	}
	rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);
	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
		rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt);
	if (rtinfo->rti_genmask != NULL) {
		rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask);
		if (rt->rt_genmask == NULL) {
			/*
			 * This should not happen, since we
			 * have already installed genmask
			 * on each CPU before we reach here.
			 */
			panic("genmask is gone!?");
		}
	}
	rtm->rtm_index = rt->rt_ifp->if_index;
	/* Only the first route found generates an RTM_CHANGE message. */
	if (found_cnt == 1)
		rt_rtmsg(RTM_CHANGE, rt, rt->rt_ifp, 0);
done:
	return error;
}
957 
958 static int
959 route_output_lock_callback(int cmd, struct rt_addrinfo *rtinfo,
960 			   struct rtentry *rt, void *arg,
961 			   int found_cnt __unused)
962 {
963 	struct rt_msghdr *rtm = arg;
964 
965 	rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
966 	rt->rt_rmx.rmx_locks |=
967 		(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
968 	return 0;
969 }
970 
971 static void
972 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out)
973 {
974 #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt;
975 	setmetric(RTV_RPIPE, rmx_recvpipe);
976 	setmetric(RTV_SPIPE, rmx_sendpipe);
977 	setmetric(RTV_SSTHRESH, rmx_ssthresh);
978 	setmetric(RTV_RTT, rmx_rtt);
979 	setmetric(RTV_RTTVAR, rmx_rttvar);
980 	setmetric(RTV_HOPCOUNT, rmx_hopcount);
981 	setmetric(RTV_MTU, rmx_mtu);
982 	setmetric(RTV_EXPIRE, rmx_expire);
983 	setmetric(RTV_MSL, rmx_msl);
984 	setmetric(RTV_IWMAXSEGS, rmx_iwmaxsegs);
985 	setmetric(RTV_IWCAPSEGS, rmx_iwcapsegs);
986 #undef setmetric
987 }
988 
989 /*
990  * Extract the addresses of the passed sockaddrs.
991  * Do a little sanity checking so as to avoid bad memory references.
992  * This data is derived straight from userland.
993  */
994 static int
995 rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo)
996 {
997 	struct sockaddr *sa;
998 	int i;
999 
1000 	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
1001 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1002 			continue;
1003 		sa = (struct sockaddr *)cp;
1004 		/*
1005 		 * It won't fit.
1006 		 */
1007 		if ((cp + sa->sa_len) > cplim) {
1008 			return (EINVAL);
1009 		}
1010 
1011 		/*
1012 		 * There are no more...  Quit now.
1013 		 * If there are more bits, they are in error.
1014 		 * I've seen this.  route(1) can evidently generate these.
1015 		 * This causes kernel to core dump.
1016 		 * For compatibility, if we see this, point to a safe address.
1017 		 */
1018 		if (sa->sa_len == 0) {
1019 			static struct sockaddr sa_zero = {
1020 				sizeof sa_zero, AF_INET,
1021 			};
1022 
1023 			rtinfo->rti_info[i] = &sa_zero;
1024 			kprintf("rtsock: received more addr bits than sockaddrs.\n");
1025 			return (0); /* should be EINVAL but for compat */
1026 		}
1027 
1028 		/* Accept the sockaddr. */
1029 		rtinfo->rti_info[i] = sa;
1030 		cp += RT_ROUNDUP(sa->sa_len);
1031 	}
1032 	return (0);
1033 }
1034 
1035 static int
1036 rt_msghdrsize(int type)
1037 {
1038 	switch (type) {
1039 	case RTM_DELADDR:
1040 	case RTM_NEWADDR:
1041 		return sizeof(struct ifa_msghdr);
1042 	case RTM_DELMADDR:
1043 	case RTM_NEWMADDR:
1044 		return sizeof(struct ifma_msghdr);
1045 	case RTM_IFINFO:
1046 		return sizeof(struct if_msghdr);
1047 	case RTM_IFANNOUNCE:
1048 	case RTM_IEEE80211:
1049 		return sizeof(struct if_announcemsghdr);
1050 	default:
1051 		return sizeof(struct rt_msghdr);
1052 	}
1053 }
1054 
1055 static int
1056 rt_msgsize(int type, const struct rt_addrinfo *rtinfo)
1057 {
1058 	int len, i;
1059 
1060 	len = rt_msghdrsize(type);
1061 	for (i = 0; i < RTAX_MAX; i++) {
1062 		if (rtinfo->rti_info[i] != NULL)
1063 			len += RT_ROUNDUP(rtinfo->rti_info[i]->sa_len);
1064 	}
1065 	len = ALIGN(len);
1066 	return len;
1067 }
1068 
1069 /*
1070  * Build a routing message in a buffer.
1071  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
1072  * to the end of the buffer after the message header.
1073  *
1074  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
1075  * This side-effect can be avoided if we reorder the addrs bitmask field in all
1076  * the route messages to line up so we can set it here instead of back in the
1077  * calling routine.
1078  *
1079  * NOTE! The buffer may already contain a partially filled-out rtm via
1080  *	 _fillrtmsg().
1081  */
1082 static void
1083 rt_msg_buffer(int type, struct rt_addrinfo *rtinfo, void *buf, int msglen)
1084 {
1085 	struct rt_msghdr *rtm;
1086 	char *cp;
1087 	int dlen, i;
1088 
1089 	rtm = (struct rt_msghdr *) buf;
1090 	rtm->rtm_version = RTM_VERSION;
1091 	rtm->rtm_type = type;
1092 	rtm->rtm_msglen = msglen;
1093 
1094 	cp = (char *)buf + rt_msghdrsize(type);
1095 	rtinfo->rti_addrs = 0;
1096 	for (i = 0; i < RTAX_MAX; i++) {
1097 		struct sockaddr *sa;
1098 
1099 		if ((sa = rtinfo->rti_info[i]) == NULL)
1100 			continue;
1101 		rtinfo->rti_addrs |= (1 << i);
1102 		dlen = RT_ROUNDUP(sa->sa_len);
1103 		bcopy(sa, cp, dlen);
1104 		cp += dlen;
1105 	}
1106 }
1107 
1108 /*
1109  * Build a routing message in a mbuf chain.
1110  * Copy the addresses in the rtinfo->rti_info[] sockaddr array
1111  * to the end of the mbuf after the message header.
1112  *
1113  * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[].
1114  * This side-effect can be avoided if we reorder the addrs bitmask field in all
1115  * the route messages to line up so we can set it here instead of back in the
1116  * calling routine.
1117  */
1118 static struct mbuf *
1119 rt_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1120 {
1121 	struct mbuf *m;
1122 	struct rt_msghdr *rtm;
1123 	int hlen, len;
1124 	int i;
1125 
1126 	hlen = rt_msghdrsize(type);
1127 	KASSERT(hlen <= MCLBYTES, ("rt_msg_mbuf: hlen %d doesn't fit", hlen));
1128 
1129 	m = m_getl(hlen, M_NOWAIT, MT_DATA, M_PKTHDR, NULL);
1130 	if (m == NULL)
1131 		return (NULL);
1132 	mbuftrackid(m, 32);
1133 	m->m_pkthdr.len = m->m_len = hlen;
1134 	m->m_pkthdr.rcvif = NULL;
1135 	rtinfo->rti_addrs = 0;
1136 	len = hlen;
1137 	for (i = 0; i < RTAX_MAX; i++) {
1138 		struct sockaddr *sa;
1139 		int dlen;
1140 
1141 		if ((sa = rtinfo->rti_info[i]) == NULL)
1142 			continue;
1143 		rtinfo->rti_addrs |= (1 << i);
1144 		dlen = RT_ROUNDUP(sa->sa_len);
1145 		m_copyback(m, len, dlen, (caddr_t)sa); /* can grow mbuf chain */
1146 		len += dlen;
1147 	}
1148 	if (m->m_pkthdr.len != len) { /* one of the m_copyback() calls failed */
1149 		m_freem(m);
1150 		return (NULL);
1151 	}
1152 	rtm = mtod(m, struct rt_msghdr *);
1153 	bzero(rtm, hlen);
1154 	rtm->rtm_msglen = len;
1155 	rtm->rtm_version = RTM_VERSION;
1156 	rtm->rtm_type = type;
1157 	return (m);
1158 }
1159 
1160 /*
1161  * This routine is called to generate a message from the routing
1162  * socket indicating that a redirect has occurred, a routing lookup
1163  * has failed, or that a protocol has detected timeouts to a particular
1164  * destination.
1165  */
1166 void
1167 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1168 {
1169 	struct sockaddr *dst = rtinfo->rti_info[RTAX_DST];
1170 	struct rt_msghdr *rtm;
1171 	struct mbuf *m;
1172 
1173 	if (route_cb.any_count == 0)
1174 		return;
1175 	m = rt_msg_mbuf(type, rtinfo);
1176 	if (m == NULL)
1177 		return;
1178 	rtm = mtod(m, struct rt_msghdr *);
1179 	rtm->rtm_flags = RTF_DONE | flags;
1180 	rtm->rtm_errno = error;
1181 	rtm->rtm_addrs = rtinfo->rti_addrs;
1182 	rts_input(m, familyof(dst));
1183 }
1184 
1185 void
1186 rt_dstmsg(int type, struct sockaddr *dst, int error)
1187 {
1188 	struct rt_msghdr *rtm;
1189 	struct rt_addrinfo addrs;
1190 	struct mbuf *m;
1191 
1192 	if (route_cb.any_count == 0)
1193 		return;
1194 	bzero(&addrs, sizeof(struct rt_addrinfo));
1195 	addrs.rti_info[RTAX_DST] = dst;
1196 	m = rt_msg_mbuf(type, &addrs);
1197 	if (m == NULL)
1198 		return;
1199 	rtm = mtod(m, struct rt_msghdr *);
1200 	rtm->rtm_flags = RTF_DONE;
1201 	rtm->rtm_errno = error;
1202 	rtm->rtm_addrs = addrs.rti_addrs;
1203 	rts_input(m, familyof(dst));
1204 }
1205 
1206 /*
1207  * This routine is called to generate a message from the routing
1208  * socket indicating that the status of a network interface has changed.
1209  */
1210 void
1211 rt_ifmsg(struct ifnet *ifp)
1212 {
1213 	struct if_msghdr *ifm;
1214 	struct mbuf *m;
1215 	struct rt_addrinfo rtinfo;
1216 
1217 	if (route_cb.any_count == 0)
1218 		return;
1219 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1220 	m = rt_msg_mbuf(RTM_IFINFO, &rtinfo);
1221 	if (m == NULL)
1222 		return;
1223 	ifm = mtod(m, struct if_msghdr *);
1224 	ifm->ifm_index = ifp->if_index;
1225 	ifm->ifm_flags = ifp->if_flags;
1226 	ifm->ifm_data = ifp->if_data;
1227 	ifm->ifm_addrs = 0;
1228 	rts_input(m, 0);
1229 }
1230 
1231 static void
1232 rt_ifamsg(int cmd, struct ifaddr *ifa)
1233 {
1234 	struct ifa_msghdr *ifam;
1235 	struct rt_addrinfo rtinfo;
1236 	struct mbuf *m;
1237 	struct ifnet *ifp = ifa->ifa_ifp;
1238 
1239 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1240 	rtinfo.rti_ifaaddr = ifa->ifa_addr;
1241 	rtinfo.rti_ifpaddr =
1242 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1243 	rtinfo.rti_netmask = ifa->ifa_netmask;
1244 	rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
1245 
1246 	m = rt_msg_mbuf(cmd, &rtinfo);
1247 	if (m == NULL)
1248 		return;
1249 
1250 	ifam = mtod(m, struct ifa_msghdr *);
1251 	ifam->ifam_index = ifp->if_index;
1252 	ifam->ifam_flags = ifa->ifa_flags;
1253 	ifam->ifam_addrs = rtinfo.rti_addrs;
1254 	ifam->ifam_addrflags = if_addrflags(ifa);
1255 	ifam->ifam_metric = ifa->ifa_metric;
1256 
1257 	rts_input(m, familyof(ifa->ifa_addr));
1258 }
1259 
1260 void
1261 rt_rtmsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int error)
1262 {
1263 	struct rt_msghdr *rtm;
1264 	struct rt_addrinfo rtinfo;
1265 	struct mbuf *m;
1266 	struct sockaddr *dst;
1267 
1268 	if (rt == NULL)
1269 		return;
1270 
1271 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1272 	rtinfo.rti_dst = dst = rt_key(rt);
1273 	rtinfo.rti_gateway = rt->rt_gateway;
1274 	rtinfo.rti_netmask = rt_mask(rt);
1275 	if (ifp != NULL) {
1276 		rtinfo.rti_ifpaddr =
1277 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1278 	}
1279 	if (rt->rt_ifa != NULL)
1280 		rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr;
1281 
1282 	m = rt_msg_mbuf(cmd, &rtinfo);
1283 	if (m == NULL)
1284 		return;
1285 
1286 	rtm = mtod(m, struct rt_msghdr *);
1287 	if (ifp != NULL)
1288 		rtm->rtm_index = ifp->if_index;
1289 	rtm->rtm_flags |= rt->rt_flags;
1290 	rtm->rtm_errno = error;
1291 	rtm->rtm_addrs = rtinfo.rti_addrs;
1292 
1293 	rts_input(m, familyof(dst));
1294 }
1295 
1296 /*
1297  * This is called to generate messages from the routing socket
1298  * indicating a network interface has had addresses associated with it.
1299  * if we ever reverse the logic and replace messages TO the routing
1300  * socket indicate a request to configure interfaces, then it will
1301  * be unnecessary as the routing socket will automatically generate
1302  * copies of it.
1303  */
1304 void
1305 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1306 {
1307 	if (route_cb.any_count == 0)
1308 		return;
1309 
1310 	if (cmd == RTM_ADD) {
1311 		rt_ifamsg(RTM_NEWADDR, ifa);
1312 		rt_rtmsg(RTM_ADD, rt, ifa->ifa_ifp, error);
1313 	} else {
1314 		KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd));
1315 		rt_rtmsg(RTM_DELETE, rt, ifa->ifa_ifp, error);
1316 		rt_ifamsg(RTM_DELADDR, ifa);
1317 	}
1318 }
1319 
1320 /*
1321  * This is the analogue to the rt_newaddrmsg which performs the same
1322  * function but for multicast group memberhips.  This is easier since
1323  * there is no route state to worry about.
1324  */
1325 void
1326 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1327 {
1328 	struct rt_addrinfo rtinfo;
1329 	struct mbuf *m = NULL;
1330 	struct ifnet *ifp = ifma->ifma_ifp;
1331 	struct ifma_msghdr *ifmam;
1332 
1333 	if (route_cb.any_count == 0)
1334 		return;
1335 
1336 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1337 	rtinfo.rti_ifaaddr = ifma->ifma_addr;
1338 	if (ifp != NULL && !TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
1339 		rtinfo.rti_ifpaddr =
1340 		TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1341 	}
1342 	/*
1343 	 * If a link-layer address is present, present it as a ``gateway''
1344 	 * (similarly to how ARP entries, e.g., are presented).
1345 	 */
1346 	rtinfo.rti_gateway = ifma->ifma_lladdr;
1347 
1348 	m = rt_msg_mbuf(cmd, &rtinfo);
1349 	if (m == NULL)
1350 		return;
1351 
1352 	ifmam = mtod(m, struct ifma_msghdr *);
1353 	ifmam->ifmam_index = ifp->if_index;
1354 	ifmam->ifmam_addrs = rtinfo.rti_addrs;
1355 
1356 	rts_input(m, familyof(ifma->ifma_addr));
1357 }
1358 
1359 static struct mbuf *
1360 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1361 		     struct rt_addrinfo *info)
1362 {
1363 	struct if_announcemsghdr *ifan;
1364 	struct mbuf *m;
1365 
1366 	if (route_cb.any_count == 0)
1367 		return NULL;
1368 
1369 	bzero(info, sizeof(*info));
1370 	m = rt_msg_mbuf(type, info);
1371 	if (m == NULL)
1372 		return NULL;
1373 
1374 	ifan = mtod(m, struct if_announcemsghdr *);
1375 	ifan->ifan_index = ifp->if_index;
1376 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof ifan->ifan_name);
1377 	ifan->ifan_what = what;
1378 	return m;
1379 }
1380 
1381 /*
1382  * This is called to generate routing socket messages indicating
1383  * IEEE80211 wireless events.
1384  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1385  */
1386 void
1387 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1388 {
1389 	struct rt_addrinfo info;
1390 	struct mbuf *m;
1391 
1392 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1393 	if (m == NULL)
1394 		return;
1395 
1396 	/*
1397 	 * Append the ieee80211 data.  Try to stick it in the
1398 	 * mbuf containing the ifannounce msg; otherwise allocate
1399 	 * a new mbuf and append.
1400 	 *
1401 	 * NB: we assume m is a single mbuf.
1402 	 */
1403 	if (data_len > M_TRAILINGSPACE(m)) {
1404 		/* XXX use m_getb(data_len, M_NOWAIT, MT_DATA, 0); */
1405 		struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1406 		if (n == NULL) {
1407 			m_freem(m);
1408 			return;
1409 		}
1410 		KKASSERT(data_len <= M_TRAILINGSPACE(n));
1411 		bcopy(data, mtod(n, void *), data_len);
1412 		n->m_len = data_len;
1413 		m->m_next = n;
1414 	} else if (data_len > 0) {
1415 		bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1416 		m->m_len += data_len;
1417 	}
1418 	mbuftrackid(m, 33);
1419 	if (m->m_flags & M_PKTHDR)
1420 		m->m_pkthdr.len += data_len;
1421 	mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1422 	rts_input(m, 0);
1423 }
1424 
1425 /*
1426  * This is called to generate routing socket messages indicating
1427  * network interface arrival and departure.
1428  */
1429 void
1430 rt_ifannouncemsg(struct ifnet *ifp, int what)
1431 {
1432 	struct rt_addrinfo addrinfo;
1433 	struct mbuf *m;
1434 
1435 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &addrinfo);
1436 	if (m != NULL)
1437 		rts_input(m, 0);
1438 }
1439 
1440 static int
1441 resizewalkarg(struct walkarg *w, int len)
1442 {
1443 	void *newptr;
1444 
1445 	newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK);
1446 	if (newptr == NULL)
1447 		return (ENOMEM);
1448 	if (w->w_tmem != NULL)
1449 		kfree(w->w_tmem, M_RTABLE);
1450 	w->w_tmem = newptr;
1451 	w->w_tmemsize = len;
1452 	bzero(newptr, len);
1453 
1454 	return (0);
1455 }
1456 
1457 static void
1458 ifnet_compute_stats(struct ifnet *ifp)
1459 {
1460 	IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
1461 	IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
1462 	IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
1463 	IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
1464 	IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
1465 	IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
1466 	IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
1467 	IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
1468 	IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
1469 	IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
1470 	IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
1471 }
1472 
1473 static int
1474 if_addrflags(const struct ifaddr *ifa)
1475 {
1476 	switch (ifa->ifa_addr->sa_family) {
1477 #ifdef INET6
1478 	case AF_INET6:
1479 		return ((const struct in6_ifaddr *)ifa)->ia6_flags;
1480 #endif
1481 	default:
1482 		return 0;
1483 	}
1484 }
1485 
1486 static int
1487 sysctl_iflist(int af, struct walkarg *w)
1488 {
1489 	struct ifnet *ifp;
1490 	struct rt_addrinfo rtinfo;
1491 	int msglen, error;
1492 
1493 	bzero(&rtinfo, sizeof(struct rt_addrinfo));
1494 
1495 	ifnet_lock();
1496 	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
1497 		struct ifaddr_container *ifac, *ifac_mark;
1498 		struct ifaddr_marker mark;
1499 		struct ifaddrhead *head;
1500 		struct ifaddr *ifa;
1501 
1502 		if (w->w_arg && w->w_arg != ifp->if_index)
1503 			continue;
1504 		head = &ifp->if_addrheads[mycpuid];
1505 		/*
1506 		 * There is no need to reference the first ifaddr
1507 		 * even if the following resizewalkarg() blocks,
1508 		 * since the first ifaddr will not be destroyed
1509 		 * when the ifnet lock is held.
1510 		 */
1511 		ifac = TAILQ_FIRST(head);
1512 		ifa = ifac->ifa;
1513 		rtinfo.rti_ifpaddr = ifa->ifa_addr;
1514 		msglen = rt_msgsize(RTM_IFINFO, &rtinfo);
1515 		if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0) {
1516 			ifnet_unlock();
1517 			return (ENOMEM);
1518 		}
1519 		rt_msg_buffer(RTM_IFINFO, &rtinfo, w->w_tmem, msglen);
1520 		rtinfo.rti_ifpaddr = NULL;
1521 		if (w->w_req != NULL && w->w_tmem != NULL) {
1522 			struct if_msghdr *ifm = w->w_tmem;
1523 
1524 			ifm->ifm_index = ifp->if_index;
1525 			ifm->ifm_flags = ifp->if_flags;
1526 			ifnet_compute_stats(ifp);
1527 			ifm->ifm_data = ifp->if_data;
1528 			ifm->ifm_addrs = rtinfo.rti_addrs;
1529 			error = SYSCTL_OUT(w->w_req, ifm, msglen);
1530 			if (error) {
1531 				ifnet_unlock();
1532 				return (error);
1533 			}
1534 		}
1535 		/*
1536 		 * Add a marker, since SYSCTL_OUT() could block and during
1537 		 * that period the list could be changed.
1538 		 */
1539 		ifa_marker_init(&mark, ifp);
1540 		ifac_mark = &mark.ifac;
1541 		TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);
1542 		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
1543 			TAILQ_REMOVE(head, ifac_mark, ifa_link);
1544 			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);
1545 
1546 			ifa = ifac->ifa;
1547 
1548 			/* Ignore marker */
1549 			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
1550 				continue;
1551 
1552 			if (af && af != ifa->ifa_addr->sa_family)
1553 				continue;
1554 			if (curproc->p_ucred->cr_prison &&
1555 			    prison_if(curproc->p_ucred, ifa->ifa_addr))
1556 				continue;
1557 			rtinfo.rti_ifaaddr = ifa->ifa_addr;
1558 			rtinfo.rti_netmask = ifa->ifa_netmask;
1559 			rtinfo.rti_bcastaddr = ifa->ifa_dstaddr;
1560 			msglen = rt_msgsize(RTM_NEWADDR, &rtinfo);
1561 			/*
1562 			 * Keep a reference on this ifaddr, so that it will
1563 			 * not be destroyed if the following resizewalkarg()
1564 			 * blocks.
1565 			 */
1566 			IFAREF(ifa);
1567 			if (w->w_tmemsize < msglen &&
1568 			    resizewalkarg(w, msglen) != 0) {
1569 				IFAFREE(ifa);
1570 				TAILQ_REMOVE(head, ifac_mark, ifa_link);
1571 				ifnet_unlock();
1572 				return (ENOMEM);
1573 			}
1574 			rt_msg_buffer(RTM_NEWADDR, &rtinfo, w->w_tmem, msglen);
1575 			if (w->w_req != NULL) {
1576 				struct ifa_msghdr *ifam = w->w_tmem;
1577 
1578 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1579 				ifam->ifam_flags = ifa->ifa_flags;
1580 				ifam->ifam_addrs = rtinfo.rti_addrs;
1581 				ifam->ifam_addrflags = if_addrflags(ifa);
1582 				ifam->ifam_metric = ifa->ifa_metric;
1583 				error = SYSCTL_OUT(w->w_req, w->w_tmem, msglen);
1584 				if (error) {
1585 					IFAFREE(ifa);
1586 					TAILQ_REMOVE(head, ifac_mark, ifa_link);
1587 					ifnet_unlock();
1588 					return (error);
1589 				}
1590 			}
1591 			IFAFREE(ifa);
1592 		}
1593 		TAILQ_REMOVE(head, ifac_mark, ifa_link);
1594 		rtinfo.rti_netmask = NULL;
1595 		rtinfo.rti_ifaaddr = NULL;
1596 		rtinfo.rti_bcastaddr = NULL;
1597 	}
1598 	ifnet_unlock();
1599 	return (0);
1600 }
1601 
1602 static int
1603 rttable_walkarg_create(struct rttable_walkarg *w, int op, int arg)
1604 {
1605 	struct rt_addrinfo rtinfo;
1606 	struct sockaddr_storage ss;
1607 	int i, msglen;
1608 
1609 	memset(w, 0, sizeof(*w));
1610 	w->w_op = op;
1611 	w->w_arg = arg;
1612 
1613 	memset(&ss, 0, sizeof(ss));
1614 	ss.ss_len = sizeof(ss);
1615 
1616 	memset(&rtinfo, 0, sizeof(rtinfo));
1617 	for (i = 0; i < RTAX_MAX; ++i)
1618 		rtinfo.rti_info[i] = (struct sockaddr *)&ss;
1619 	msglen = rt_msgsize(RTM_GET, &rtinfo);
1620 
1621 	w->w_bufsz = msglen * RTTABLE_DUMP_MSGCNT_MAX;
1622 	w->w_buf = kmalloc(w->w_bufsz, M_TEMP, M_WAITOK | M_NULLOK);
1623 	if (w->w_buf == NULL)
1624 		return ENOMEM;
1625 	return 0;
1626 }
1627 
1628 static void
1629 rttable_walkarg_destroy(struct rttable_walkarg *w)
1630 {
1631 	kfree(w->w_buf, M_TEMP);
1632 }
1633 
1634 static void
1635 rttable_entry_rtinfo(struct rt_addrinfo *rtinfo, struct radix_node *rn)
1636 {
1637 	struct rtentry *rt = (struct rtentry *)rn;
1638 
1639 	bzero(rtinfo, sizeof(*rtinfo));
1640 	rtinfo->rti_dst = rt_key(rt);
1641 	rtinfo->rti_gateway = rt->rt_gateway;
1642 	rtinfo->rti_netmask = rt_mask(rt);
1643 	rtinfo->rti_genmask = rt->rt_genmask;
1644 	if (rt->rt_ifp != NULL) {
1645 		rtinfo->rti_ifpaddr =
1646 		TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])->ifa->ifa_addr;
1647 		rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr;
1648 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1649 			rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr;
1650 	}
1651 }
1652 
1653 static int
1654 rttable_walk_entry(struct radix_node *rn, void *xw)
1655 {
1656 	struct rttable_walkarg *w = xw;
1657 	struct rtentry *rt = (struct rtentry *)rn;
1658 	struct rt_addrinfo rtinfo;
1659 	struct rt_msghdr *rtm;
1660 	boolean_t save = FALSE;
1661 	int msglen, w_bufleft;
1662 	void *ptr;
1663 
1664 	rttable_entry_rtinfo(&rtinfo, rn);
1665 	msglen = rt_msgsize(RTM_GET, &rtinfo);
1666 
1667 	w_bufleft = w->w_bufsz - w->w_buflen;
1668 
1669 	if (rn->rn_dupedkey != NULL) {
1670 		struct radix_node *rn1 = rn;
1671 		int total_msglen = msglen;
1672 
1673 		/*
1674 		 * Make sure that we have enough space left for all
1675 		 * dupedkeys, since rn_walktree_at always starts
1676 		 * from the first dupedkey.
1677 		 */
1678 		while ((rn1 = rn1->rn_dupedkey) != NULL) {
1679 			struct rt_addrinfo rtinfo1;
1680 			int msglen1;
1681 
1682 			if (rn1->rn_flags & RNF_ROOT)
1683 				continue;
1684 
1685 			rttable_entry_rtinfo(&rtinfo1, rn1);
1686 			msglen1 = rt_msgsize(RTM_GET, &rtinfo1);
1687 			total_msglen += msglen1;
1688 		}
1689 
1690 		if (total_msglen > w_bufleft) {
1691 			if (total_msglen > w->w_bufsz) {
1692 				static int logged = 0;
1693 
1694 				if (!logged) {
1695 					kprintf("buffer is too small for "
1696 					    "all dupedkeys, increase "
1697 					    "RTTABLE_DUMP_MSGCNT_MAX\n");
1698 					logged = 1;
1699 				}
1700 				return ENOMEM;
1701 			}
1702 			save = TRUE;
1703 		}
1704 	} else if (msglen > w_bufleft) {
1705 		save = TRUE;
1706 	}
1707 
1708 	if (save) {
1709 		/*
1710 		 * Not enough buffer left; remember the position
1711 		 * to start from upon next round.
1712 		 */
1713 		KASSERT(msglen <= w->w_bufsz, ("msg too long %d", msglen));
1714 
1715 		KASSERT(rtinfo.rti_dst->sa_len <= sizeof(w->w_key0),
1716 		    ("key too long %d", rtinfo.rti_dst->sa_len));
1717 		memset(&w->w_key0, 0, sizeof(w->w_key0));
1718 		memcpy(&w->w_key0, rtinfo.rti_dst, rtinfo.rti_dst->sa_len);
1719 		w->w_key = (const char *)&w->w_key0;
1720 
1721 		if (rtinfo.rti_netmask != NULL) {
1722 			KASSERT(
1723 			    rtinfo.rti_netmask->sa_len <= sizeof(w->w_mask0),
1724 			    ("mask too long %d", rtinfo.rti_netmask->sa_len));
1725 			memset(&w->w_mask0, 0, sizeof(w->w_mask0));
1726 			memcpy(&w->w_mask0, rtinfo.rti_netmask,
1727 			    rtinfo.rti_netmask->sa_len);
1728 			w->w_mask = (const char *)&w->w_mask0;
1729 		} else {
1730 			w->w_mask = NULL;
1731 		}
1732 		return EJUSTRETURN;
1733 	}
1734 
1735 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1736 		return 0;
1737 
1738 	ptr = ((uint8_t *)w->w_buf) + w->w_buflen;
1739 	rt_msg_buffer(RTM_GET, &rtinfo, ptr, msglen);
1740 
1741 	rtm = (struct rt_msghdr *)ptr;
1742 	rtm->rtm_flags = rt->rt_flags;
1743 	rtm->rtm_use = rt->rt_use;
1744 	rtm->rtm_rmx = rt->rt_rmx;
1745 	rtm->rtm_index = rt->rt_ifp->if_index;
1746 	rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1747 	rtm->rtm_addrs = rtinfo.rti_addrs;
1748 
1749 	w->w_buflen += msglen;
1750 
1751 	return 0;
1752 }
1753 
1754 static void
1755 rttable_walk_dispatch(netmsg_t msg)
1756 {
1757 	struct netmsg_rttable_walk *nmsg = (struct netmsg_rttable_walk *)msg;
1758 	struct radix_node_head *rnh = rt_tables[mycpuid][nmsg->af];
1759 	struct rttable_walkarg *w = nmsg->w;
1760 	int error;
1761 
1762 	error = rnh->rnh_walktree_at(rnh, w->w_key, w->w_mask,
1763 	    rttable_walk_entry, w);
1764 	lwkt_replymsg(&nmsg->base.lmsg, error);
1765 }
1766 
1767 static int
1768 sysctl_rttable(int af, struct sysctl_req *req, int op, int arg)
1769 {
1770 	struct rttable_walkarg w;
1771 	int error, i;
1772 
1773 	error = rttable_walkarg_create(&w, op, arg);
1774 	if (error)
1775 		return error;
1776 
1777 	error = EINVAL;
1778 	for (i = 1; i <= AF_MAX; i++) {
1779 		if (rt_tables[mycpuid][i] != NULL && (af == 0 || af == i)) {
1780 			w.w_key = NULL;
1781 			w.w_mask = NULL;
1782 			for (;;) {
1783 				struct netmsg_rttable_walk nmsg;
1784 
1785 				netmsg_init(&nmsg.base, NULL,
1786 				    &curthread->td_msgport, 0,
1787 				    rttable_walk_dispatch);
1788 				nmsg.af = i;
1789 				nmsg.w = &w;
1790 
1791 				w.w_buflen = 0;
1792 
1793 				error = lwkt_domsg(netisr_cpuport(mycpuid),
1794 				    &nmsg.base.lmsg, 0);
1795 				if (error && error != EJUSTRETURN)
1796 					goto done;
1797 
1798 				if (req != NULL && w.w_buflen > 0) {
1799 					int error1;
1800 
1801 					error1 = SYSCTL_OUT(req, w.w_buf,
1802 					    w.w_buflen);
1803 					if (error1) {
1804 						error = error1;
1805 						goto done;
1806 					}
1807 				}
1808 				if (error == 0) /* done */
1809 					break;
1810 			}
1811 		}
1812 	}
1813 done:
1814 	rttable_walkarg_destroy(&w);
1815 	return error;
1816 }
1817 
1818 static int
1819 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1820 {
1821 	int	*name = (int *)arg1;
1822 	u_int	namelen = arg2;
1823 	int	error = EINVAL;
1824 	int	origcpu, cpu;
1825 	u_char  af;
1826 	struct	walkarg w;
1827 
1828 	name ++;
1829 	namelen--;
1830 	if (req->newptr)
1831 		return (EPERM);
1832 	if (namelen != 3 && namelen != 4)
1833 		return (EINVAL);
1834 	af = name[0];
1835 	bzero(&w, sizeof w);
1836 	w.w_op = name[1];
1837 	w.w_arg = name[2];
1838 	w.w_req = req;
1839 
1840 	/*
1841 	 * Optional third argument specifies cpu, used primarily for
1842 	 * debugging the route table.
1843 	 */
1844 	if (namelen == 4) {
1845 		if (name[3] < 0 || name[3] >= netisr_ncpus)
1846 			return (EINVAL);
1847 		cpu = name[3];
1848 	} else {
1849 		/*
1850 		 * Target cpu is not specified, use cpu0 then, so that
1851 		 * the result set will be relatively stable.
1852 		 */
1853 		cpu = 0;
1854 	}
1855 	origcpu = mycpuid;
1856 	lwkt_migratecpu(cpu);
1857 
1858 	switch (w.w_op) {
1859 	case NET_RT_DUMP:
1860 	case NET_RT_FLAGS:
1861 		error = sysctl_rttable(af, w.w_req, w.w_op, w.w_arg);
1862 		break;
1863 
1864 	case NET_RT_IFLIST:
1865 		error = sysctl_iflist(af, &w);
1866 		break;
1867 	}
1868 	if (w.w_tmem != NULL)
1869 		kfree(w.w_tmem, M_RTABLE);
1870 
1871 	lwkt_migratecpu(origcpu);
1872 	return (error);
1873 }
1874 
1875 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1876 
1877 /*
1878  * Definitions of protocols supported in the ROUTE domain.
1879  */
1880 
1881 static struct domain routedomain;		/* or at least forward */
1882 
1883 static struct protosw routesw[] = {
1884     {
1885 	.pr_type = SOCK_RAW,
1886 	.pr_domain = &routedomain,
1887 	.pr_protocol = 0,
1888 	.pr_flags = PR_ATOMIC|PR_ADDR,
1889 	.pr_input = NULL,
1890 	.pr_output = route_output,
1891 	.pr_ctlinput = raw_ctlinput,
1892 	.pr_ctloutput = route_ctloutput,
1893 	.pr_ctlport = cpu0_ctlport,
1894 
1895 	.pr_init = raw_init,
1896 	.pr_usrreqs = &route_usrreqs
1897     }
1898 };
1899 
1900 static struct domain routedomain = {
1901 	.dom_family		= AF_ROUTE,
1902 	.dom_name		= "route",
1903 	.dom_init		= NULL,
1904 	.dom_externalize	= NULL,
1905 	.dom_dispose		= NULL,
1906 	.dom_protosw		= routesw,
1907 	.dom_protoswNPROTOSW	= &routesw[(sizeof routesw)/(sizeof routesw[0])],
1908 	.dom_next		= SLIST_ENTRY_INITIALIZER,
1909 	.dom_rtattach		= NULL,
1910 	.dom_rtoffset		= 0,
1911 	.dom_maxrtkey		= 0,
1912 	.dom_ifattach		= NULL,
1913 	.dom_ifdetach		= NULL
1914 };
1915 
1916 DOMAIN_SET(route);
1917 
1918