1 /* 2 * Copyright (c) 2004, 2005 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Jeffrey M. Hsu. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of The DragonFly Project nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific, prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 62 * $FreeBSD: src/sys/net/rtsock.c,v 1.44.2.11 2002/12/04 14:05:41 ru Exp $ 63 */ 64 65 #include "opt_inet6.h" 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/kernel.h> 70 #include <sys/sysctl.h> 71 #include <sys/proc.h> 72 #include <sys/caps.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/protosw.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/domain.h> 79 #include <sys/jail.h> 80 81 #include <sys/thread2.h> 82 #include <sys/socketvar2.h> 83 84 #include <net/if.h> 85 #include <net/if_var.h> 86 #include <net/route.h> 87 #include <net/raw_cb.h> 88 #include <net/netmsg2.h> 89 #include <net/netisr2.h> 90 91 #ifdef INET6 92 #include <netinet/in_var.h> 93 #endif 94 95 /* sa_family is after sa_len, rest is data */ 96 #define _SA_MINSIZE (offsetof(struct sockaddr, sa_family) + \ 97 sizeof(((struct sockaddr *)0)->sa_family)) 98 99 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); 100 101 static struct route_cb { 102 int ip_count; 103 int ip6_count; 104 int any_count; 105 } route_cb; 106 107 static const struct sockaddr route_src = { 2, PF_ROUTE, }; 108 109 struct walkarg { 110 int w_tmemsize; 111 int w_op, w_arg; 112 void *w_tmem; 113 struct sysctl_req *w_req; 114 }; 115 116 #ifndef RTTABLE_DUMP_MSGCNT_MAX 117 /* Should be large enough for dupkeys */ 118 #define RTTABLE_DUMP_MSGCNT_MAX 64 119 #endif 120 121 struct rttable_walkarg { 122 int w_op; 123 int w_arg; 124 int w_bufsz; 125 void *w_buf; 126 127 int w_buflen; 128 129 const char *w_key; 130 const char *w_mask; 131 132 struct sockaddr_storage w_key0; 133 struct sockaddr_storage w_mask0; 134 }; 135 136 struct netmsg_rttable_walk { 137 struct netmsg_base base; 138 int af; 139 struct rttable_walkarg *w; 140 }; 141 142 struct routecb { 143 struct rawcb rocb_rcb; 144 unsigned int rocb_msgfilter; 145 char *rocb_missfilter; 146 size_t rocb_missfilterlen; 147 }; 148 #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) 149 150 static struct mbuf * 151 rt_msg_mbuf (int, struct rt_addrinfo *); 152 static void rt_msg_buffer (int, struct rt_addrinfo *, void *buf, int len); 153 static int rt_msgsize(int type, const struct rt_addrinfo *rtinfo); 154 static int rt_xaddrs (char *, char *, struct rt_addrinfo *); 155 static int sysctl_rttable(int af, struct sysctl_req *req, int op, int arg); 156 static int if_addrflags(const struct ifaddr *ifa); 157 static int sysctl_iflist (int af, struct walkarg *w); 158 static int route_output(struct mbuf *, struct socket *, ...); 159 static void rt_setmetrics (u_long, struct rt_metrics *, 160 struct rt_metrics *); 161 162 /* 163 * It really doesn't make any sense at all for this code to share much 164 * with raw_usrreq.c, since its functionality is so restricted. XXX 165 */ 166 static void 167 rts_abort(netmsg_t msg) 168 { 169 crit_enter(); 170 raw_usrreqs.pru_abort(msg); 171 /* msg invalid now */ 172 crit_exit(); 173 } 174 175 static int 176 rts_filter(struct mbuf *m, const struct sockproto *proto, 177 const struct rawcb *rp) 178 { 179 const struct routecb *rop = (const struct routecb *)rp; 180 const struct rt_msghdr *rtm; 181 182 KKASSERT(m != NULL); 183 KKASSERT(proto != NULL); 184 KKASSERT(rp != NULL); 185 186 /* Wrong family for this socket. */ 187 if (proto->sp_family != PF_ROUTE) 188 return ENOPROTOOPT; 189 190 /* If no filter set, just return. */ 191 if (rop->rocb_msgfilter == 0 && rop->rocb_missfilterlen == 0) 192 return 0; 193 194 /* Ensure we can access rtm_type */ 195 if (m->m_len < 196 offsetof(struct rt_msghdr, rtm_type) + sizeof(rtm->rtm_type)) 197 return EINVAL; 198 199 rtm = mtod(m, const struct rt_msghdr *); 200 /* If the rtm type is filtered out, return a positive. */ 201 if (rop->rocb_msgfilter != 0 && 202 !(rop->rocb_msgfilter & ROUTE_FILTER(rtm->rtm_type))) 203 return EEXIST; 204 205 if (rop->rocb_missfilterlen != 0 && rtm->rtm_type == RTM_MISS) { 206 CTASSERT(RTAX_DST == 0); 207 struct sockaddr *sa; 208 struct sockaddr_storage ss; 209 struct sockaddr *dst = (struct sockaddr *)&ss; 210 char *cp = rop->rocb_missfilter; 211 char *ep = cp + rop->rocb_missfilterlen; 212 213 /* Ensure we can access sa_len */ 214 if (m->m_pkthdr.len < sizeof(*rtm) + _SA_MINSIZE) 215 return EINVAL; 216 m_copydata(m, sizeof(*rtm) + offsetof(struct sockaddr, sa_len), 217 sizeof(ss.ss_len), (caddr_t)&ss); 218 if (ss.ss_len < _SA_MINSIZE || 219 ss.ss_len > sizeof(ss) || 220 m->m_pkthdr.len < sizeof(*rtm) + ss.ss_len) 221 return EINVAL; 222 /* Copy out the destination sockaddr */ 223 m_copydata(m, sizeof(*rtm), ss.ss_len, (caddr_t)&ss); 224 225 /* Find a matching sockaddr in the filter */ 226 while (cp < ep) { 227 sa = (struct sockaddr *)cp; 228 if (sa->sa_len == dst->sa_len && 229 memcmp(sa, dst, sa->sa_len) == 0) 230 break; 231 cp += RT_ROUNDUP(sa->sa_len); 232 } 233 if (cp == ep) 234 return EEXIST; 235 } 236 237 /* Passed the filter. */ 238 return 0; 239 } 240 241 242 /* pru_accept is EOPNOTSUPP */ 243 244 static void 245 rts_attach(netmsg_t msg) 246 { 247 struct socket *so = msg->base.nm_so; 248 struct pru_attach_info *ai = msg->attach.nm_ai; 249 struct rawcb *rp; 250 struct routecb *rop; 251 int proto = msg->attach.nm_proto; 252 int error; 253 254 crit_enter(); 255 if (sotorawcb(so) != NULL) { 256 error = EISCONN; 257 goto done; 258 } 259 260 rop = kmalloc(sizeof *rop, M_PCB, M_WAITOK | M_ZERO); 261 rp = &rop->rocb_rcb; 262 263 /* 264 * The critical section is necessary to block protocols from sending 265 * error notifications (like RTM_REDIRECT or RTM_LOSING) while 266 * this PCB is extant but incompletely initialized. 267 * Probably we should try to do more of this work beforehand and 268 * eliminate the critical section. 269 */ 270 so->so_pcb = rp; 271 soreference(so); /* so_pcb assignment */ 272 error = raw_attach(so, proto, ai->sb_rlimit); 273 rp = sotorawcb(so); 274 if (error) { 275 kfree(rop, M_PCB); 276 goto done; 277 } 278 switch(rp->rcb_proto.sp_protocol) { 279 case AF_INET: 280 route_cb.ip_count++; 281 break; 282 case AF_INET6: 283 route_cb.ip6_count++; 284 break; 285 } 286 rp->rcb_faddr = &route_src; 287 rp->rcb_filter = rts_filter; 288 route_cb.any_count++; 289 soisconnected(so); 290 so->so_options |= SO_USELOOPBACK; 291 error = 0; 292 done: 293 crit_exit(); 294 lwkt_replymsg(&msg->lmsg, error); 295 } 296 297 static void 298 rts_bind(netmsg_t msg) 299 { 300 crit_enter(); 301 raw_usrreqs.pru_bind(msg); /* xxx just EINVAL */ 302 /* msg invalid now */ 303 crit_exit(); 304 } 305 306 static void 307 rts_connect(netmsg_t msg) 308 { 309 crit_enter(); 310 raw_usrreqs.pru_connect(msg); /* XXX just EINVAL */ 311 /* msg invalid now */ 312 crit_exit(); 313 } 314 315 /* pru_connect2 is EOPNOTSUPP */ 316 /* pru_control is EOPNOTSUPP */ 317 318 static void 319 rts_detach(netmsg_t msg) 320 { 321 struct socket *so = msg->base.nm_so; 322 struct rawcb *rp = sotorawcb(so); 323 struct routecb *rop = (struct routecb *)rp; 324 325 crit_enter(); 326 if (rop->rocb_missfilterlen != 0) 327 kfree(rop->rocb_missfilter, M_PCB); 328 if (rp != NULL) { 329 switch(rp->rcb_proto.sp_protocol) { 330 case AF_INET: 331 route_cb.ip_count--; 332 break; 333 case AF_INET6: 334 route_cb.ip6_count--; 335 break; 336 } 337 route_cb.any_count--; 338 } 339 raw_usrreqs.pru_detach(msg); 340 /* msg invalid now */ 341 crit_exit(); 342 } 343 344 static void 345 rts_disconnect(netmsg_t msg) 346 { 347 crit_enter(); 348 raw_usrreqs.pru_disconnect(msg); 349 /* msg invalid now */ 350 crit_exit(); 351 } 352 353 /* pru_listen is EOPNOTSUPP */ 354 355 static void 356 rts_peeraddr(netmsg_t msg) 357 { 358 crit_enter(); 359 raw_usrreqs.pru_peeraddr(msg); 360 /* msg invalid now */ 361 crit_exit(); 362 } 363 364 /* pru_rcvd is EOPNOTSUPP */ 365 /* pru_rcvoob is EOPNOTSUPP */ 366 367 static void 368 rts_send(netmsg_t msg) 369 { 370 crit_enter(); 371 raw_usrreqs.pru_send(msg); 372 /* msg invalid now */ 373 crit_exit(); 374 } 375 376 /* pru_sense is null */ 377 378 static void 379 rts_shutdown(netmsg_t msg) 380 { 381 crit_enter(); 382 raw_usrreqs.pru_shutdown(msg); 383 /* msg invalid now */ 384 crit_exit(); 385 } 386 387 static void 388 rts_sockaddr(netmsg_t msg) 389 { 390 crit_enter(); 391 raw_usrreqs.pru_sockaddr(msg); 392 /* msg invalid now */ 393 crit_exit(); 394 } 395 396 static struct pr_usrreqs route_usrreqs = { 397 .pru_abort = rts_abort, 398 .pru_accept = pr_generic_notsupp, 399 .pru_attach = rts_attach, 400 .pru_bind = rts_bind, 401 .pru_connect = rts_connect, 402 .pru_connect2 = pr_generic_notsupp, 403 .pru_control = pr_generic_notsupp, 404 .pru_detach = rts_detach, 405 .pru_disconnect = rts_disconnect, 406 .pru_listen = pr_generic_notsupp, 407 .pru_peeraddr = rts_peeraddr, 408 .pru_rcvd = pr_generic_notsupp, 409 .pru_rcvoob = pr_generic_notsupp, 410 .pru_send = rts_send, 411 .pru_sense = pru_sense_null, 412 .pru_shutdown = rts_shutdown, 413 .pru_sockaddr = rts_sockaddr, 414 .pru_sosend = sosend, 415 .pru_soreceive = soreceive 416 }; 417 418 static __inline sa_family_t 419 familyof(struct sockaddr *sa) 420 { 421 return (sa != NULL ? sa->sa_family : 0); 422 } 423 424 /* 425 * Routing socket input function. The packet must be serialized onto cpu 0. 426 * We use the cpu0_soport() netisr processing loop to handle it. 427 * 428 * This looks messy but it means that anyone, including interrupt code, 429 * can send a message to the routing socket. 430 */ 431 static void 432 rts_input_handler(netmsg_t msg) 433 { 434 static const struct sockaddr route_dst = { 2, PF_ROUTE, }; 435 struct sockproto route_proto; 436 struct netmsg_packet *pmsg = &msg->packet; 437 struct mbuf *m; 438 sa_family_t family; 439 struct rawcb *skip; 440 441 family = pmsg->base.lmsg.u.ms_result; 442 route_proto.sp_family = PF_ROUTE; 443 route_proto.sp_protocol = family; 444 445 m = pmsg->nm_packet; 446 M_ASSERTPKTHDR(m); 447 448 skip = m->m_pkthdr.header; 449 m->m_pkthdr.header = NULL; 450 451 raw_input(m, &route_proto, &route_src, &route_dst, skip); 452 } 453 454 static void 455 rts_input_skip(struct mbuf *m, sa_family_t family, struct rawcb *skip) 456 { 457 struct netmsg_packet *pmsg; 458 lwkt_port_t port; 459 460 M_ASSERTPKTHDR(m); 461 462 port = netisr_cpuport(0); /* XXX same as for routing socket */ 463 pmsg = &m->m_hdr.mh_netmsg; 464 netmsg_init(&pmsg->base, NULL, &netisr_apanic_rport, 465 0, rts_input_handler); 466 pmsg->nm_packet = m; 467 pmsg->base.lmsg.u.ms_result = family; 468 m->m_pkthdr.header = skip; /* XXX steal field in pkthdr */ 469 lwkt_sendmsg(port, &pmsg->base.lmsg); 470 } 471 472 static __inline void 473 rts_input(struct mbuf *m, sa_family_t family) 474 { 475 rts_input_skip(m, family, NULL); 476 } 477 478 static void 479 route_ctloutput(netmsg_t msg) 480 { 481 struct socket *so = msg->ctloutput.base.nm_so; 482 struct sockopt *sopt = msg->ctloutput.nm_sopt; 483 struct routecb *rop = sotoroutecb(so); 484 int error; 485 unsigned int msgfilter; 486 unsigned char *cp, *ep; 487 size_t len; 488 struct sockaddr *sa; 489 490 if (sopt->sopt_level != AF_ROUTE) { 491 error = EINVAL; 492 goto out; 493 } 494 495 error = 0; 496 497 switch (sopt->sopt_dir) { 498 case SOPT_SET: 499 switch (sopt->sopt_name) { 500 case ROUTE_MSGFILTER: 501 error = soopt_to_kbuf(sopt, &msgfilter, 502 sizeof(msgfilter), sizeof(msgfilter)); 503 if (error == 0) 504 rop->rocb_msgfilter = msgfilter; 505 break; 506 case RO_MISSFILTER: 507 /* Validate the data */ 508 len = 0; 509 cp = sopt->sopt_val; 510 ep = cp + sopt->sopt_valsize; 511 while (cp < ep) { 512 if (ep - cp < 513 offsetof(struct sockaddr, sa_len) + 514 sizeof(sa->sa_len)) 515 break; 516 if (++len > RO_FILTSA_MAX) { 517 error = ENOBUFS; 518 break; 519 } 520 sa = (struct sockaddr *)cp; 521 if (sa->sa_len < _SA_MINSIZE || 522 sa->sa_len > sizeof(struct sockaddr_storage)) 523 break; 524 cp += RT_ROUNDUP(sa->sa_len); 525 } 526 if (cp != ep) { 527 if (error == 0) 528 error = EINVAL; 529 break; 530 } 531 if (rop->rocb_missfilterlen != 0) 532 kfree(rop->rocb_missfilter, M_PCB); 533 if (sopt->sopt_valsize != 0) { 534 rop->rocb_missfilter = 535 kmalloc(sopt->sopt_valsize, 536 M_PCB, M_WAITOK | M_NULLOK); 537 if (rop->rocb_missfilter == NULL) { 538 rop->rocb_missfilterlen = 0; 539 error = ENOBUFS; 540 break; 541 } 542 } else 543 rop->rocb_missfilter = NULL; 544 rop->rocb_missfilterlen = sopt->sopt_valsize; 545 if (rop->rocb_missfilterlen != 0) 546 memcpy(rop->rocb_missfilter, sopt->sopt_val, 547 rop->rocb_missfilterlen); 548 break; 549 default: 550 error = ENOPROTOOPT; 551 break; 552 } 553 break; 554 case SOPT_GET: 555 switch (sopt->sopt_name) { 556 case ROUTE_MSGFILTER: 557 msgfilter = rop->rocb_msgfilter; 558 soopt_from_kbuf(sopt, &msgfilter, sizeof(msgfilter)); 559 break; 560 case RO_MISSFILTER: 561 soopt_from_kbuf(sopt, rop->rocb_missfilter, 562 rop->rocb_missfilterlen); 563 break; 564 default: 565 error = ENOPROTOOPT; 566 break; 567 } 568 } 569 out: 570 lwkt_replymsg(&msg->ctloutput.base.lmsg, error); 571 } 572 573 574 575 static void * 576 reallocbuf_nofree(void *ptr, size_t len, size_t olen) 577 { 578 void *newptr; 579 580 newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); 581 if (newptr == NULL) 582 return NULL; 583 bcopy(ptr, newptr, olen); 584 if (olen < len) 585 bzero((char *)newptr + olen, len - olen); 586 587 return (newptr); 588 } 589 590 /* 591 * Internal helper routine for route_output(). 592 */ 593 static int 594 _fillrtmsg(struct rt_msghdr **prtm, struct rtentry *rt, 595 struct rt_addrinfo *rtinfo) 596 { 597 int msglen; 598 struct rt_msghdr *rtm = *prtm; 599 600 /* Fill in rt_addrinfo for call to rt_msg_buffer(). */ 601 rtinfo->rti_dst = rt_key(rt); 602 rtinfo->rti_gateway = rt->rt_gateway; 603 rtinfo->rti_netmask = rt_mask(rt); /* might be NULL */ 604 rtinfo->rti_genmask = rt->rt_genmask; /* might be NULL */ 605 if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { 606 if (rt->rt_ifp != NULL) { 607 rtinfo->rti_ifpaddr = 608 TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid]) 609 ->ifa->ifa_addr; 610 rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr; 611 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) 612 rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr; 613 rtm->rtm_index = rt->rt_ifp->if_index; 614 } else { 615 rtinfo->rti_ifpaddr = NULL; 616 rtinfo->rti_ifaaddr = NULL; 617 } 618 } else if (rt->rt_ifp != NULL) { 619 rtm->rtm_index = rt->rt_ifp->if_index; 620 } 621 622 msglen = rt_msgsize(rtm->rtm_type, rtinfo); 623 if (rtm->rtm_msglen < msglen) { 624 /* NOTE: Caller will free the old rtm accordingly */ 625 rtm = reallocbuf_nofree(rtm, msglen, rtm->rtm_msglen); 626 if (rtm == NULL) 627 return (ENOBUFS); 628 *prtm = rtm; 629 } 630 rt_msg_buffer(rtm->rtm_type, rtinfo, rtm, msglen); 631 632 rtm->rtm_flags = rt->rt_flags; 633 rtm->rtm_rmx = rt->rt_rmx; 634 rtm->rtm_addrs = rtinfo->rti_addrs; 635 636 return (0); 637 } 638 639 struct rtm_arg { 640 struct rt_msghdr *bak_rtm; 641 struct rt_msghdr *new_rtm; 642 }; 643 644 static int 645 fillrtmsg(struct rtm_arg *arg, struct rtentry *rt, 646 struct rt_addrinfo *rtinfo) 647 { 648 struct rt_msghdr *rtm = arg->new_rtm; 649 int error; 650 651 error = _fillrtmsg(&rtm, rt, rtinfo); 652 if (!error) { 653 if (arg->new_rtm != rtm) { 654 /* 655 * _fillrtmsg() just allocated a new rtm; 656 * if the previously allocated rtm is not 657 * the backing rtm, it should be freed. 658 */ 659 if (arg->new_rtm != arg->bak_rtm) 660 kfree(arg->new_rtm, M_RTABLE); 661 arg->new_rtm = rtm; 662 } 663 } 664 return error; 665 } 666 667 static void route_output_add_callback(int, int, struct rt_addrinfo *, 668 struct rtentry *, void *); 669 static void route_output_delete_callback(int, int, struct rt_addrinfo *, 670 struct rtentry *, void *); 671 static int route_output_get_callback(int, struct rt_addrinfo *, 672 struct rtentry *, void *, int); 673 static int route_output_change_callback(int, struct rt_addrinfo *, 674 struct rtentry *, void *, int); 675 static int route_output_lock_callback(int, struct rt_addrinfo *, 676 struct rtentry *, void *, int); 677 678 /*ARGSUSED*/ 679 static int 680 route_output(struct mbuf *m, struct socket *so, ...) 681 { 682 struct rtm_arg arg; 683 struct rt_msghdr *rtm = NULL; 684 struct rawcb *rp = NULL; 685 struct pr_output_info *oi; 686 struct rt_addrinfo rtinfo; 687 sa_family_t family; 688 int len, error = 0; 689 __va_list ap; 690 691 M_ASSERTPKTHDR(m); 692 693 __va_start(ap, so); 694 oi = __va_arg(ap, struct pr_output_info *); 695 __va_end(ap); 696 697 family = familyof(NULL); 698 699 #define gotoerr(e) { error = e; goto flush;} 700 701 if (m == NULL || 702 (m->m_len < sizeof(long) && 703 (m = m_pullup(m, sizeof(long))) == NULL)) 704 return (ENOBUFS); 705 len = m->m_pkthdr.len; 706 if (len < sizeof(struct rt_msghdr) || 707 len != mtod(m, struct rt_msghdr *)->rtm_msglen) 708 gotoerr(EINVAL); 709 710 rtm = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); 711 if (rtm == NULL) 712 gotoerr(ENOBUFS); 713 714 m_copydata(m, 0, len, (caddr_t)rtm); 715 if (rtm->rtm_version != RTM_VERSION) 716 gotoerr(EPROTONOSUPPORT); 717 718 rtm->rtm_pid = oi->p_pid; 719 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 720 rtinfo.rti_addrs = rtm->rtm_addrs; 721 if (rt_xaddrs((char *)(rtm + 1), (char *)rtm + len, &rtinfo) != 0) 722 gotoerr(EINVAL); 723 724 rtinfo.rti_flags = rtm->rtm_flags; 725 if (rtinfo.rti_dst == NULL || rtinfo.rti_dst->sa_family >= AF_MAX || 726 (rtinfo.rti_gateway && rtinfo.rti_gateway->sa_family >= AF_MAX)) 727 gotoerr(EINVAL); 728 729 family = familyof(rtinfo.rti_dst); 730 731 /* 732 * Verify that the caller has the appropriate privilege; RTM_GET 733 * is the only operation the non-superuser is allowed. 734 */ 735 if (rtm->rtm_type != RTM_GET && 736 caps_priv_check(so->so_cred, SYSCAP_RESTRICTEDROOT) != 0) 737 { 738 gotoerr(EPERM); 739 } 740 741 if (rtinfo.rti_genmask != NULL) { 742 error = rtmask_add_global(rtinfo.rti_genmask, 743 rtm->rtm_type != RTM_GET ? 744 RTREQ_PRIO_HIGH : RTREQ_PRIO_NORM); 745 if (error) 746 goto flush; 747 } 748 749 switch (rtm->rtm_type) { 750 case RTM_ADD: 751 if (rtinfo.rti_gateway == NULL) { 752 error = EINVAL; 753 } else { 754 error = rtrequest1_global(RTM_ADD, &rtinfo, 755 route_output_add_callback, rtm, RTREQ_PRIO_HIGH); 756 } 757 break; 758 case RTM_DELETE: 759 /* 760 * Backing rtm (bak_rtm) could _not_ be freed during 761 * rtrequest1_global or rtsearch_global, even if the 762 * callback reallocates the rtm due to its size changes, 763 * since rtinfo points to the backing rtm's memory area. 764 * After rtrequest1_global or rtsearch_global returns, 765 * it is safe to free the backing rtm, since rtinfo will 766 * not be used anymore. 767 * 768 * new_rtm will be used to save the new rtm allocated 769 * by rtrequest1_global or rtsearch_global. 770 */ 771 arg.bak_rtm = rtm; 772 arg.new_rtm = rtm; 773 error = rtrequest1_global(RTM_DELETE, &rtinfo, 774 route_output_delete_callback, &arg, RTREQ_PRIO_HIGH); 775 rtm = arg.new_rtm; 776 if (rtm != arg.bak_rtm) 777 kfree(arg.bak_rtm, M_RTABLE); 778 break; 779 case RTM_GET: 780 /* See the comment in RTM_DELETE */ 781 arg.bak_rtm = rtm; 782 arg.new_rtm = rtm; 783 error = rtsearch_global(RTM_GET, &rtinfo, 784 route_output_get_callback, &arg, RTS_NOEXACTMATCH, 785 RTREQ_PRIO_NORM); 786 rtm = arg.new_rtm; 787 if (rtm != arg.bak_rtm) 788 kfree(arg.bak_rtm, M_RTABLE); 789 break; 790 case RTM_CHANGE: 791 error = rtsearch_global(RTM_CHANGE, &rtinfo, 792 route_output_change_callback, rtm, RTS_EXACTMATCH, 793 RTREQ_PRIO_HIGH); 794 break; 795 case RTM_LOCK: 796 error = rtsearch_global(RTM_LOCK, &rtinfo, 797 route_output_lock_callback, rtm, RTS_EXACTMATCH, 798 RTREQ_PRIO_HIGH); 799 break; 800 default: 801 error = EOPNOTSUPP; 802 break; 803 } 804 flush: 805 if (rtm != NULL) { 806 if (error != 0) 807 rtm->rtm_errno = error; 808 else 809 rtm->rtm_flags |= RTF_DONE; 810 } 811 812 /* 813 * Check to see if we don't want our own messages. 814 */ 815 if (!(so->so_options & SO_USELOOPBACK)) { 816 if (route_cb.any_count <= 1) { 817 if (rtm != NULL) 818 kfree(rtm, M_RTABLE); 819 m_freem(m); 820 return (error); 821 } 822 /* There is another listener, so construct message */ 823 rp = sotorawcb(so); 824 } 825 if (rtm != NULL) { 826 if (m_copyback2(m, 0, rtm->rtm_msglen, (caddr_t)rtm, M_NOWAIT) 827 != 0) { 828 m_freem(m); 829 m = NULL; 830 } 831 kfree(rtm, M_RTABLE); 832 } 833 if (m != NULL) 834 rts_input_skip(m, family, rp); 835 return (error); 836 } 837 838 static void 839 route_output_add_callback(int cmd, int error, struct rt_addrinfo *rtinfo, 840 struct rtentry *rt, void *arg) 841 { 842 struct rt_msghdr *rtm = arg; 843 844 if (error == 0 && rt != NULL) { 845 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 846 &rt->rt_rmx); 847 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 848 rt->rt_rmx.rmx_locks |= 849 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 850 if (rtinfo->rti_genmask != NULL) { 851 rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask); 852 if (rt->rt_genmask == NULL) { 853 /* 854 * This should not happen, since we 855 * have already installed genmask 856 * on each CPU before we reach here. 857 */ 858 panic("genmask is gone!?"); 859 } 860 } else { 861 rt->rt_genmask = NULL; 862 } 863 rtm->rtm_index = rt->rt_ifp->if_index; 864 } 865 } 866 867 static void 868 route_output_delete_callback(int cmd, int error, struct rt_addrinfo *rtinfo, 869 struct rtentry *rt, void *arg) 870 { 871 if (error == 0 && rt) { 872 ++rt->rt_refcnt; 873 if (fillrtmsg(arg, rt, rtinfo) != 0) { 874 error = ENOBUFS; 875 /* XXX no way to return the error */ 876 } 877 --rt->rt_refcnt; 878 } 879 if (rt && rt->rt_refcnt == 0) { 880 ++rt->rt_refcnt; 881 rtfree(rt); 882 } 883 } 884 885 static int 886 route_output_get_callback(int cmd, struct rt_addrinfo *rtinfo, 887 struct rtentry *rt, void *arg, int found_cnt) 888 { 889 int error, found = 0; 890 891 if (((rtinfo->rti_flags ^ rt->rt_flags) & RTF_HOST) == 0) 892 found = 1; 893 894 error = fillrtmsg(arg, rt, rtinfo); 895 if (!error && found) { 896 /* Got the exact match, we could return now! */ 897 error = EJUSTRETURN; 898 } 899 return error; 900 } 901 902 static int 903 route_output_change_callback(int cmd, struct rt_addrinfo *rtinfo, 904 struct rtentry *rt, void *arg, int found_cnt) 905 { 906 struct rt_msghdr *rtm = arg; 907 struct ifaddr *ifa; 908 int error = 0; 909 910 /* 911 * new gateway could require new ifaddr, ifp; 912 * flags may also be different; ifp may be specified 913 * by ll sockaddr when protocol address is ambiguous 914 */ 915 if (((rt->rt_flags & RTF_GATEWAY) && rtinfo->rti_gateway != NULL) || 916 rtinfo->rti_ifpaddr != NULL || 917 (rtinfo->rti_ifaaddr != NULL && 918 !sa_equal(rtinfo->rti_ifaaddr, rt->rt_ifa->ifa_addr))) { 919 error = rt_getifa(rtinfo); 920 if (error != 0) 921 goto done; 922 } 923 if (rtinfo->rti_gateway != NULL) { 924 /* 925 * We only need to generate rtmsg upon the 926 * first route to be changed. 927 */ 928 error = rt_setgate(rt, rt_key(rt), rtinfo->rti_gateway); 929 if (error != 0) 930 goto done; 931 } 932 if ((ifa = rtinfo->rti_ifa) != NULL) { 933 struct ifaddr *oifa = rt->rt_ifa; 934 935 if (oifa != ifa) { 936 if (oifa && oifa->ifa_rtrequest) 937 oifa->ifa_rtrequest(RTM_DELETE, rt); 938 IFAFREE(rt->rt_ifa); 939 IFAREF(ifa); 940 rt->rt_ifa = ifa; 941 rt->rt_ifp = rtinfo->rti_ifp; 942 } 943 } 944 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 945 if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) 946 rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt); 947 if (rtinfo->rti_genmask != NULL) { 948 rt->rt_genmask = rtmask_purelookup(rtinfo->rti_genmask); 949 if (rt->rt_genmask == NULL) { 950 /* 951 * This should not happen, since we 952 * have already installed genmask 953 * on each CPU before we reach here. 954 */ 955 panic("genmask is gone!?"); 956 } 957 } 958 rtm->rtm_index = rt->rt_ifp->if_index; 959 if (found_cnt == 1) 960 rt_rtmsg(RTM_CHANGE, rt, rt->rt_ifp, 0); 961 done: 962 return error; 963 } 964 965 static int 966 route_output_lock_callback(int cmd, struct rt_addrinfo *rtinfo, 967 struct rtentry *rt, void *arg, 968 int found_cnt __unused) 969 { 970 struct rt_msghdr *rtm = arg; 971 972 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 973 rt->rt_rmx.rmx_locks |= 974 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 975 return 0; 976 } 977 978 static void 979 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out) 980 { 981 #define setmetric(flag, elt) if (which & (flag)) out->elt = in->elt; 982 setmetric(RTV_RPIPE, rmx_recvpipe); 983 setmetric(RTV_SPIPE, rmx_sendpipe); 984 setmetric(RTV_SSTHRESH, rmx_ssthresh); 985 setmetric(RTV_RTT, rmx_rtt); 986 setmetric(RTV_RTTVAR, rmx_rttvar); 987 setmetric(RTV_HOPCOUNT, rmx_hopcount); 988 setmetric(RTV_MTU, rmx_mtu); 989 setmetric(RTV_EXPIRE, rmx_expire); 990 setmetric(RTV_MSL, rmx_msl); 991 setmetric(RTV_IWMAXSEGS, rmx_iwmaxsegs); 992 setmetric(RTV_IWCAPSEGS, rmx_iwcapsegs); 993 #undef setmetric 994 } 995 996 /* 997 * Extract the addresses of the passed sockaddrs. 998 * Do a little sanity checking so as to avoid bad memory references. 999 * This data is derived straight from userland. 1000 */ 1001 static int 1002 rt_xaddrs(char *cp, char *cplim, struct rt_addrinfo *rtinfo) 1003 { 1004 struct sockaddr *sa; 1005 int i; 1006 1007 for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { 1008 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1009 continue; 1010 sa = (struct sockaddr *)cp; 1011 /* 1012 * It won't fit. 1013 */ 1014 if ((cp + sa->sa_len) > cplim) { 1015 return (EINVAL); 1016 } 1017 1018 /* 1019 * There are no more... Quit now. 1020 * If there are more bits, they are in error. 1021 * I've seen this. route(1) can evidently generate these. 1022 * This causes kernel to core dump. 1023 * For compatibility, if we see this, point to a safe address. 1024 */ 1025 if (sa->sa_len == 0) { 1026 static struct sockaddr sa_zero = { 1027 sizeof sa_zero, AF_INET, 1028 }; 1029 1030 rtinfo->rti_info[i] = &sa_zero; 1031 kprintf("rtsock: received more addr bits than sockaddrs.\n"); 1032 return (0); /* should be EINVAL but for compat */ 1033 } 1034 1035 /* Accept the sockaddr. */ 1036 rtinfo->rti_info[i] = sa; 1037 cp += RT_ROUNDUP(sa->sa_len); 1038 } 1039 return (0); 1040 } 1041 1042 static int 1043 rt_msghdrsize(int type) 1044 { 1045 switch (type) { 1046 case RTM_DELADDR: 1047 case RTM_NEWADDR: 1048 return sizeof(struct ifa_msghdr); 1049 case RTM_DELMADDR: 1050 case RTM_NEWMADDR: 1051 return sizeof(struct ifma_msghdr); 1052 case RTM_IFINFO: 1053 return sizeof(struct if_msghdr); 1054 case RTM_IFANNOUNCE: 1055 case RTM_IEEE80211: 1056 return sizeof(struct if_announcemsghdr); 1057 default: 1058 return sizeof(struct rt_msghdr); 1059 } 1060 } 1061 1062 static int 1063 rt_msgsize(int type, const struct rt_addrinfo *rtinfo) 1064 { 1065 int len, i; 1066 1067 len = rt_msghdrsize(type); 1068 for (i = 0; i < RTAX_MAX; i++) { 1069 if (rtinfo->rti_info[i] != NULL) 1070 len += RT_ROUNDUP(rtinfo->rti_info[i]->sa_len); 1071 } 1072 len = ALIGN(len); 1073 return len; 1074 } 1075 1076 /* 1077 * Build a routing message in a buffer. 1078 * Copy the addresses in the rtinfo->rti_info[] sockaddr array 1079 * to the end of the buffer after the message header. 1080 * 1081 * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[]. 1082 * This side-effect can be avoided if we reorder the addrs bitmask field in all 1083 * the route messages to line up so we can set it here instead of back in the 1084 * calling routine. 1085 * 1086 * NOTE! The buffer may already contain a partially filled-out rtm via 1087 * _fillrtmsg(). 1088 */ 1089 static void 1090 rt_msg_buffer(int type, struct rt_addrinfo *rtinfo, void *buf, int msglen) 1091 { 1092 struct rt_msghdr *rtm; 1093 char *cp; 1094 int dlen, i; 1095 1096 rtm = (struct rt_msghdr *) buf; 1097 rtm->rtm_version = RTM_VERSION; 1098 rtm->rtm_type = type; 1099 rtm->rtm_msglen = msglen; 1100 1101 cp = (char *)buf + rt_msghdrsize(type); 1102 rtinfo->rti_addrs = 0; 1103 for (i = 0; i < RTAX_MAX; i++) { 1104 struct sockaddr *sa; 1105 1106 if ((sa = rtinfo->rti_info[i]) == NULL) 1107 continue; 1108 rtinfo->rti_addrs |= (1 << i); 1109 dlen = RT_ROUNDUP(sa->sa_len); 1110 bcopy(sa, cp, dlen); 1111 cp += dlen; 1112 } 1113 } 1114 1115 /* 1116 * Build a routing message in a mbuf chain. 1117 * Copy the addresses in the rtinfo->rti_info[] sockaddr array 1118 * to the end of the mbuf after the message header. 1119 * 1120 * Set the rtinfo->rti_addrs bitmask of addresses present in rtinfo->rti_info[]. 1121 * This side-effect can be avoided if we reorder the addrs bitmask field in all 1122 * the route messages to line up so we can set it here instead of back in the 1123 * calling routine. 1124 */ 1125 static struct mbuf * 1126 rt_msg_mbuf(int type, struct rt_addrinfo *rtinfo) 1127 { 1128 struct mbuf *m; 1129 struct rt_msghdr *rtm; 1130 struct sockaddr *sa; 1131 int hlen, dlen, len, i; 1132 1133 hlen = rt_msghdrsize(type); 1134 KASSERT(hlen <= MCLBYTES, ("rt_msg_mbuf: hlen %d doesn't fit", hlen)); 1135 1136 m = m_getl(hlen, M_NOWAIT, MT_DATA, M_PKTHDR, NULL); 1137 if (m == NULL) 1138 return (NULL); 1139 mbuftrackid(m, 32); 1140 m->m_pkthdr.rcvif = NULL; 1141 rtinfo->rti_addrs = 0; 1142 len = hlen; 1143 for (i = 0; i < RTAX_MAX; i++) { 1144 if ((sa = rtinfo->rti_info[i]) == NULL) 1145 continue; 1146 rtinfo->rti_addrs |= (1 << i); 1147 dlen = RT_ROUNDUP(sa->sa_len); 1148 if (m_copyback2(m, len, dlen, (caddr_t)sa, M_NOWAIT) != 0) { 1149 m_freem(m); 1150 return (NULL); 1151 } 1152 len += dlen; 1153 } 1154 rtm = mtod(m, struct rt_msghdr *); 1155 bzero(rtm, hlen); 1156 rtm->rtm_msglen = len; 1157 rtm->rtm_version = RTM_VERSION; 1158 rtm->rtm_type = type; 1159 return (m); 1160 } 1161 1162 /* 1163 * This routine is called to generate a message from the routing 1164 * socket indicating that a redirect has occurred, a routing lookup 1165 * has failed, or that a protocol has detected timeouts to a particular 1166 * destination. 1167 */ 1168 void 1169 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) 1170 { 1171 struct sockaddr *dst = rtinfo->rti_info[RTAX_DST]; 1172 struct rt_msghdr *rtm; 1173 struct mbuf *m; 1174 1175 if (route_cb.any_count == 0) 1176 return; 1177 m = rt_msg_mbuf(type, rtinfo); 1178 if (m == NULL) 1179 return; 1180 rtm = mtod(m, struct rt_msghdr *); 1181 rtm->rtm_flags = RTF_DONE | flags; 1182 rtm->rtm_errno = error; 1183 rtm->rtm_addrs = rtinfo->rti_addrs; 1184 rts_input(m, familyof(dst)); 1185 } 1186 1187 void 1188 rt_dstmsg(int type, struct sockaddr *dst, int error) 1189 { 1190 struct rt_msghdr *rtm; 1191 struct rt_addrinfo addrs; 1192 struct mbuf *m; 1193 1194 if (route_cb.any_count == 0) 1195 return; 1196 bzero(&addrs, sizeof(struct rt_addrinfo)); 1197 addrs.rti_info[RTAX_DST] = dst; 1198 m = rt_msg_mbuf(type, &addrs); 1199 if (m == NULL) 1200 return; 1201 rtm = mtod(m, struct rt_msghdr *); 1202 rtm->rtm_flags = RTF_DONE; 1203 rtm->rtm_errno = error; 1204 rtm->rtm_addrs = addrs.rti_addrs; 1205 rts_input(m, familyof(dst)); 1206 } 1207 1208 /* 1209 * This routine is called to generate a message from the routing 1210 * socket indicating that the status of a network interface has changed. 1211 */ 1212 void 1213 rt_ifmsg(struct ifnet *ifp) 1214 { 1215 struct if_msghdr *ifm; 1216 struct mbuf *m; 1217 struct rt_addrinfo rtinfo; 1218 1219 if (route_cb.any_count == 0) 1220 return; 1221 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1222 m = rt_msg_mbuf(RTM_IFINFO, &rtinfo); 1223 if (m == NULL) 1224 return; 1225 ifm = mtod(m, struct if_msghdr *); 1226 ifm->ifm_index = ifp->if_index; 1227 ifm->ifm_flags = ifp->if_flags; 1228 ifm->ifm_data = ifp->if_data; 1229 ifm->ifm_addrs = 0; 1230 rts_input(m, 0); 1231 } 1232 1233 static void 1234 rt_ifamsg(int cmd, struct ifaddr *ifa) 1235 { 1236 struct ifa_msghdr *ifam; 1237 struct rt_addrinfo rtinfo; 1238 struct mbuf *m; 1239 struct ifnet *ifp = ifa->ifa_ifp; 1240 1241 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1242 rtinfo.rti_ifaaddr = ifa->ifa_addr; 1243 rtinfo.rti_ifpaddr = 1244 TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; 1245 rtinfo.rti_netmask = ifa->ifa_netmask; 1246 rtinfo.rti_bcastaddr = ifa->ifa_dstaddr; 1247 1248 m = rt_msg_mbuf(cmd, &rtinfo); 1249 if (m == NULL) 1250 return; 1251 1252 ifam = mtod(m, struct ifa_msghdr *); 1253 ifam->ifam_index = ifp->if_index; 1254 ifam->ifam_flags = ifa->ifa_flags; 1255 ifam->ifam_addrs = rtinfo.rti_addrs; 1256 ifam->ifam_addrflags = if_addrflags(ifa); 1257 ifam->ifam_metric = ifa->ifa_metric; 1258 1259 rts_input(m, familyof(ifa->ifa_addr)); 1260 } 1261 1262 void 1263 rt_rtmsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int error) 1264 { 1265 struct rt_msghdr *rtm; 1266 struct rt_addrinfo rtinfo; 1267 struct mbuf *m; 1268 struct sockaddr *dst; 1269 1270 if (rt == NULL) 1271 return; 1272 1273 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1274 rtinfo.rti_dst = dst = rt_key(rt); 1275 rtinfo.rti_gateway = rt->rt_gateway; 1276 rtinfo.rti_netmask = rt_mask(rt); 1277 if (ifp != NULL) { 1278 rtinfo.rti_ifpaddr = 1279 TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; 1280 } 1281 if (rt->rt_ifa != NULL) 1282 rtinfo.rti_ifaaddr = rt->rt_ifa->ifa_addr; 1283 1284 m = rt_msg_mbuf(cmd, &rtinfo); 1285 if (m == NULL) 1286 return; 1287 1288 rtm = mtod(m, struct rt_msghdr *); 1289 if (ifp != NULL) 1290 rtm->rtm_index = ifp->if_index; 1291 rtm->rtm_flags |= rt->rt_flags; 1292 rtm->rtm_errno = error; 1293 rtm->rtm_addrs = rtinfo.rti_addrs; 1294 1295 rts_input(m, familyof(dst)); 1296 } 1297 1298 /* 1299 * This is called to generate messages from the routing socket 1300 * indicating a network interface has had addresses associated with it. 1301 * if we ever reverse the logic and replace messages TO the routing 1302 * socket indicate a request to configure interfaces, then it will 1303 * be unnecessary as the routing socket will automatically generate 1304 * copies of it. 1305 */ 1306 void 1307 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) 1308 { 1309 if (route_cb.any_count == 0) 1310 return; 1311 1312 if (cmd == RTM_ADD) { 1313 rt_ifamsg(RTM_NEWADDR, ifa); 1314 rt_rtmsg(RTM_ADD, rt, ifa->ifa_ifp, error); 1315 } else { 1316 KASSERT((cmd == RTM_DELETE), ("unknown cmd %d", cmd)); 1317 rt_rtmsg(RTM_DELETE, rt, ifa->ifa_ifp, error); 1318 rt_ifamsg(RTM_DELADDR, ifa); 1319 } 1320 } 1321 1322 /* 1323 * This is the analogue to the rt_newaddrmsg which performs the same 1324 * function but for multicast group memberhips. This is easier since 1325 * there is no route state to worry about. 1326 */ 1327 void 1328 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) 1329 { 1330 struct rt_addrinfo rtinfo; 1331 struct mbuf *m = NULL; 1332 struct ifnet *ifp = ifma->ifma_ifp; 1333 struct ifma_msghdr *ifmam; 1334 1335 if (route_cb.any_count == 0) 1336 return; 1337 1338 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1339 rtinfo.rti_ifaaddr = ifma->ifma_addr; 1340 if (ifp != NULL && !TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { 1341 rtinfo.rti_ifpaddr = 1342 TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa->ifa_addr; 1343 } 1344 /* 1345 * If a link-layer address is present, present it as a ``gateway'' 1346 * (similarly to how ARP entries, e.g., are presented). 1347 */ 1348 rtinfo.rti_gateway = ifma->ifma_lladdr; 1349 1350 m = rt_msg_mbuf(cmd, &rtinfo); 1351 if (m == NULL) 1352 return; 1353 1354 ifmam = mtod(m, struct ifma_msghdr *); 1355 ifmam->ifmam_index = ifp->if_index; 1356 ifmam->ifmam_addrs = rtinfo.rti_addrs; 1357 1358 rts_input(m, familyof(ifma->ifma_addr)); 1359 } 1360 1361 static struct mbuf * 1362 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, 1363 struct rt_addrinfo *info) 1364 { 1365 struct if_announcemsghdr *ifan; 1366 struct mbuf *m; 1367 1368 if (route_cb.any_count == 0) 1369 return NULL; 1370 1371 bzero(info, sizeof(*info)); 1372 m = rt_msg_mbuf(type, info); 1373 if (m == NULL) 1374 return NULL; 1375 1376 ifan = mtod(m, struct if_announcemsghdr *); 1377 ifan->ifan_index = ifp->if_index; 1378 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof ifan->ifan_name); 1379 ifan->ifan_what = what; 1380 return m; 1381 } 1382 1383 /* 1384 * This is called to generate routing socket messages indicating 1385 * IEEE80211 wireless events. 1386 * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. 1387 */ 1388 void 1389 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) 1390 { 1391 struct rt_addrinfo info; 1392 struct mbuf *m; 1393 1394 m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); 1395 if (m == NULL) 1396 return; 1397 1398 /* 1399 * Append the ieee80211 data. Try to stick it in the 1400 * mbuf containing the ifannounce msg; otherwise allocate 1401 * a new mbuf and append. 1402 * 1403 * NB: we assume m is a single mbuf. 1404 */ 1405 if (data_len > M_TRAILINGSPACE(m)) { 1406 /* XXX use m_getb(data_len, M_NOWAIT, MT_DATA, 0); */ 1407 struct mbuf *n = m_get(M_NOWAIT, MT_DATA); 1408 if (n == NULL) { 1409 m_freem(m); 1410 return; 1411 } 1412 KKASSERT(data_len <= M_TRAILINGSPACE(n)); 1413 bcopy(data, mtod(n, void *), data_len); 1414 n->m_len = data_len; 1415 m->m_next = n; 1416 } else if (data_len > 0) { 1417 bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); 1418 m->m_len += data_len; 1419 } 1420 mbuftrackid(m, 33); 1421 if (m->m_flags & M_PKTHDR) 1422 m->m_pkthdr.len += data_len; 1423 mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; 1424 rts_input(m, 0); 1425 } 1426 1427 /* 1428 * This is called to generate routing socket messages indicating 1429 * network interface arrival and departure. 1430 */ 1431 void 1432 rt_ifannouncemsg(struct ifnet *ifp, int what) 1433 { 1434 struct rt_addrinfo addrinfo; 1435 struct mbuf *m; 1436 1437 m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &addrinfo); 1438 if (m != NULL) 1439 rts_input(m, 0); 1440 } 1441 1442 static int 1443 resizewalkarg(struct walkarg *w, int len) 1444 { 1445 void *newptr; 1446 1447 newptr = kmalloc(len, M_RTABLE, M_INTWAIT | M_NULLOK); 1448 if (newptr == NULL) 1449 return (ENOMEM); 1450 if (w->w_tmem != NULL) 1451 kfree(w->w_tmem, M_RTABLE); 1452 w->w_tmem = newptr; 1453 w->w_tmemsize = len; 1454 bzero(newptr, len); 1455 1456 return (0); 1457 } 1458 1459 static void 1460 ifnet_compute_stats(struct ifnet *ifp) 1461 { 1462 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1463 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1464 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1465 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1466 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1467 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1468 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1469 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1470 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1471 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1472 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); 1473 } 1474 1475 static int 1476 if_addrflags(const struct ifaddr *ifa) 1477 { 1478 switch (ifa->ifa_addr->sa_family) { 1479 #ifdef INET6 1480 case AF_INET6: 1481 return ((const struct in6_ifaddr *)ifa)->ia6_flags; 1482 #endif 1483 default: 1484 return 0; 1485 } 1486 } 1487 1488 static int 1489 sysctl_iflist(int af, struct walkarg *w) 1490 { 1491 struct ifnet *ifp; 1492 struct rt_addrinfo rtinfo; 1493 int msglen, error; 1494 1495 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1496 1497 ifnet_lock(); 1498 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1499 struct ifaddr_container *ifac, *ifac_mark; 1500 struct ifaddr_marker mark; 1501 struct ifaddrhead *head; 1502 struct ifaddr *ifa; 1503 1504 if (w->w_arg && w->w_arg != ifp->if_index) 1505 continue; 1506 head = &ifp->if_addrheads[mycpuid]; 1507 /* 1508 * There is no need to reference the first ifaddr 1509 * even if the following resizewalkarg() blocks, 1510 * since the first ifaddr will not be destroyed 1511 * when the ifnet lock is held. 1512 */ 1513 ifac = TAILQ_FIRST(head); 1514 ifa = ifac->ifa; 1515 rtinfo.rti_ifpaddr = ifa->ifa_addr; 1516 msglen = rt_msgsize(RTM_IFINFO, &rtinfo); 1517 if (w->w_tmemsize < msglen && resizewalkarg(w, msglen) != 0) { 1518 ifnet_unlock(); 1519 return (ENOMEM); 1520 } 1521 rt_msg_buffer(RTM_IFINFO, &rtinfo, w->w_tmem, msglen); 1522 rtinfo.rti_ifpaddr = NULL; 1523 if (w->w_req != NULL && w->w_tmem != NULL) { 1524 struct if_msghdr *ifm = w->w_tmem; 1525 1526 ifm->ifm_index = ifp->if_index; 1527 ifm->ifm_flags = ifp->if_flags; 1528 ifnet_compute_stats(ifp); 1529 ifm->ifm_data = ifp->if_data; 1530 ifm->ifm_addrs = rtinfo.rti_addrs; 1531 error = SYSCTL_OUT(w->w_req, ifm, msglen); 1532 if (error) { 1533 ifnet_unlock(); 1534 return (error); 1535 } 1536 } 1537 /* 1538 * Add a marker, since SYSCTL_OUT() could block and during 1539 * that period the list could be changed. 1540 */ 1541 ifa_marker_init(&mark, ifp); 1542 ifac_mark = &mark.ifac; 1543 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 1544 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 1545 TAILQ_REMOVE(head, ifac_mark, ifa_link); 1546 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 1547 1548 ifa = ifac->ifa; 1549 1550 /* Ignore marker */ 1551 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1552 continue; 1553 1554 if (af && af != ifa->ifa_addr->sa_family) 1555 continue; 1556 if (curproc->p_ucred->cr_prison && 1557 prison_if(curproc->p_ucred, ifa->ifa_addr)) 1558 continue; 1559 rtinfo.rti_ifaaddr = ifa->ifa_addr; 1560 rtinfo.rti_netmask = ifa->ifa_netmask; 1561 rtinfo.rti_bcastaddr = ifa->ifa_dstaddr; 1562 msglen = rt_msgsize(RTM_NEWADDR, &rtinfo); 1563 /* 1564 * Keep a reference on this ifaddr, so that it will 1565 * not be destroyed if the following resizewalkarg() 1566 * blocks. 1567 */ 1568 IFAREF(ifa); 1569 if (w->w_tmemsize < msglen && 1570 resizewalkarg(w, msglen) != 0) { 1571 IFAFREE(ifa); 1572 TAILQ_REMOVE(head, ifac_mark, ifa_link); 1573 ifnet_unlock(); 1574 return (ENOMEM); 1575 } 1576 rt_msg_buffer(RTM_NEWADDR, &rtinfo, w->w_tmem, msglen); 1577 if (w->w_req != NULL) { 1578 struct ifa_msghdr *ifam = w->w_tmem; 1579 1580 ifam->ifam_index = ifa->ifa_ifp->if_index; 1581 ifam->ifam_flags = ifa->ifa_flags; 1582 ifam->ifam_addrs = rtinfo.rti_addrs; 1583 ifam->ifam_addrflags = if_addrflags(ifa); 1584 ifam->ifam_metric = ifa->ifa_metric; 1585 error = SYSCTL_OUT(w->w_req, w->w_tmem, msglen); 1586 if (error) { 1587 IFAFREE(ifa); 1588 TAILQ_REMOVE(head, ifac_mark, ifa_link); 1589 ifnet_unlock(); 1590 return (error); 1591 } 1592 } 1593 IFAFREE(ifa); 1594 } 1595 TAILQ_REMOVE(head, ifac_mark, ifa_link); 1596 rtinfo.rti_netmask = NULL; 1597 rtinfo.rti_ifaaddr = NULL; 1598 rtinfo.rti_bcastaddr = NULL; 1599 } 1600 ifnet_unlock(); 1601 return (0); 1602 } 1603 1604 static int 1605 rttable_walkarg_create(struct rttable_walkarg *w, int op, int arg) 1606 { 1607 struct rt_addrinfo rtinfo; 1608 struct sockaddr_storage ss; 1609 int i, msglen; 1610 1611 memset(w, 0, sizeof(*w)); 1612 w->w_op = op; 1613 w->w_arg = arg; 1614 1615 memset(&ss, 0, sizeof(ss)); 1616 ss.ss_len = sizeof(ss); 1617 1618 memset(&rtinfo, 0, sizeof(rtinfo)); 1619 for (i = 0; i < RTAX_MAX; ++i) 1620 rtinfo.rti_info[i] = (struct sockaddr *)&ss; 1621 msglen = rt_msgsize(RTM_GET, &rtinfo); 1622 1623 w->w_bufsz = msglen * RTTABLE_DUMP_MSGCNT_MAX; 1624 w->w_buf = kmalloc(w->w_bufsz, M_TEMP, M_WAITOK | M_NULLOK); 1625 if (w->w_buf == NULL) 1626 return ENOMEM; 1627 return 0; 1628 } 1629 1630 static void 1631 rttable_walkarg_destroy(struct rttable_walkarg *w) 1632 { 1633 kfree(w->w_buf, M_TEMP); 1634 } 1635 1636 static void 1637 rttable_entry_rtinfo(struct rt_addrinfo *rtinfo, struct radix_node *rn) 1638 { 1639 struct rtentry *rt = (struct rtentry *)rn; 1640 1641 bzero(rtinfo, sizeof(*rtinfo)); 1642 rtinfo->rti_dst = rt_key(rt); 1643 rtinfo->rti_gateway = rt->rt_gateway; 1644 rtinfo->rti_netmask = rt_mask(rt); 1645 rtinfo->rti_genmask = rt->rt_genmask; 1646 if (rt->rt_ifp != NULL) { 1647 rtinfo->rti_ifpaddr = 1648 TAILQ_FIRST(&rt->rt_ifp->if_addrheads[mycpuid])->ifa->ifa_addr; 1649 rtinfo->rti_ifaaddr = rt->rt_ifa->ifa_addr; 1650 if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) 1651 rtinfo->rti_bcastaddr = rt->rt_ifa->ifa_dstaddr; 1652 } 1653 } 1654 1655 static int 1656 rttable_walk_entry(struct radix_node *rn, void *xw) 1657 { 1658 struct rttable_walkarg *w = xw; 1659 struct rtentry *rt = (struct rtentry *)rn; 1660 struct rt_addrinfo rtinfo; 1661 struct rt_msghdr *rtm; 1662 boolean_t save = FALSE; 1663 int msglen, w_bufleft; 1664 void *ptr; 1665 1666 rttable_entry_rtinfo(&rtinfo, rn); 1667 msglen = rt_msgsize(RTM_GET, &rtinfo); 1668 1669 w_bufleft = w->w_bufsz - w->w_buflen; 1670 1671 if (rn->rn_dupedkey != NULL) { 1672 struct radix_node *rn1 = rn; 1673 int total_msglen = msglen; 1674 1675 /* 1676 * Make sure that we have enough space left for all 1677 * dupedkeys, since rn_walktree_at always starts 1678 * from the first dupedkey. 1679 */ 1680 while ((rn1 = rn1->rn_dupedkey) != NULL) { 1681 struct rt_addrinfo rtinfo1; 1682 int msglen1; 1683 1684 if (rn1->rn_flags & RNF_ROOT) 1685 continue; 1686 1687 rttable_entry_rtinfo(&rtinfo1, rn1); 1688 msglen1 = rt_msgsize(RTM_GET, &rtinfo1); 1689 total_msglen += msglen1; 1690 } 1691 1692 if (total_msglen > w_bufleft) { 1693 if (total_msglen > w->w_bufsz) { 1694 static int logged = 0; 1695 1696 if (!logged) { 1697 kprintf("buffer is too small for " 1698 "all dupedkeys, increase " 1699 "RTTABLE_DUMP_MSGCNT_MAX\n"); 1700 logged = 1; 1701 } 1702 return ENOMEM; 1703 } 1704 save = TRUE; 1705 } 1706 } else if (msglen > w_bufleft) { 1707 save = TRUE; 1708 } 1709 1710 if (save) { 1711 /* 1712 * Not enough buffer left; remember the position 1713 * to start from upon next round. 1714 */ 1715 KASSERT(msglen <= w->w_bufsz, ("msg too long %d", msglen)); 1716 1717 KASSERT(rtinfo.rti_dst->sa_len <= sizeof(w->w_key0), 1718 ("key too long %d", rtinfo.rti_dst->sa_len)); 1719 memset(&w->w_key0, 0, sizeof(w->w_key0)); 1720 memcpy(&w->w_key0, rtinfo.rti_dst, rtinfo.rti_dst->sa_len); 1721 w->w_key = (const char *)&w->w_key0; 1722 1723 if (rtinfo.rti_netmask != NULL) { 1724 KASSERT( 1725 rtinfo.rti_netmask->sa_len <= sizeof(w->w_mask0), 1726 ("mask too long %d", rtinfo.rti_netmask->sa_len)); 1727 memset(&w->w_mask0, 0, sizeof(w->w_mask0)); 1728 memcpy(&w->w_mask0, rtinfo.rti_netmask, 1729 rtinfo.rti_netmask->sa_len); 1730 w->w_mask = (const char *)&w->w_mask0; 1731 } else { 1732 w->w_mask = NULL; 1733 } 1734 return EJUSTRETURN; 1735 } 1736 1737 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1738 return 0; 1739 1740 ptr = ((uint8_t *)w->w_buf) + w->w_buflen; 1741 rt_msg_buffer(RTM_GET, &rtinfo, ptr, msglen); 1742 1743 rtm = (struct rt_msghdr *)ptr; 1744 rtm->rtm_flags = rt->rt_flags; 1745 rtm->rtm_use = rt->rt_use; 1746 rtm->rtm_rmx = rt->rt_rmx; 1747 rtm->rtm_index = rt->rt_ifp->if_index; 1748 rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; 1749 rtm->rtm_addrs = rtinfo.rti_addrs; 1750 1751 w->w_buflen += msglen; 1752 1753 return 0; 1754 } 1755 1756 static void 1757 rttable_walk_dispatch(netmsg_t msg) 1758 { 1759 struct netmsg_rttable_walk *nmsg = (struct netmsg_rttable_walk *)msg; 1760 struct radix_node_head *rnh = rt_tables[mycpuid][nmsg->af]; 1761 struct rttable_walkarg *w = nmsg->w; 1762 int error; 1763 1764 error = rnh->rnh_walktree_at(rnh, w->w_key, w->w_mask, 1765 rttable_walk_entry, w); 1766 lwkt_replymsg(&nmsg->base.lmsg, error); 1767 } 1768 1769 static int 1770 sysctl_rttable(int af, struct sysctl_req *req, int op, int arg) 1771 { 1772 struct rttable_walkarg w; 1773 int error, i; 1774 1775 error = rttable_walkarg_create(&w, op, arg); 1776 if (error) 1777 return error; 1778 1779 error = EINVAL; 1780 for (i = 1; i <= AF_MAX; i++) { 1781 if (rt_tables[mycpuid][i] != NULL && (af == 0 || af == i)) { 1782 w.w_key = NULL; 1783 w.w_mask = NULL; 1784 for (;;) { 1785 struct netmsg_rttable_walk nmsg; 1786 1787 netmsg_init(&nmsg.base, NULL, 1788 &curthread->td_msgport, 0, 1789 rttable_walk_dispatch); 1790 nmsg.af = i; 1791 nmsg.w = &w; 1792 1793 w.w_buflen = 0; 1794 1795 error = lwkt_domsg(netisr_cpuport(mycpuid), 1796 &nmsg.base.lmsg, 0); 1797 if (error && error != EJUSTRETURN) 1798 goto done; 1799 1800 if (req != NULL && w.w_buflen > 0) { 1801 int error1; 1802 1803 error1 = SYSCTL_OUT(req, w.w_buf, 1804 w.w_buflen); 1805 if (error1) { 1806 error = error1; 1807 goto done; 1808 } 1809 } 1810 if (error == 0) /* done */ 1811 break; 1812 } 1813 } 1814 } 1815 done: 1816 rttable_walkarg_destroy(&w); 1817 return error; 1818 } 1819 1820 static int 1821 sysctl_rtsock(SYSCTL_HANDLER_ARGS) 1822 { 1823 int *name = (int *)arg1; 1824 u_int namelen = arg2; 1825 int error = EINVAL; 1826 int origcpu, cpu; 1827 u_char af; 1828 struct walkarg w; 1829 1830 name ++; 1831 namelen--; 1832 if (req->newptr) 1833 return (EPERM); 1834 if (namelen != 3 && namelen != 4) 1835 return (EINVAL); 1836 af = name[0]; 1837 bzero(&w, sizeof w); 1838 w.w_op = name[1]; 1839 w.w_arg = name[2]; 1840 w.w_req = req; 1841 1842 /* 1843 * Optional third argument specifies cpu, used primarily for 1844 * debugging the route table. 1845 */ 1846 if (namelen == 4) { 1847 if (name[3] < 0 || name[3] >= netisr_ncpus) 1848 return (EINVAL); 1849 cpu = name[3]; 1850 } else { 1851 /* 1852 * Target cpu is not specified, use cpu0 then, so that 1853 * the result set will be relatively stable. 1854 */ 1855 cpu = 0; 1856 } 1857 origcpu = mycpuid; 1858 lwkt_migratecpu(cpu); 1859 1860 switch (w.w_op) { 1861 case NET_RT_DUMP: 1862 case NET_RT_FLAGS: 1863 error = sysctl_rttable(af, w.w_req, w.w_op, w.w_arg); 1864 break; 1865 1866 case NET_RT_IFLIST: 1867 error = sysctl_iflist(af, &w); 1868 break; 1869 } 1870 if (w.w_tmem != NULL) 1871 kfree(w.w_tmem, M_RTABLE); 1872 1873 lwkt_migratecpu(origcpu); 1874 return (error); 1875 } 1876 1877 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); 1878 1879 /* 1880 * Definitions of protocols supported in the ROUTE domain. 1881 */ 1882 1883 static struct domain routedomain; /* or at least forward */ 1884 1885 static struct protosw routesw[] = { 1886 { 1887 .pr_type = SOCK_RAW, 1888 .pr_domain = &routedomain, 1889 .pr_protocol = 0, 1890 .pr_flags = PR_ATOMIC|PR_ADDR, 1891 .pr_input = NULL, 1892 .pr_output = route_output, 1893 .pr_ctlinput = raw_ctlinput, 1894 .pr_ctloutput = route_ctloutput, 1895 .pr_ctlport = cpu0_ctlport, 1896 1897 .pr_init = raw_init, 1898 .pr_usrreqs = &route_usrreqs 1899 } 1900 }; 1901 1902 static struct domain routedomain = { 1903 .dom_family = AF_ROUTE, 1904 .dom_name = "route", 1905 .dom_init = NULL, 1906 .dom_externalize = NULL, 1907 .dom_dispose = NULL, 1908 .dom_protosw = routesw, 1909 .dom_protoswNPROTOSW = &routesw[NELEM(routesw)], 1910 .dom_next = SLIST_ENTRY_INITIALIZER, 1911 .dom_rtattach = NULL, 1912 .dom_rtoffset = 0, 1913 .dom_maxrtkey = 0, 1914 .dom_ifattach = NULL, 1915 .dom_ifdetach = NULL 1916 }; 1917 1918 DOMAIN_SET(route); 1919 1920