1 /* $OpenBSD: rtsock.c,v 1.198 2016/09/01 09:35:28 mpi Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
60 * 61 * @(#)rtsock.c 8.6 (Berkeley) 2/11/95 62 */ 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/mbuf.h> 69 #include <sys/socket.h> 70 #include <sys/socketvar.h> 71 #include <sys/domain.h> 72 #include <sys/protosw.h> 73 74 #include <net/if.h> 75 #include <net/if_dl.h> 76 #include <net/if_var.h> 77 #include <net/route.h> 78 #include <net/raw_cb.h> 79 80 #include <netinet/in.h> 81 82 #ifdef MPLS 83 #include <netmpls/mpls.h> 84 #endif 85 86 #include <sys/stdarg.h> 87 #include <sys/kernel.h> 88 #include <sys/timeout.h> 89 90 struct sockaddr route_dst = { 2, PF_ROUTE, }; 91 struct sockaddr route_src = { 2, PF_ROUTE, }; 92 struct sockproto route_proto = { PF_ROUTE, }; 93 94 struct walkarg { 95 int w_op, w_arg, w_given, w_needed, w_tmemsize; 96 caddr_t w_where, w_tmem; 97 }; 98 99 int route_ctloutput(int, struct socket *, int, int, struct mbuf **); 100 void route_input(struct mbuf *m0, ...); 101 int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); 102 int route_cleargateway(struct rtentry *, void *, unsigned int); 103 104 struct mbuf *rt_msg1(int, struct rt_addrinfo *); 105 int rt_msg2(int, int, struct rt_addrinfo *, caddr_t, 106 struct walkarg *); 107 void rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); 108 109 int sysctl_iflist(int, struct walkarg *); 110 int sysctl_ifnames(struct walkarg *); 111 112 struct routecb { 113 struct rawcb rcb; 114 struct timeout timeout; 115 unsigned int msgfilter; 116 unsigned int flags; 117 u_int rtableid; 118 }; 119 #define sotoroutecb(so) ((struct routecb *)(so)->so_pcb) 120 121 struct route_cb { 122 int ip_count; 123 int ip6_count; 124 int mpls_count; 125 int any_count; 126 }; 127 128 struct route_cb route_cb; 129 130 /* 131 * These flags and timeout are used for indicating to userland (via a 132 * RTM_DESYNC msg) when the route socket has overflowed and messages 133 * have been lost. 
134 */ 135 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 136 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 137 queueing more packets */ 138 139 #define ROUTE_DESYNC_RESEND_TIMEOUT (hz / 5) /* In hz */ 140 141 void rt_senddesync(void *); 142 143 int 144 route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 145 struct mbuf *control, struct proc *p) 146 { 147 struct rawcb *rp; 148 struct routecb *rop; 149 int s, af; 150 int error = 0; 151 152 s = splsoftnet(); 153 rp = sotorawcb(so); 154 155 switch (req) { 156 case PRU_ATTACH: 157 /* 158 * use the rawcb but allocate a routecb, this 159 * code does not care about the additional fields 160 * and works directly on the raw socket. 161 */ 162 rop = malloc(sizeof(struct routecb), M_PCB, M_WAITOK|M_ZERO); 163 rp = &rop->rcb; 164 so->so_pcb = rp; 165 /* Init the timeout structure */ 166 timeout_set(&((struct routecb *)rp)->timeout, rt_senddesync, rp); 167 /* 168 * Don't call raw_usrreq() in the attach case, because 169 * we want to allow non-privileged processes to listen 170 * on and send "safe" commands to the routing socket. 171 */ 172 if (curproc == 0) 173 error = EACCES; 174 else 175 error = raw_attach(so, (int)(long)nam); 176 if (error) { 177 free(rop, M_PCB, sizeof(struct routecb)); 178 splx(s); 179 return (error); 180 } 181 rop->rtableid = curproc->p_p->ps_rtableid; 182 af = rp->rcb_proto.sp_protocol; 183 if (af == AF_INET) 184 route_cb.ip_count++; 185 else if (af == AF_INET6) 186 route_cb.ip6_count++; 187 #ifdef MPLS 188 else if (af == AF_MPLS) 189 route_cb.mpls_count++; 190 #endif 191 rp->rcb_faddr = &route_src; 192 route_cb.any_count++; 193 soisconnected(so); 194 so->so_options |= SO_USELOOPBACK; 195 break; 196 197 case PRU_RCVD: 198 rop = (struct routecb *)rp; 199 200 /* 201 * If we are in a FLUSH state, check if the buffer is 202 * empty so that we can clear the flag. 
203 */ 204 if (((rop->flags & ROUTECB_FLAG_FLUSH) != 0) && 205 ((sbspace(&rp->rcb_socket->so_rcv) == 206 rp->rcb_socket->so_rcv.sb_hiwat))) 207 rop->flags &= ~ROUTECB_FLAG_FLUSH; 208 break; 209 210 case PRU_DETACH: 211 if (rp) { 212 timeout_del(&((struct routecb *)rp)->timeout); 213 af = rp->rcb_proto.sp_protocol; 214 if (af == AF_INET) 215 route_cb.ip_count--; 216 else if (af == AF_INET6) 217 route_cb.ip6_count--; 218 #ifdef MPLS 219 else if (af == AF_MPLS) 220 route_cb.mpls_count--; 221 #endif 222 route_cb.any_count--; 223 } 224 /* FALLTHROUGH */ 225 default: 226 error = raw_usrreq(so, req, m, nam, control, p); 227 } 228 229 splx(s); 230 return (error); 231 } 232 233 int 234 route_ctloutput(int op, struct socket *so, int level, int optname, 235 struct mbuf **mp) 236 { 237 struct routecb *rop = sotoroutecb(so); 238 struct mbuf *m = *mp; 239 int error = 0; 240 unsigned int tid; 241 242 if (level != AF_ROUTE) { 243 error = EINVAL; 244 if (op == PRCO_SETOPT && *mp) 245 m_free(*mp); 246 return (error); 247 } 248 249 switch (op) { 250 case PRCO_SETOPT: 251 switch (optname) { 252 case ROUTE_MSGFILTER: 253 if (m == NULL || m->m_len != sizeof(unsigned int)) 254 error = EINVAL; 255 else 256 rop->msgfilter = *mtod(m, unsigned int *); 257 break; 258 case ROUTE_TABLEFILTER: 259 if (m == NULL || m->m_len != sizeof(unsigned int)) { 260 error = EINVAL; 261 break; 262 } 263 tid = *mtod(m, unsigned int *); 264 if (tid != RTABLE_ANY && !rtable_exists(tid)) 265 error = ENOENT; 266 else 267 rop->rtableid = tid; 268 break; 269 default: 270 error = ENOPROTOOPT; 271 break; 272 } 273 if (m) 274 m_free(m); 275 break; 276 case PRCO_GETOPT: 277 switch (optname) { 278 case ROUTE_MSGFILTER: 279 *mp = m = m_get(M_WAIT, MT_SOOPTS); 280 m->m_len = sizeof(unsigned int); 281 *mtod(m, unsigned int *) = rop->msgfilter; 282 break; 283 case ROUTE_TABLEFILTER: 284 *mp = m = m_get(M_WAIT, MT_SOOPTS); 285 m->m_len = sizeof(unsigned int); 286 *mtod(m, unsigned int *) = rop->rtableid; 287 break; 288 
default: 289 error = ENOPROTOOPT; 290 break; 291 } 292 } 293 return (error); 294 } 295 296 void 297 rt_senddesync(void *data) 298 { 299 struct rawcb *rp; 300 struct routecb *rop; 301 struct mbuf *desync_mbuf; 302 303 rp = (struct rawcb *)data; 304 rop = (struct routecb *)rp; 305 306 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 307 if ((rop->flags & ROUTECB_FLAG_DESYNC) != 0) { 308 /* 309 * If we fail to alloc memory or if sbappendaddr() 310 * fails, re-add timeout and try again. 311 */ 312 desync_mbuf = rt_msg1(RTM_DESYNC, NULL); 313 if ((desync_mbuf != NULL) && 314 (sbappendaddr(&rp->rcb_socket->so_rcv, &route_src, 315 desync_mbuf, (struct mbuf *)NULL) != 0)) { 316 rop->flags &= ~ROUTECB_FLAG_DESYNC; 317 sorwakeup(rp->rcb_socket); 318 } else { 319 m_freem(desync_mbuf); 320 /* Re-add timeout to try sending msg again */ 321 timeout_add(&rop->timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 322 } 323 } 324 } 325 326 void 327 route_input(struct mbuf *m0, ...) 328 { 329 struct rawcb *rp; 330 struct routecb *rop; 331 struct rt_msghdr *rtm; 332 struct mbuf *m = m0; 333 int sockets = 0; 334 struct socket *last = NULL; 335 va_list ap; 336 struct sockproto *proto; 337 struct sockaddr *sosrc, *sodst; 338 339 va_start(ap, m0); 340 proto = va_arg(ap, struct sockproto *); 341 sosrc = va_arg(ap, struct sockaddr *); 342 sodst = va_arg(ap, struct sockaddr *); 343 va_end(ap); 344 345 /* ensure that we can access the rtm_type via mtod() */ 346 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 347 m_freem(m); 348 return; 349 } 350 351 LIST_FOREACH(rp, &rawcb, rcb_list) { 352 if (rp->rcb_socket->so_state & SS_CANTRCVMORE) 353 continue; 354 if (rp->rcb_proto.sp_family != proto->sp_family) 355 continue; 356 if (rp->rcb_proto.sp_protocol && proto->sp_protocol && 357 rp->rcb_proto.sp_protocol != proto->sp_protocol) 358 continue; 359 /* 360 * We assume the lower level routines have 361 * placed the address in a canonical format 362 * suitable for a structure 
comparison. 363 * 364 * Note that if the lengths are not the same 365 * the comparison will fail at the first byte. 366 */ 367 #define equal(a1, a2) \ 368 (bcmp((caddr_t)(a1), (caddr_t)(a2), a1->sa_len) == 0) 369 if (rp->rcb_laddr && !equal(rp->rcb_laddr, sodst)) 370 continue; 371 if (rp->rcb_faddr && !equal(rp->rcb_faddr, sosrc)) 372 continue; 373 374 /* filter messages that the process does not want */ 375 rop = (struct routecb *)rp; 376 rtm = mtod(m, struct rt_msghdr *); 377 /* but RTM_DESYNC can't be filtered */ 378 if (rtm->rtm_type != RTM_DESYNC && rop->msgfilter != 0 && 379 !(rop->msgfilter & (1 << rtm->rtm_type))) 380 continue; 381 switch (rtm->rtm_type) { 382 case RTM_IFANNOUNCE: 383 case RTM_DESYNC: 384 /* no tableid */ 385 break; 386 case RTM_RESOLVE: 387 case RTM_NEWADDR: 388 case RTM_DELADDR: 389 case RTM_IFINFO: 390 /* check against rdomain id */ 391 if (rop->rtableid != RTABLE_ANY && 392 rtable_l2(rop->rtableid) != rtm->rtm_tableid) 393 continue; 394 break; 395 default: 396 /* check against rtable id */ 397 if (rop->rtableid != RTABLE_ANY && 398 rop->rtableid != rtm->rtm_tableid) 399 continue; 400 break; 401 } 402 403 /* 404 * Check to see if the flush flag is set. If so, don't queue 405 * any more messages until the flag is cleared. 
406 */ 407 if ((rop->flags & ROUTECB_FLAG_FLUSH) != 0) 408 continue; 409 410 if (last) { 411 struct mbuf *n; 412 if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { 413 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 414 sbappendaddr(&last->so_rcv, sosrc, 415 n, (struct mbuf *)NULL) == 0) { 416 /* 417 * Flag socket as desync'ed and 418 * flush required 419 */ 420 sotoroutecb(last)->flags |= 421 ROUTECB_FLAG_DESYNC | 422 ROUTECB_FLAG_FLUSH; 423 rt_senddesync((void *) sotorawcb(last)); 424 m_freem(n); 425 } else { 426 sorwakeup(last); 427 sockets++; 428 } 429 } 430 } 431 last = rp->rcb_socket; 432 } 433 if (last) { 434 if (sbspace(&last->so_rcv) < (2 * MSIZE) || 435 sbappendaddr(&last->so_rcv, sosrc, 436 m, (struct mbuf *)NULL) == 0) { 437 /* Flag socket as desync'ed and flush required */ 438 sotoroutecb(last)->flags |= 439 ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 440 rt_senddesync((void *) sotorawcb(last)); 441 m_freem(m); 442 } else { 443 sorwakeup(last); 444 sockets++; 445 } 446 } else 447 m_freem(m); 448 } 449 450 int 451 route_output(struct mbuf *m, ...) 
452 { 453 struct rt_msghdr *rtm = NULL; 454 struct rtentry *rt = NULL; 455 struct rtentry *saved_nrt = NULL; 456 struct rt_addrinfo info; 457 int plen, len, newgate = 0, error = 0; 458 struct ifnet *ifp = NULL; 459 struct ifaddr *ifa = NULL; 460 struct socket *so; 461 struct rawcb *rp = NULL; 462 struct sockaddr_rtlabel sa_rl; 463 struct sockaddr_in6 sa_mask; 464 #ifdef MPLS 465 struct sockaddr_mpls sa_mpls, *psa_mpls; 466 #endif 467 va_list ap; 468 u_int tableid; 469 u_int8_t prio; 470 u_char vers; 471 472 va_start(ap, m); 473 so = va_arg(ap, struct socket *); 474 va_end(ap); 475 476 info.rti_info[RTAX_DST] = NULL; /* for error handling (goto flush) */ 477 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 478 (m = m_pullup(m, sizeof(int32_t))) == 0)) 479 return (ENOBUFS); 480 if ((m->m_flags & M_PKTHDR) == 0) 481 panic("route_output"); 482 len = m->m_pkthdr.len; 483 if (len < offsetof(struct rt_msghdr, rtm_type) + 1 || 484 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 485 error = EINVAL; 486 goto fail; 487 } 488 vers = mtod(m, struct rt_msghdr *)->rtm_version; 489 switch (vers) { 490 case RTM_VERSION: 491 if (len < sizeof(struct rt_msghdr)) { 492 error = EINVAL; 493 goto fail; 494 } 495 if (len > RTM_MAXSIZE) { 496 error = EMSGSIZE; 497 goto fail; 498 } 499 rtm = malloc(len, M_RTABLE, M_NOWAIT); 500 if (rtm == NULL) { 501 error = ENOBUFS; 502 goto fail; 503 } 504 m_copydata(m, 0, len, (caddr_t)rtm); 505 break; 506 default: 507 error = EPROTONOSUPPORT; 508 goto fail; 509 } 510 rtm->rtm_pid = curproc->p_p->ps_pid; 511 if (rtm->rtm_hdrlen == 0) /* old client */ 512 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 513 if (len < rtm->rtm_hdrlen) { 514 error = EINVAL; 515 goto fail; 516 } 517 518 /* Verify that the caller is sending an appropriate message early */ 519 switch (rtm->rtm_type) { 520 case RTM_ADD: 521 case RTM_DELETE: 522 case RTM_GET: 523 case RTM_CHANGE: 524 case RTM_LOCK: 525 break; 526 default: 527 error = EOPNOTSUPP; 528 goto fail; 529 } 530 531 /* 532 
* Verify that the caller has the appropriate privilege; RTM_GET 533 * is the only operation the non-superuser is allowed. 534 */ 535 if (rtm->rtm_type != RTM_GET && suser(curproc, 0) != 0) { 536 error = EACCES; 537 goto fail; 538 } 539 540 tableid = rtm->rtm_tableid; 541 if (!rtable_exists(tableid)) { 542 if (rtm->rtm_type == RTM_ADD) { 543 if ((error = rtable_add(tableid)) != 0) 544 goto flush; 545 } else { 546 error = EINVAL; 547 goto flush; 548 } 549 } 550 551 552 /* Do not let userland play with kernel-only flags. */ 553 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 554 error = EINVAL; 555 goto fail; 556 } 557 558 /* make sure that kernel-only bits are not set */ 559 rtm->rtm_priority &= RTP_MASK; 560 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 561 rtm->rtm_fmask &= RTF_FMASK; 562 563 if (rtm->rtm_priority != 0) { 564 if (rtm->rtm_priority > RTP_MAX || 565 rtm->rtm_priority == RTP_LOCAL) { 566 error = EINVAL; 567 goto fail; 568 } 569 prio = rtm->rtm_priority; 570 } else if (rtm->rtm_type != RTM_ADD) 571 prio = RTP_ANY; 572 else if (rtm->rtm_flags & RTF_STATIC) 573 prio = 0; 574 else 575 prio = RTP_DEFAULT; 576 577 bzero(&info, sizeof(info)); 578 info.rti_addrs = rtm->rtm_addrs; 579 rt_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, len + (caddr_t)rtm, &info); 580 info.rti_flags = rtm->rtm_flags; 581 if (info.rti_info[RTAX_DST] == NULL || 582 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 583 (info.rti_info[RTAX_GATEWAY] != NULL && 584 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 585 info.rti_info[RTAX_GENMASK] != NULL) { 586 error = EINVAL; 587 goto flush; 588 } 589 #ifdef MPLS 590 info.rti_mpls = rtm->rtm_mpls; 591 #endif 592 593 if (info.rti_info[RTAX_GATEWAY] != NULL && 594 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 595 (info.rti_flags & RTF_CLONING) == 0) { 596 info.rti_flags |= RTF_LLINFO; 597 } 598 599 switch (rtm->rtm_type) { 600 case RTM_ADD: 601 if (info.rti_info[RTAX_GATEWAY] == NULL) { 602 error = EINVAL; 603 goto flush; 
604 } 605 606 rt = rtable_match(tableid, info.rti_info[RTAX_DST], NULL); 607 if ((error = route_arp_conflict(rt, &info))) { 608 rtfree(rt); 609 rt = NULL; 610 goto flush; 611 } 612 613 /* 614 * We cannot go through a delete/create/insert cycle for 615 * cached route because this can lead to races in the 616 * receive path. Instead we upade the L2 cache. 617 */ 618 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) 619 goto change; 620 621 rtfree(rt); 622 rt = NULL; 623 624 error = rtrequest(RTM_ADD, &info, prio, &saved_nrt, tableid); 625 if (error == 0) { 626 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 627 &saved_nrt->rt_rmx); 628 /* write back the priority the kernel used */ 629 rtm->rtm_priority = saved_nrt->rt_priority & RTP_MASK; 630 rtm->rtm_index = saved_nrt->rt_ifidx; 631 rtm->rtm_flags = saved_nrt->rt_flags; 632 rtfree(saved_nrt); 633 } 634 break; 635 case RTM_DELETE: 636 if (!rtable_exists(tableid)) { 637 error = EAFNOSUPPORT; 638 goto flush; 639 } 640 641 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 642 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 643 prio); 644 645 /* 646 * Invalidate the cache of automagically created and 647 * referenced L2 entries to make sure that ``rt_gwroute'' 648 * pointer stays valid for other CPUs. 649 */ 650 if ((rt != NULL) && (ISSET(rt->rt_flags, RTF_CACHED))) { 651 ifp = if_get(rt->rt_ifidx); 652 KASSERT(ifp != NULL); 653 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 654 if_put(ifp); 655 /* Reset the MTU of the gateway route. 
*/ 656 rtable_walk(tableid, rt_key(rt)->sa_family, 657 route_cleargateway, rt); 658 goto report; 659 } 660 rtfree(rt); 661 rt = NULL; 662 663 error = rtrequest(RTM_DELETE, &info, prio, &rt, tableid); 664 if (error == 0) 665 goto report; 666 break; 667 case RTM_GET: 668 case RTM_CHANGE: 669 case RTM_LOCK: 670 if (!rtable_exists(tableid)) { 671 error = EAFNOSUPPORT; 672 goto flush; 673 } 674 675 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 676 info.rti_info[RTAX_NETMASK], info.rti_info[RTAX_GATEWAY], 677 prio); 678 #ifndef SMALL_KERNEL 679 /* 680 * If we got multipath routes, we require users to specify 681 * a matching gateway, except for RTM_GET. 682 */ 683 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 684 (info.rti_info[RTAX_GATEWAY] == NULL) && 685 (rtm->rtm_type != RTM_GET)) { 686 rtfree(rt); 687 rt = NULL; 688 } 689 #endif 690 /* 691 * If RTAX_GATEWAY is the argument we're trying to 692 * change, try to find a compatible route. 693 */ 694 if ((rt == NULL) && (info.rti_info[RTAX_GATEWAY] != NULL) && 695 (rtm->rtm_type == RTM_CHANGE)) { 696 rt = rtable_lookup(tableid, info.rti_info[RTAX_DST], 697 info.rti_info[RTAX_NETMASK], NULL, prio); 698 #ifndef SMALL_KERNEL 699 /* Ensure we don't pick a multipath one. */ 700 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 701 rtfree(rt); 702 rt = NULL; 703 } 704 #endif 705 } 706 707 if (rt == NULL) { 708 error = ESRCH; 709 goto flush; 710 } 711 712 /* 713 * RTM_CHANGE/LOCK need a perfect match. 
714 */ 715 plen = rtable_satoplen(info.rti_info[RTAX_DST]->sa_family, 716 info.rti_info[RTAX_NETMASK]); 717 if (rtm->rtm_type != RTM_GET && rt_plen(rt) != plen ) { 718 error = ESRCH; 719 goto flush; 720 } 721 722 switch (rtm->rtm_type) { 723 case RTM_GET: 724 report: 725 info.rti_info[RTAX_DST] = rt_key(rt); 726 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 727 info.rti_info[RTAX_NETMASK] = 728 rt_plen2mask(rt, &sa_mask); 729 info.rti_info[RTAX_LABEL] = 730 rtlabel_id2sa(rt->rt_labelid, &sa_rl); 731 #ifdef MPLS 732 if (rt->rt_flags & RTF_MPLS) { 733 bzero(&sa_mpls, sizeof(sa_mpls)); 734 sa_mpls.smpls_family = AF_MPLS; 735 sa_mpls.smpls_len = sizeof(sa_mpls); 736 sa_mpls.smpls_label = ((struct rt_mpls *) 737 rt->rt_llinfo)->mpls_label; 738 info.rti_info[RTAX_SRC] = 739 (struct sockaddr *)&sa_mpls; 740 info.rti_mpls = ((struct rt_mpls *) 741 rt->rt_llinfo)->mpls_operation; 742 rtm->rtm_mpls = info.rti_mpls; 743 } 744 #endif 745 info.rti_info[RTAX_IFP] = NULL; 746 info.rti_info[RTAX_IFA] = NULL; 747 ifp = if_get(rt->rt_ifidx); 748 if (ifp != NULL && rtm->rtm_addrs & (RTA_IFP|RTA_IFA)) { 749 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 750 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 751 if (ifp->if_flags & IFF_POINTOPOINT) 752 info.rti_info[RTAX_BRD] = 753 rt->rt_ifa->ifa_dstaddr; 754 else 755 info.rti_info[RTAX_BRD] = NULL; 756 } 757 if_put(ifp); 758 len = rt_msg2(rtm->rtm_type, RTM_VERSION, &info, NULL, 759 NULL); 760 if (len > rtm->rtm_msglen) { 761 struct rt_msghdr *new_rtm; 762 new_rtm = malloc(len, M_RTABLE, M_NOWAIT); 763 if (new_rtm == NULL) { 764 error = ENOBUFS; 765 goto flush; 766 } 767 memcpy(new_rtm, rtm, rtm->rtm_msglen); 768 free(rtm, M_RTABLE, 0); 769 rtm = new_rtm; 770 } 771 rt_msg2(rtm->rtm_type, RTM_VERSION, &info, (caddr_t)rtm, 772 NULL); 773 rtm->rtm_flags = rt->rt_flags; 774 rtm->rtm_use = 0; 775 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 776 rtm->rtm_index = rt->rt_ifidx; 777 rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 778 
rtm->rtm_addrs = info.rti_addrs; 779 break; 780 781 case RTM_CHANGE: 782 if (info.rti_info[RTAX_GATEWAY] != NULL) 783 if (rt->rt_gateway == NULL || 784 bcmp(rt->rt_gateway, 785 info.rti_info[RTAX_GATEWAY], 786 info.rti_info[RTAX_GATEWAY]->sa_len)) { 787 newgate = 1; 788 } 789 /* 790 * Check reachable gateway before changing the route. 791 * New gateway could require new ifaddr, ifp; 792 * flags may also be different; ifp may be specified 793 * by ll sockaddr when protocol address is ambiguous. 794 */ 795 if (newgate || info.rti_info[RTAX_IFP] != NULL || 796 info.rti_info[RTAX_IFA] != NULL) { 797 if ((error = rt_getifa(&info, tableid)) != 0) 798 goto flush; 799 ifa = info.rti_ifa; 800 if (rt->rt_ifa != ifa) { 801 ifp = if_get(rt->rt_ifidx); 802 KASSERT(ifp != NULL); 803 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 804 ifafree(rt->rt_ifa); 805 if_put(ifp); 806 807 ifa->ifa_refcnt++; 808 rt->rt_ifa = ifa; 809 rt->rt_ifidx = ifa->ifa_ifp->if_index; 810 #ifndef SMALL_KERNEL 811 /* recheck link state after ifp change*/ 812 rt_if_linkstate_change(rt, ifa->ifa_ifp, 813 tableid); 814 #endif 815 } 816 } 817 change: 818 if (info.rti_info[RTAX_GATEWAY] != NULL && (error = 819 rt_setgate(rt, info.rti_info[RTAX_GATEWAY], 820 tableid))) 821 goto flush; 822 #ifdef MPLS 823 if ((rtm->rtm_flags & RTF_MPLS) && 824 info.rti_info[RTAX_SRC] != NULL) { 825 struct rt_mpls *rt_mpls; 826 827 psa_mpls = (struct sockaddr_mpls *) 828 info.rti_info[RTAX_SRC]; 829 830 if (rt->rt_llinfo == NULL) { 831 rt->rt_llinfo = 832 malloc(sizeof(struct rt_mpls), 833 M_TEMP, M_NOWAIT|M_ZERO); 834 } 835 if (rt->rt_llinfo == NULL) { 836 error = ENOMEM; 837 goto flush; 838 } 839 840 rt_mpls = (struct rt_mpls *)rt->rt_llinfo; 841 842 if (psa_mpls != NULL) { 843 rt_mpls->mpls_label = 844 psa_mpls->smpls_label; 845 } 846 847 rt_mpls->mpls_operation = info.rti_mpls; 848 849 /* XXX: set experimental bits */ 850 851 rt->rt_flags |= RTF_MPLS; 852 } else if (newgate || ((rtm->rtm_fmask & RTF_MPLS) && 853 !(rtm->rtm_flags & 
RTF_MPLS))) { 854 /* if gateway changed remove MPLS information */ 855 if (rt->rt_llinfo != NULL && 856 rt->rt_flags & RTF_MPLS) { 857 free(rt->rt_llinfo, M_TEMP, 0); 858 rt->rt_llinfo = NULL; 859 rt->rt_flags &= ~RTF_MPLS; 860 } 861 } 862 #endif 863 /* Hack to allow some flags to be toggled */ 864 if (rtm->rtm_fmask) 865 rt->rt_flags = 866 (rt->rt_flags & ~rtm->rtm_fmask) | 867 (rtm->rtm_flags & rtm->rtm_fmask); 868 869 rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 870 &rt->rt_rmx); 871 rtm->rtm_index = rt->rt_ifidx; 872 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 873 rtm->rtm_flags = rt->rt_flags; 874 875 ifp = if_get(rt->rt_ifidx); 876 KASSERT(ifp != NULL); 877 ifp->if_rtrequest(ifp, RTM_ADD, rt); 878 if_put(ifp); 879 880 if (info.rti_info[RTAX_LABEL] != NULL) { 881 char *rtlabel = ((struct sockaddr_rtlabel *) 882 info.rti_info[RTAX_LABEL])->sr_label; 883 rtlabel_unref(rt->rt_labelid); 884 rt->rt_labelid = rtlabel_name2id(rtlabel); 885 } 886 if_group_routechange(info.rti_info[RTAX_DST], 887 info.rti_info[RTAX_NETMASK]); 888 /* FALLTHROUGH */ 889 case RTM_LOCK: 890 rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); 891 rt->rt_rmx.rmx_locks |= 892 (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 893 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 894 break; 895 } 896 break; 897 } 898 899 flush: 900 if (rtm) { 901 if (error) 902 rtm->rtm_errno = error; 903 else { 904 rtm->rtm_flags |= RTF_DONE; 905 } 906 } 907 if (info.rti_info[RTAX_DST]) 908 route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family; 909 if (rt) 910 rtfree(rt); 911 912 /* 913 * Check to see if we don't want our own messages. 
914 */ 915 if (!(so->so_options & SO_USELOOPBACK)) { 916 if (route_cb.any_count <= 1) { 917 fail: 918 free(rtm, M_RTABLE, 0); 919 m_freem(m); 920 return (error); 921 } 922 /* There is another listener, so construct message */ 923 rp = sotorawcb(so); 924 } 925 if (rp) 926 rp->rcb_proto.sp_family = 0; /* Avoid us */ 927 if (rtm) { 928 if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) { 929 m_freem(m); 930 m = NULL; 931 } else if (m->m_pkthdr.len > rtm->rtm_msglen) 932 m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); 933 free(rtm, M_RTABLE, 0); 934 } 935 if (m) 936 route_input(m, &route_proto, &route_src, &route_dst); 937 if (rp) 938 rp->rcb_proto.sp_family = PF_ROUTE; 939 940 return (error); 941 } 942 943 int 944 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 945 { 946 struct rtentry *nhrt = arg; 947 948 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 949 !ISSET(rt->rt_locks, RTV_MTU)) 950 rt->rt_mtu = 0; 951 952 return (0); 953 } 954 955 /* 956 * Check if the user request to insert an ARP entry does not conflict 957 * with existing ones. 958 * 959 * Only two entries are allowed for a given IP address: a private one 960 * (priv) and a public one (pub). 961 */ 962 int 963 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 964 { 965 #if defined(ART) && !defined(SMALL_KERNEL) 966 int proxy = (info->rti_flags & RTF_ANNOUNCE); 967 968 if ((info->rti_flags & RTF_LLINFO) == 0 || 969 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 970 return (0); 971 972 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 973 return (0); 974 975 /* If the entry is cached, it can be updated. */ 976 if (ISSET(rt->rt_flags, RTF_CACHED)) 977 return (0); 978 979 /* 980 * Same destination, not cached and both "priv" or "pub" conflict. 981 * If a second entry exists, it always conflict. 
 */
	/*
	 * NOTE(review): this is the tail of a function that begins above
	 * this chunk; it rejects a conflicting entry, or forces multipath
	 * when a compatible entry already exists.
	 */
	if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) ||
	    (rtable_mpath_next(rt) != NULL))
		return (EEXIST);

	/* No conflict but an entry exist so we need to force mpath. */
	info->rti_flags |= RTF_MPATH;
#endif /* ART && !SMALL_KERNEL */
	return (0);
}

/*
 * Copy the metrics selected by `which' from a userland rt_metrics into
 * the kernel's rt_kmetrics.  Expiry is converted from wall-clock time
 * (time_second epoch) to the kernel's monotonic time_uptime base.
 */
void
rt_setmetrics(u_long which, const struct rt_metrics *in,
    struct rt_kmetrics *out)
{
	int64_t expire;

	if (which & RTV_MTU)
		out->rmx_mtu = in->rmx_mtu;
	if (which & RTV_EXPIRE) {
		/* Rebase a non-zero expiry from wall clock to uptime. */
		expire = in->rmx_expire;
		if (expire != 0) {
			expire -= time_second;
			expire += time_uptime;
		}

		out->rmx_expire = expire;
	}
	/* RTV_PRIORITY handled before */
}

/*
 * Export kernel route metrics to the userland rt_metrics layout,
 * converting expiry back from the uptime base to wall-clock time.
 * The output structure is fully cleared first.
 */
void
rt_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out)
{
	int64_t expire;

	/* Rebase a non-zero expiry from uptime back to wall clock. */
	expire = in->rmx_expire;
	if (expire != 0) {
		expire -= time_uptime;
		expire += time_second;
	}

	bzero(out, sizeof(*out));
	out->rmx_locks = in->rmx_locks;
	out->rmx_mtu = in->rmx_mtu;
	out->rmx_expire = expire;
	out->rmx_pksent = in->rmx_pksent;
}

/* Round a sockaddr length up to the next multiple of sizeof(long). */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
/* Step a cursor past one (rounded-up) sockaddr. */
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))

/*
 * Parse the packed run of sockaddrs between cp and cplim into
 * rtinfo->rti_info[], using rtinfo->rti_addrs as the bitmask of which
 * RTAX_* slots are present in the buffer.
 *
 * NOTE(review): each sa_len is trusted when advancing cp; a sockaddr
 * whose sa_len extends past cplim is only caught on the next loop
 * test — confirm callers bound-check the message length first.
 */
void
rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
{
	struct sockaddr *sa;
	int i;

	bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info));
	for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) {
		if ((rtinfo->rti_addrs & (1 << i)) == 0)
			continue;
		rtinfo->rti_info[i] = sa = (struct sockaddr *)cp;
		ADVANCE(cp, sa);
	}
}

/*
 * Build a routing message of the given type in a fresh mbuf: a zeroed
 * type-specific header followed by every sockaddr present in rtinfo
 * (which may be NULL for address-less messages such as RTM_IFINFO).
 * Sets rtinfo->rti_addrs to the bitmask of addresses appended.
 * Returns NULL on allocation failure; the caller owns the mbuf.
 */
struct mbuf *
rt_msg1(int type, struct rt_addrinfo *rtinfo)
{
	struct rt_msghdr *rtm;
	struct mbuf *m;
	int i;
	struct sockaddr *sa;
	int len, dlen, hlen;

	/* Header size depends on the message type. */
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;
	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;
	case RTM_IFANNOUNCE:
		len = sizeof(struct if_announcemsghdr);
		break;
	default:
		len = sizeof(struct rt_msghdr);
		break;
	}
	if (len > MCLBYTES)
		panic("rt_msg1");
	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m && len > MHLEN) {
		/* Header does not fit in an mbuf; attach a cluster. */
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL)
		return (m);
	m->m_pkthdr.len = m->m_len = hlen = len;
	m->m_pkthdr.ph_ifidx = 0;
	rtm = mtod(m, struct rt_msghdr *);
	bzero(rtm, len);
	/* Append each present sockaddr, padded to ROUNDUP alignment. */
	for (i = 0; i < RTAX_MAX; i++) {
		if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL)
			continue;
		rtinfo->rti_addrs |= (1 << i);
		dlen = ROUNDUP(sa->sa_len);
		if (m_copyback(m, len, dlen, sa, M_NOWAIT)) {
			m_freem(m);
			return (NULL);
		}
		len += dlen;
	}
	rtm->rtm_msglen = len;
	rtm->rtm_hdrlen = hlen;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	return (m);
}

/*
 * Two-pass flat-buffer variant of rt_msg1 used by the sysctl dump
 * path.  With cp == NULL it only computes the message length and
 * accounts it in the walkarg; once enough temporary space is known
 * to be available it jumps back ("again") with cp pointing at
 * w->w_tmem and actually serializes the message.  Returns the
 * ALIGN()ed total message length in either case.
 */
int
rt_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp,
    struct walkarg *w)
{
	int i;
	int len, dlen, hlen, second_time = 0;
	caddr_t cp0;

	rtinfo->rti_addrs = 0;
again:
	switch (type) {
	case RTM_DELADDR:
	case RTM_NEWADDR:
		len = sizeof(struct ifa_msghdr);
		break;
	case RTM_IFINFO:
		len = sizeof(struct if_msghdr);
		break;
	default:
		len = sizeof(struct rt_msghdr);
		break;
	}
	hlen = len;
	if ((cp0 = cp) != NULL)
		cp += len;	/* leave room for the header */
	for (i = 0; i < RTAX_MAX; i++) {
		struct sockaddr *sa;

		if ((sa = rtinfo->rti_info[i]) == NULL)
			continue;
		rtinfo->rti_addrs |= (1 << i);
		dlen = ROUNDUP(sa->sa_len);
		if (cp) {
			bcopy(sa, cp, (size_t)dlen);
			cp += dlen;
		}
		len += dlen;
	}
	/* align message length to the next natural boundary */
	len = ALIGN(len);
	if (cp == 0 && w != NULL && !second_time) {
		struct walkarg *rw = w;

		rw->w_needed += len;
		/* Only serialize once the caller's buffer can take it. */
		if (rw->w_needed <= 0 && rw->w_where) {
			if (rw->w_tmemsize < len) {
				/* Grow the scratch buffer. */
				free(rw->w_tmem, M_RTABLE, 0);
				rw->w_tmem = malloc(len, M_RTABLE, M_NOWAIT);
				if (rw->w_tmem)
					rw->w_tmemsize = len;
			}
			if (rw->w_tmem) {
				cp = rw->w_tmem;
				second_time = 1;
				goto again;
			} else
				rw->w_where = 0;
		}
	}
	if (cp && w) /* clear the message header */
		bzero(cp0, hlen);

	if (cp) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;

		rtm->rtm_version = RTM_VERSION;
		rtm->rtm_type = type;
		rtm->rtm_msglen = len;
		rtm->rtm_hdrlen = hlen;
	}
	return (len);
}

/*
 * This routine is called to generate a message from the routing
 * socket indicating that a redirect has occurred, a routing lookup
 * has failed, or that a protocol has detected timeouts to a particular
 * destination.
 */
void
rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
    u_int ifidx, int error, u_int tableid)
{
	struct rt_msghdr *rtm;
	struct mbuf *m;
	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];

	/* No routing-socket listeners: skip building the message. */
	if (route_cb.any_count == 0)
		return;
	m = rt_msg1(type, rtinfo);
	if (m == NULL)
		return;
	rtm = mtod(m, struct rt_msghdr *);
	rtm->rtm_flags = RTF_DONE | flags;
	rtm->rtm_priority = prio;
	rtm->rtm_errno = error;
	rtm->rtm_tableid = tableid;
	rtm->rtm_addrs = rtinfo->rti_addrs;
	rtm->rtm_index = ifidx;
	/*
	 * Tag the message with the destination's address family so
	 * per-family listeners can filter it; 0 means unspecified.
	 */
	if (sa == NULL)
		route_proto.sp_protocol = 0;
	else
		route_proto.sp_protocol = sa->sa_family;
	route_input(m, &route_proto, &route_src, &route_dst);
}

/*
 * This routine is called to generate a message from the routing
 * socket indicating that the status of a network interface has changed.
 */
void
rt_ifmsg(struct ifnet *ifp)
{
	struct if_msghdr *ifm;
	struct mbuf *m;

	/* No routing-socket listeners: skip building the message. */
	if (route_cb.any_count == 0)
		return;
	m = rt_msg1(RTM_IFINFO, NULL);
	if (m == NULL)
		return;
	/* Snapshot the interface's state into the message header. */
	ifm = mtod(m, struct if_msghdr *);
	ifm->ifm_index = ifp->if_index;
	ifm->ifm_tableid = ifp->if_rdomain;
	ifm->ifm_flags = ifp->if_flags;
	ifm->ifm_xflags = ifp->if_xflags;
	ifm->ifm_data = ifp->if_data;
	ifm->ifm_addrs = 0;
	route_proto.sp_protocol = 0;
	route_input(m, &route_proto, &route_src, &route_dst);
}

/*
 * This is called to generate messages from the routing socket
 * indicating a network interface has had addresses associated with it.
 * if we ever reverse the logic and replace messages TO the routing
 * socket indicate a request to configure interfaces, then it will
 * be unnecessary as the routing socket will automatically generate
 * copies of it.
 */
void
rt_sendaddrmsg(struct rtentry *rt, int cmd, struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct mbuf *m = NULL;
	struct rt_addrinfo info;
	struct ifa_msghdr *ifam;

	/* No routing-socket listeners: skip building the message. */
	if (route_cb.any_count == 0)
		return;

	/* Describe the address: local addr, link addr, mask, peer/bcast. */
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
	if ((m = rt_msg1(cmd, &info)) == NULL)
		return;
	ifam = mtod(m, struct ifa_msghdr *);
	ifam->ifam_index = ifp->if_index;
	ifam->ifam_metric = ifa->ifa_metric;
	ifam->ifam_flags = ifa->ifa_flags;
	ifam->ifam_addrs = info.rti_addrs;
	ifam->ifam_tableid = ifp->if_rdomain;

	/* Tag the message with the address family; 0 if unspecified. */
	if (ifa->ifa_addr == NULL)
		route_proto.sp_protocol = 0;
	else
		route_proto.sp_protocol = ifa->ifa_addr->sa_family;
	route_input(m, &route_proto, &route_src, &route_dst);
}

/*
 * This is called to generate routing socket messages indicating
 * network interface arrival and departure.
 */
void
rt_ifannouncemsg(struct ifnet *ifp, int what)
{
	struct if_announcemsghdr *ifan;
	struct mbuf *m;

	/* No routing-socket listeners: skip building the message. */
	if (route_cb.any_count == 0)
		return;
	m = rt_msg1(RTM_IFANNOUNCE, NULL);
	if (m == NULL)
		return;
	ifan = mtod(m, struct if_announcemsghdr *);
	ifan->ifan_index = ifp->if_index;
	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
	ifan->ifan_what = what;	/* IFAN_ARRIVAL or IFAN_DEPARTURE */
	route_proto.sp_protocol = 0;
	route_input(m, &route_proto, &route_src, &route_dst);
}

/*
 * This is used in dumping the kernel table via sysctl().
 */
int
sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id)
{
	struct walkarg *w = v;
	int error = 0, size;
	struct rt_addrinfo info;
	struct ifnet *ifp;
#ifdef MPLS
	struct sockaddr_mpls sa_mpls;
#endif
	struct sockaddr_rtlabel sa_rl;
	struct sockaddr_in6 sa_mask;

	/* NET_RT_FLAGS: only dump routes matching the requested flags. */
	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
		return 0;
	/*
	 * NET_RT_DUMP with a non-zero argument filters by priority:
	 * a negative argument means "everything EXCEPT this priority".
	 */
	if (w->w_op == NET_RT_DUMP && w->w_arg) {
		u_int8_t prio = w->w_arg & RTP_MASK;
		if (w->w_arg < 0) {
			prio = (-w->w_arg) & RTP_MASK;
			/* Show all routes that are not this priority */
			if (prio == (rt->rt_priority & RTP_MASK))
				return 0;
		} else {
			if (prio != (rt->rt_priority & RTP_MASK) &&
			    prio != RTP_ANY)
				return 0;
		}
	}
	/* Gather the sockaddrs that describe this route. */
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	ifp = if_get(rt->rt_ifidx);
	if (ifp != NULL) {
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		if (ifp->if_flags & IFF_POINTOPOINT)
			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	if_put(ifp);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
#ifdef MPLS
	if (rt->rt_flags & RTF_MPLS) {
		bzero(&sa_mpls, sizeof(sa_mpls));
		sa_mpls.smpls_family = AF_MPLS;
		sa_mpls.smpls_len = sizeof(sa_mpls);
		sa_mpls.smpls_label = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_label;
		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
		info.rti_mpls = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_operation;
	}
#endif

	/* Size (and, space permitting, serialize into w_tmem). */
	size = rt_msg2(RTM_GET, RTM_VERSION, &info, NULL, w);
	if (w->w_where && w->w_tmem && w->w_needed <= 0) {
		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;

		rtm->rtm_pid = curproc->p_p->ps_pid;
		rtm->rtm_flags = rt->rt_flags;
		rtm->rtm_priority = rt->rt_priority & RTP_MASK;
		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
		/* Do not account the routing table's reference. */
		rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1;
		rtm->rtm_index = rt->rt_ifidx;
		rtm->rtm_addrs = info.rti_addrs;
		rtm->rtm_tableid = id;
#ifdef MPLS
		rtm->rtm_mpls = info.rti_mpls;
#endif
		/* On copyout failure, stop writing but keep sizing. */
		if ((error = copyout(rtm, w->w_where, size)) != 0)
			w->w_where = NULL;
		else
			w->w_where += size;
	}
	return (error);
}

/*
 * NET_RT_IFLIST handler: for every interface (optionally restricted
 * to a single if_index via w->w_arg) emit one RTM_IFINFO message
 * followed by an RTM_NEWADDR message per address of family `af'
 * (af == 0 means all families).
 */
int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int len, error = 0;

	bzero(&info, sizeof(info));
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (w->w_arg && w->w_arg != ifp->if_index)
			continue;
		/* Copy the link-layer address first */
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		len = rt_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w);
		if (w->w_where && w->w_tmem && w->w_needed <= 0) {
			struct if_msghdr *ifm;

			ifm = (struct if_msghdr *)w->w_tmem;
			ifm->ifm_index = ifp->if_index;
			ifm->ifm_tableid = ifp->if_rdomain;
			ifm->ifm_flags = ifp->if_flags;
			ifm->ifm_data = ifp->if_data;
			ifm->ifm_addrs = info.rti_addrs;
			error = copyout(ifm, w->w_where, len);
			if (error)
				return (error);
			w->w_where += len;
		}
		/* The link-layer sockaddr only goes with RTM_IFINFO. */
		info.rti_info[RTAX_IFP] = NULL;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			KASSERT(ifa->ifa_addr->sa_family != AF_LINK);
			if (af && af != ifa->ifa_addr->sa_family)
				continue;
			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			len = rt_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w);
			if (w->w_where && w->w_tmem && w->w_needed <= 0) {
				struct ifa_msghdr *ifam;

				ifam = (struct ifa_msghdr *)w->w_tmem;
				ifam->ifam_index = ifa->ifa_ifp->if_index;
				ifam->ifam_flags = ifa->ifa_flags;
				ifam->ifam_metric = ifa->ifa_metric;
				ifam->ifam_addrs = info.rti_addrs;
				error = copyout(w->w_tmem, w->w_where, len);
				if (error)
					return (error);
				w->w_where += len;
			}
		}
		/* Reset per-address slots before the next interface. */
		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
		    info.rti_info[RTAX_BRD] = NULL;
	}
	return (0);
}

/*
 * NET_RT_IFNAMES handler: emit one fixed-size if_nameindex_msg
 * (index + name) per interface, optionally restricted to a single
 * if_index via w->w_arg.
 */
int
sysctl_ifnames(struct walkarg *w)
{
	struct if_nameindex_msg ifn;
	struct ifnet *ifp;
	int error = 0;

	/* XXX ignore tableid for now */
	TAILQ_FOREACH(ifp, &ifnet, if_list) {
		if (w->w_arg && w->w_arg != ifp->if_index)
			continue;
		w->w_needed += sizeof(ifn);
		if (w->w_where && w->w_needed <= 0) {

			memset(&ifn, 0, sizeof(ifn));
			ifn.if_index = ifp->if_index;
			strlcpy(ifn.if_name, ifp->if_xname,
			    sizeof(ifn.if_name));
			error = copyout(&ifn, w->w_where, sizeof(ifn));
			if (error)
				return (error);
			w->w_where += sizeof(ifn);
		}
	}

	return (0);
}

/*
 * PF_ROUTE sysctl dispatcher.  name[] is { af, op, arg [, tableid] }.
 * Read-only: any attempt to write (new != NULL) returns EPERM.  When
 * `where' is NULL only the required buffer size (plus 10% slack for
 * concurrent growth) is reported back through *given.
 */
int
sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new,
    size_t newlen)
{
	int i, s, error = EINVAL;
	u_char af;
	struct walkarg w;
	struct rt_tableinfo tableinfo;
	u_int tableid = 0;

	if (new)
		return (EPERM);
	if (namelen < 3 || namelen > 4)
		return (EINVAL);
	af = name[0];
	bzero(&w, sizeof(w));
	w.w_where = where;
	w.w_given = *given;
	/* w_needed counts up from -given; <= 0 means space remains. */
	w.w_needed = 0 - w.w_given;
	w.w_op = name[1];
	w.w_arg = name[2];

	if (namelen == 4) {
		tableid = name[3];
		if (!rtable_exists(tableid))
			return (ENOENT);
	} else
		tableid = curproc->p_p->ps_rtableid;

	s = splsoftnet();
	switch (w.w_op) {
	case NET_RT_DUMP:
	case NET_RT_FLAGS:
		/* Walk every (or the single requested) address family. */
		for (i = 1; i <= AF_MAX; i++) {
			if (af != 0 && af != i)
				continue;

			error = rtable_walk(tableid, i, sysctl_dumpentry, &w);
			if (error == EAFNOSUPPORT)
				error = 0;
			if (error)
				break;
		}
		break;

	case NET_RT_IFLIST:
		error = sysctl_iflist(af, &w);
		break;

	case NET_RT_STATS:
		error = sysctl_rdstruct(where, given, new,
		    &rtstat, sizeof(rtstat));
		splx(s);
		return (error);
	case NET_RT_TABLE:
		tableid = w.w_arg;
		if (!rtable_exists(tableid)) {
			splx(s);
			return (ENOENT);
		}
		tableinfo.rti_tableid = tableid;
		tableinfo.rti_domainid = rtable_l2(tableid);
		error = sysctl_rdstruct(where, given, new,
		    &tableinfo, sizeof(tableinfo));
		splx(s);
		return (error);
	case NET_RT_IFNAMES:
		error = sysctl_ifnames(&w);
		break;
	}
	splx(s);
	free(w.w_tmem, M_RTABLE, 0);
	w.w_needed += w.w_given;
	if (where) {
		*given = w.w_where - (caddr_t)where;
		if (*given < w.w_needed)
			return (ENOMEM);
	} else
		/* Report 10% more than needed, to absorb growth. */
		*given = (11 * w.w_needed) / 10;

	return (error);
}

/*
 * Definitions of protocols supported in the ROUTE domain.
 */

extern struct domain routedomain;		/* or at least forward */

/* The single raw-socket protocol of the PF_ROUTE domain. */
struct protosw routesw[] = {
{ SOCK_RAW,	&routedomain,	0,	PR_ATOMIC|PR_ADDR|PR_WANTRCVD,
  route_input,	route_output,	raw_ctlinput,	route_ctloutput,
  route_usrreq,
  raw_init,	0,		0,		0,
  sysctl_rtable,
}
};

struct domain routedomain =
    { PF_ROUTE, "route", route_init, 0, 0,
      routesw, &routesw[nitems(routesw)] };