1 /* $OpenBSD: rtsock.c,v 1.373 2023/12/03 10:51:17 mvs Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
 *
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/srp.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>

#ifdef MPLS
#include <netmpls/mpls.h>
#endif
#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <net/if_enc.h>
#endif
#ifdef BFD
#include <net/bfd.h>
#endif

#include <sys/stdarg.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

/*
 * Send and receive buffer space requested from soreserve() for every
 * routing socket when it is attached.
 */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Fixed source address attached to every message queued on a routing
 * socket and returned by route_peeraddr().  The initialiser sets the
 * first two members to 2 and PF_ROUTE; route_peeraddr() copies
 * route_src.sa_len bytes of it.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };

/*
 * State handed to rtm_msg2() and the sysctl walkers declared below.
 * NOTE(review): the members are not used anywhere in this part of the
 * file; their meaning has to be confirmed against the sysctl code.
 */
struct walkarg {
	int	w_op, w_arg, w_tmemsize;
	size_t	w_given, w_needed;
	caddr_t	w_where, w_tmem;
};

/* Routing socket protocol handlers and their helpers. */
void		route_prinit(void);
void		rcb_ref(void *, void *);
void		rcb_unref(void *, void *);
int		route_output(struct mbuf *, struct socket *);
int		route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int		route_attach(struct socket *, int, int);
int		route_detach(struct socket *);
int		route_disconnect(struct socket *);
int		route_shutdown(struct socket *);
void		route_rcvd(struct socket *);
int		route_send(struct socket *, struct mbuf *, struct mbuf *,
		    struct mbuf *);
int		route_sockaddr(struct socket *, struct mbuf *);
int		route_peeraddr(struct socket *, struct mbuf *);
void		route_input(struct mbuf *m0, struct socket *, sa_family_t);
int		route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int		route_cleargateway(struct rtentry *, void *, unsigned int);
void		rtm_senddesync_timer(void *);
void		rtm_senddesync(struct socket *);
int		rtm_sendup(struct socket *, struct mbuf *);

/* Routing message construction and processing. */
int		rtm_getifa(struct rt_addrinfo *, unsigned int);
int		rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
		    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rtentry *,
		     struct rt_metrics *);

int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);

int		 rt_setsource(unsigned int, const struct sockaddr *);

/*
 * Per-socket control block of a routing socket.  It is allocated from
 * rtpcb_pool in route_attach() and released in route_detach().
 *
 * Locks used to protect struct members
 *	I	immutable after creation
 *	s	solock
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] owning socket */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* taken by rcb_ref(), waited for
						   in route_detach() */
	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
	unsigned int		rop_msgfilter;		/* [s] ROUTE_MSGFILTER value,
							   tested as 1U << rtm_type */
	unsigned int		rop_flagfilter;		/* [s] ROUTE_FLAGFILTER value,
							   matched against rtm_flags */
	unsigned int		rop_flags;		/* [s] ROUTECB_FLAG_* */
	u_int			rop_rtableid;		/* [s] ROUTE_TABLEFILTER value */
	unsigned short		rop_proto;		/* [I] `proto' given at attach,
							   compared to the message's
							   address family */
	u_char			rop_priority;		/* [s] ROUTE_PRIOFILTER value */
};
/* Fetch the rtpcb stored in a socket's so_pcb pointer. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)

/*
 * Global table of all attached routing sockets.  route_attach() and
 * route_detach() modify the list and the counter with rtp_lk held for
 * writing; route_input() walks the list with SRPL_FOREACH().
 */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* all attached rtpcbs */
	struct srpl_rc		rtp_rc;		/* rcb_ref()/rcb_unref() hooks */
	struct rwlock		rtp_lk;		/* serialises list updates */
	unsigned int		rtp_count;	/* number of rtpcbs on rtp_list */
};

struct pool rtpcb_pool;		/* backing store for struct rtpcb */
struct rtptable rtptable;	/* the one and only routing socket table */

/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.
184 */ 185 #define ROUTECB_FLAG_DESYNC 0x1 /* Route socket out of memory */ 186 #define ROUTECB_FLAG_FLUSH 0x2 /* Wait until socket is empty before 187 queueing more packets */ 188 189 #define ROUTE_DESYNC_RESEND_TIMEOUT 200 /* In ms */ 190 191 void 192 route_prinit(void) 193 { 194 srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); 195 rw_init(&rtptable.rtp_lk, "rtsock"); 196 SRPL_INIT(&rtptable.rtp_list); 197 pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, 198 IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); 199 } 200 201 void 202 rcb_ref(void *null, void *v) 203 { 204 struct rtpcb *rop = v; 205 206 refcnt_take(&rop->rop_refcnt); 207 } 208 209 void 210 rcb_unref(void *null, void *v) 211 { 212 struct rtpcb *rop = v; 213 214 refcnt_rele_wake(&rop->rop_refcnt); 215 } 216 217 int 218 route_attach(struct socket *so, int proto, int wait) 219 { 220 struct rtpcb *rop; 221 int error; 222 223 error = soreserve(so, ROUTESNDQ, ROUTERCVQ); 224 if (error) 225 return (error); 226 /* 227 * use the rawcb but allocate a rtpcb, this 228 * code does not care about the additional fields 229 * and works directly on the raw socket. 230 */ 231 rop = pool_get(&rtpcb_pool, (wait == M_WAIT ? 
PR_WAITOK : PR_NOWAIT) | 232 PR_ZERO); 233 if (rop == NULL) 234 return (ENOBUFS); 235 so->so_pcb = rop; 236 /* Init the timeout structure */ 237 timeout_set_flags(&rop->rop_timeout, rtm_senddesync_timer, so, 238 KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE); 239 refcnt_init(&rop->rop_refcnt); 240 241 rop->rop_socket = so; 242 rop->rop_proto = proto; 243 244 rop->rop_rtableid = curproc->p_p->ps_rtableid; 245 246 soisconnected(so); 247 so->so_options |= SO_USELOOPBACK; 248 249 rw_enter(&rtptable.rtp_lk, RW_WRITE); 250 SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, 251 rop_list); 252 rtptable.rtp_count++; 253 rw_exit(&rtptable.rtp_lk); 254 255 return (0); 256 } 257 258 int 259 route_detach(struct socket *so) 260 { 261 struct rtpcb *rop; 262 263 soassertlocked(so); 264 265 rop = sotortpcb(so); 266 if (rop == NULL) 267 return (EINVAL); 268 269 rw_enter(&rtptable.rtp_lk, RW_WRITE); 270 271 rtptable.rtp_count--; 272 SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, 273 rop_list); 274 rw_exit(&rtptable.rtp_lk); 275 276 sounlock(so); 277 278 /* wait for all references to drop */ 279 refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); 280 timeout_del_barrier(&rop->rop_timeout); 281 282 solock(so); 283 284 so->so_pcb = NULL; 285 KASSERT((so->so_state & SS_NOFDREF) == 0); 286 pool_put(&rtpcb_pool, rop); 287 288 return (0); 289 } 290 291 int 292 route_disconnect(struct socket *so) 293 { 294 soisdisconnected(so); 295 return (0); 296 } 297 298 int 299 route_shutdown(struct socket *so) 300 { 301 socantsendmore(so); 302 return (0); 303 } 304 305 void 306 route_rcvd(struct socket *so) 307 { 308 struct rtpcb *rop = sotortpcb(so); 309 310 soassertlocked(so); 311 312 /* 313 * If we are in a FLUSH state, check if the buffer is 314 * empty so that we can clear the flag. 
315 */ 316 if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) && 317 ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) == 318 rop->rop_socket->so_rcv.sb_hiwat))) 319 rop->rop_flags &= ~ROUTECB_FLAG_FLUSH; 320 } 321 322 int 323 route_send(struct socket *so, struct mbuf *m, struct mbuf *nam, 324 struct mbuf *control) 325 { 326 int error; 327 328 soassertlocked(so); 329 330 if (control && control->m_len) { 331 error = EOPNOTSUPP; 332 goto out; 333 } 334 335 if (nam) { 336 error = EISCONN; 337 goto out; 338 } 339 340 error = route_output(m, so); 341 m = NULL; 342 343 out: 344 m_freem(control); 345 m_freem(m); 346 347 return (error); 348 } 349 350 int 351 route_sockaddr(struct socket *so, struct mbuf *nam) 352 { 353 return (EINVAL); 354 } 355 356 int 357 route_peeraddr(struct socket *so, struct mbuf *nam) 358 { 359 /* minimal support, just implement a fake peer address */ 360 bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len); 361 nam->m_len = route_src.sa_len; 362 return (0); 363 } 364 365 int 366 route_ctloutput(int op, struct socket *so, int level, int optname, 367 struct mbuf *m) 368 { 369 struct rtpcb *rop = sotortpcb(so); 370 int error = 0; 371 unsigned int tid, prio; 372 373 if (level != AF_ROUTE) 374 return (EINVAL); 375 376 switch (op) { 377 case PRCO_SETOPT: 378 switch (optname) { 379 case ROUTE_MSGFILTER: 380 if (m == NULL || m->m_len != sizeof(unsigned int)) 381 error = EINVAL; 382 else 383 rop->rop_msgfilter = *mtod(m, unsigned int *); 384 break; 385 case ROUTE_TABLEFILTER: 386 if (m == NULL || m->m_len != sizeof(unsigned int)) { 387 error = EINVAL; 388 break; 389 } 390 tid = *mtod(m, unsigned int *); 391 if (tid != RTABLE_ANY && !rtable_exists(tid)) 392 error = ENOENT; 393 else 394 rop->rop_rtableid = tid; 395 break; 396 case ROUTE_PRIOFILTER: 397 if (m == NULL || m->m_len != sizeof(unsigned int)) { 398 error = EINVAL; 399 break; 400 } 401 prio = *mtod(m, unsigned int *); 402 if (prio > RTP_MAX) 403 error = EINVAL; 404 else 405 rop->rop_priority = 
prio; 406 break; 407 case ROUTE_FLAGFILTER: 408 if (m == NULL || m->m_len != sizeof(unsigned int)) 409 error = EINVAL; 410 else 411 rop->rop_flagfilter = *mtod(m, unsigned int *); 412 break; 413 default: 414 error = ENOPROTOOPT; 415 break; 416 } 417 break; 418 case PRCO_GETOPT: 419 switch (optname) { 420 case ROUTE_MSGFILTER: 421 m->m_len = sizeof(unsigned int); 422 *mtod(m, unsigned int *) = rop->rop_msgfilter; 423 break; 424 case ROUTE_TABLEFILTER: 425 m->m_len = sizeof(unsigned int); 426 *mtod(m, unsigned int *) = rop->rop_rtableid; 427 break; 428 case ROUTE_PRIOFILTER: 429 m->m_len = sizeof(unsigned int); 430 *mtod(m, unsigned int *) = rop->rop_priority; 431 break; 432 case ROUTE_FLAGFILTER: 433 m->m_len = sizeof(unsigned int); 434 *mtod(m, unsigned int *) = rop->rop_flagfilter; 435 break; 436 default: 437 error = ENOPROTOOPT; 438 break; 439 } 440 } 441 return (error); 442 } 443 444 void 445 rtm_senddesync_timer(void *xso) 446 { 447 struct socket *so = xso; 448 449 solock(so); 450 rtm_senddesync(so); 451 sounlock(so); 452 } 453 454 void 455 rtm_senddesync(struct socket *so) 456 { 457 struct rtpcb *rop = sotortpcb(so); 458 struct mbuf *desync_mbuf; 459 460 soassertlocked(so); 461 462 /* 463 * Dying socket is disconnected by upper layer and there is 464 * no reason to send packet. Also we shouldn't reschedule 465 * timeout(9), otherwise timeout_del_barrier(9) can't help us. 466 */ 467 if ((so->so_state & SS_ISCONNECTED) == 0 || 468 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 469 return; 470 471 /* If we are in a DESYNC state, try to send a RTM_DESYNC packet */ 472 if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0) 473 return; 474 475 /* 476 * If we fail to alloc memory or if sbappendaddr() 477 * fails, re-add timeout and try again. 
478 */ 479 desync_mbuf = rtm_msg1(RTM_DESYNC, NULL); 480 if (desync_mbuf != NULL) { 481 if (sbappendaddr(so, &so->so_rcv, &route_src, 482 desync_mbuf, NULL) != 0) { 483 rop->rop_flags &= ~ROUTECB_FLAG_DESYNC; 484 sorwakeup(rop->rop_socket); 485 return; 486 } 487 m_freem(desync_mbuf); 488 } 489 /* Re-add timeout to try sending msg again */ 490 timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT); 491 } 492 493 void 494 route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family) 495 { 496 struct socket *so; 497 struct rtpcb *rop; 498 struct rt_msghdr *rtm; 499 struct mbuf *m = m0; 500 struct srp_ref sr; 501 502 /* ensure that we can access the rtm_type via mtod() */ 503 if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { 504 m_freem(m); 505 return; 506 } 507 508 SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { 509 /* 510 * If route socket is bound to an address family only send 511 * messages that match the address family. Address family 512 * agnostic messages are always sent. 513 */ 514 if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC && 515 rop->rop_proto != sa_family) 516 continue; 517 518 519 so = rop->rop_socket; 520 solock(so); 521 522 /* 523 * Check to see if we don't want our own messages and 524 * if we can receive anything. 
525 */ 526 if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) || 527 !(so->so_state & SS_ISCONNECTED) || 528 (so->so_rcv.sb_state & SS_CANTRCVMORE)) 529 goto next; 530 531 /* filter messages that the process does not want */ 532 rtm = mtod(m, struct rt_msghdr *); 533 /* but RTM_DESYNC can't be filtered */ 534 if (rtm->rtm_type != RTM_DESYNC) { 535 if (rop->rop_msgfilter != 0 && 536 !(rop->rop_msgfilter & (1U << rtm->rtm_type))) 537 goto next; 538 if (ISSET(rop->rop_flagfilter, rtm->rtm_flags)) 539 goto next; 540 } 541 switch (rtm->rtm_type) { 542 case RTM_IFANNOUNCE: 543 case RTM_DESYNC: 544 /* no tableid */ 545 break; 546 case RTM_RESOLVE: 547 case RTM_NEWADDR: 548 case RTM_DELADDR: 549 case RTM_IFINFO: 550 case RTM_80211INFO: 551 case RTM_BFD: 552 /* check against rdomain id */ 553 if (rop->rop_rtableid != RTABLE_ANY && 554 rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid) 555 goto next; 556 break; 557 default: 558 if (rop->rop_priority != 0 && 559 rop->rop_priority < rtm->rtm_priority) 560 goto next; 561 /* check against rtable id */ 562 if (rop->rop_rtableid != RTABLE_ANY && 563 rop->rop_rtableid != rtm->rtm_tableid) 564 goto next; 565 break; 566 } 567 568 /* 569 * Check to see if the flush flag is set. If so, don't queue 570 * any more messages until the flag is cleared. 
571 */ 572 if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) 573 goto next; 574 575 rtm_sendup(so, m); 576 next: 577 sounlock(so); 578 } 579 SRPL_LEAVE(&sr); 580 581 m_freem(m); 582 } 583 584 int 585 rtm_sendup(struct socket *so, struct mbuf *m0) 586 { 587 struct rtpcb *rop = sotortpcb(so); 588 struct mbuf *m; 589 590 soassertlocked(so); 591 592 m = m_copym(m0, 0, M_COPYALL, M_NOWAIT); 593 if (m == NULL) 594 return (ENOMEM); 595 596 if (sbspace(so, &so->so_rcv) < (2 * MSIZE) || 597 sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) { 598 /* Flag socket as desync'ed and flush required */ 599 rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH; 600 rtm_senddesync(so); 601 m_freem(m); 602 return (ENOBUFS); 603 } 604 605 sorwakeup(so); 606 return (0); 607 } 608 609 struct rt_msghdr * 610 rtm_report(struct rtentry *rt, u_char type, int seq, int tableid) 611 { 612 struct rt_msghdr *rtm; 613 struct rt_addrinfo info; 614 struct sockaddr_rtlabel sa_rl; 615 struct sockaddr_in6 sa_mask; 616 #ifdef BFD 617 struct sockaddr_bfd sa_bfd; 618 #endif 619 struct ifnet *ifp = NULL; 620 int len; 621 622 bzero(&info, sizeof(info)); 623 info.rti_info[RTAX_DST] = rt_key(rt); 624 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 625 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 626 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 627 #ifdef BFD 628 if (rt->rt_flags & RTF_BFD) { 629 KERNEL_LOCK(); 630 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 631 KERNEL_UNLOCK(); 632 } 633 #endif 634 #ifdef MPLS 635 if (rt->rt_flags & RTF_MPLS) { 636 struct sockaddr_mpls sa_mpls; 637 638 bzero(&sa_mpls, sizeof(sa_mpls)); 639 sa_mpls.smpls_family = AF_MPLS; 640 sa_mpls.smpls_len = sizeof(sa_mpls); 641 sa_mpls.smpls_label = ((struct rt_mpls *) 642 rt->rt_llinfo)->mpls_label; 643 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 644 info.rti_mpls = ((struct rt_mpls *) 645 rt->rt_llinfo)->mpls_operation; 646 } 647 #endif 648 ifp = if_get(rt->rt_ifidx); 649 if (ifp 
!= NULL) { 650 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 651 info.rti_info[RTAX_IFA] = rtable_getsource(tableid, 652 info.rti_info[RTAX_DST]->sa_family); 653 if (info.rti_info[RTAX_IFA] == NULL) 654 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 655 if (ifp->if_flags & IFF_POINTOPOINT) 656 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 657 } 658 if_put(ifp); 659 /* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */ 660 661 /* build new route message */ 662 len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL); 663 rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO); 664 665 rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL); 666 rtm->rtm_type = type; 667 rtm->rtm_index = rt->rt_ifidx; 668 rtm->rtm_tableid = tableid; 669 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 670 rtm->rtm_flags = rt->rt_flags; 671 rtm->rtm_pid = curproc->p_p->ps_pid; 672 rtm->rtm_seq = seq; 673 rtm_getmetrics(rt, &rtm->rtm_rmx); 674 rtm->rtm_addrs = info.rti_addrs; 675 #ifdef MPLS 676 rtm->rtm_mpls = info.rti_mpls; 677 #endif 678 return rtm; 679 } 680 681 int 682 route_output(struct mbuf *m, struct socket *so) 683 { 684 struct rt_msghdr *rtm = NULL; 685 struct rtentry *rt = NULL; 686 struct rt_addrinfo info; 687 struct ifnet *ifp; 688 int len, seq, useloopback, error = 0; 689 u_int tableid; 690 u_int8_t prio; 691 u_char vers, type; 692 693 if (m == NULL || ((m->m_len < sizeof(int32_t)) && 694 (m = m_pullup(m, sizeof(int32_t))) == NULL)) 695 return (ENOBUFS); 696 if ((m->m_flags & M_PKTHDR) == 0) 697 panic("route_output"); 698 699 useloopback = so->so_options & SO_USELOOPBACK; 700 701 /* 702 * The socket can't be closed concurrently because the file 703 * descriptor reference is still held. 
704 */ 705 706 sounlock(so); 707 708 len = m->m_pkthdr.len; 709 if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 710 sizeof(rtm->rtm_hdrlen) || 711 len != mtod(m, struct rt_msghdr *)->rtm_msglen) { 712 error = EINVAL; 713 goto fail; 714 } 715 vers = mtod(m, struct rt_msghdr *)->rtm_version; 716 switch (vers) { 717 case RTM_VERSION: 718 if (len < sizeof(struct rt_msghdr)) { 719 error = EINVAL; 720 goto fail; 721 } 722 if (len > RTM_MAXSIZE) { 723 error = EMSGSIZE; 724 goto fail; 725 } 726 rtm = malloc(len, M_RTABLE, M_WAITOK); 727 m_copydata(m, 0, len, rtm); 728 break; 729 default: 730 error = EPROTONOSUPPORT; 731 goto fail; 732 } 733 734 /* Verify that the caller is sending an appropriate message early */ 735 switch (rtm->rtm_type) { 736 case RTM_ADD: 737 case RTM_DELETE: 738 case RTM_GET: 739 case RTM_CHANGE: 740 case RTM_PROPOSAL: 741 case RTM_SOURCE: 742 break; 743 default: 744 error = EOPNOTSUPP; 745 goto fail; 746 } 747 /* 748 * Verify that the header length is valid. 749 * All messages from userland start with a struct rt_msghdr. 750 */ 751 if (rtm->rtm_hdrlen == 0) /* old client */ 752 rtm->rtm_hdrlen = sizeof(struct rt_msghdr); 753 if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) || 754 len < rtm->rtm_hdrlen) { 755 error = EINVAL; 756 goto fail; 757 } 758 759 rtm->rtm_pid = curproc->p_p->ps_pid; 760 761 /* 762 * Verify that the caller has the appropriate privilege; RTM_GET 763 * is the only operation the non-superuser is allowed. 764 */ 765 if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) { 766 error = EACCES; 767 goto fail; 768 } 769 tableid = rtm->rtm_tableid; 770 if (!rtable_exists(tableid)) { 771 if (rtm->rtm_type == RTM_ADD) { 772 if ((error = rtable_add(tableid)) != 0) 773 goto fail; 774 } else { 775 error = EINVAL; 776 goto fail; 777 } 778 } 779 780 /* Do not let userland play with kernel-only flags. 
*/ 781 if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) { 782 error = EINVAL; 783 goto fail; 784 } 785 786 /* make sure that kernel-only bits are not set */ 787 rtm->rtm_priority &= RTP_MASK; 788 rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED); 789 rtm->rtm_fmask &= RTF_FMASK; 790 791 if (rtm->rtm_priority != 0) { 792 if (rtm->rtm_priority > RTP_MAX || 793 rtm->rtm_priority == RTP_LOCAL) { 794 error = EINVAL; 795 goto fail; 796 } 797 prio = rtm->rtm_priority; 798 } else if (rtm->rtm_type != RTM_ADD) 799 prio = RTP_ANY; 800 else if (rtm->rtm_flags & RTF_STATIC) 801 prio = 0; 802 else 803 prio = RTP_DEFAULT; 804 805 bzero(&info, sizeof(info)); 806 info.rti_addrs = rtm->rtm_addrs; 807 if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm, 808 len + (caddr_t)rtm, &info)) != 0) 809 goto fail; 810 811 info.rti_flags = rtm->rtm_flags; 812 813 if (rtm->rtm_type != RTM_SOURCE && 814 rtm->rtm_type != RTM_PROPOSAL && 815 (info.rti_info[RTAX_DST] == NULL || 816 info.rti_info[RTAX_DST]->sa_family >= AF_MAX || 817 (info.rti_info[RTAX_GATEWAY] != NULL && 818 info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) || 819 info.rti_info[RTAX_GENMASK] != NULL)) { 820 error = EINVAL; 821 goto fail; 822 } 823 #ifdef MPLS 824 info.rti_mpls = rtm->rtm_mpls; 825 #endif 826 827 if (info.rti_info[RTAX_GATEWAY] != NULL && 828 info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && 829 (info.rti_flags & RTF_CLONING) == 0) { 830 info.rti_flags |= RTF_LLINFO; 831 } 832 833 /* 834 * Validate RTM_PROPOSAL and pass it along or error out. 835 */ 836 if (rtm->rtm_type == RTM_PROPOSAL) { 837 if (rtm_validate_proposal(&info) == -1) { 838 error = EINVAL; 839 goto fail; 840 } 841 /* 842 * If this is a solicitation proposal forward request to 843 * all interfaces. Most handlers will ignore it but at least 844 * umb(4) will send a response to this event. 
845 */ 846 if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) { 847 NET_LOCK(); 848 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 849 ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL); 850 } 851 NET_UNLOCK(); 852 } 853 } else if (rtm->rtm_type == RTM_SOURCE) { 854 if (info.rti_info[RTAX_IFA] == NULL) { 855 error = EINVAL; 856 goto fail; 857 } 858 NET_LOCK(); 859 error = rt_setsource(tableid, info.rti_info[RTAX_IFA]); 860 NET_UNLOCK(); 861 if (error) 862 goto fail; 863 } else { 864 error = rtm_output(rtm, &rt, &info, prio, tableid); 865 if (!error) { 866 type = rtm->rtm_type; 867 seq = rtm->rtm_seq; 868 free(rtm, M_RTABLE, len); 869 NET_LOCK_SHARED(); 870 rtm = rtm_report(rt, type, seq, tableid); 871 NET_UNLOCK_SHARED(); 872 len = rtm->rtm_msglen; 873 } 874 } 875 876 rtfree(rt); 877 if (error) { 878 rtm->rtm_errno = error; 879 } else { 880 rtm->rtm_flags |= RTF_DONE; 881 } 882 883 /* 884 * Check to see if we don't want our own messages. 885 */ 886 if (!useloopback) { 887 if (rtptable.rtp_count == 0) { 888 /* no other listener and no loopback of messages */ 889 goto fail; 890 } 891 } 892 if (m_copyback(m, 0, len, rtm, M_NOWAIT)) { 893 m_freem(m); 894 m = NULL; 895 } else if (m->m_pkthdr.len > len) 896 m_adj(m, len - m->m_pkthdr.len); 897 free(rtm, M_RTABLE, len); 898 if (m) 899 route_input(m, so, info.rti_info[RTAX_DST] ? 
900 info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); 901 solock(so); 902 903 return (error); 904 fail: 905 free(rtm, M_RTABLE, len); 906 m_freem(m); 907 solock(so); 908 909 return (error); 910 } 911 912 int 913 rtm_output(struct rt_msghdr *rtm, struct rtentry **prt, 914 struct rt_addrinfo *info, uint8_t prio, unsigned int tableid) 915 { 916 struct rtentry *rt = *prt; 917 struct ifnet *ifp = NULL; 918 int plen, newgate = 0, error = 0; 919 920 switch (rtm->rtm_type) { 921 case RTM_ADD: 922 if (info->rti_info[RTAX_GATEWAY] == NULL) { 923 error = EINVAL; 924 break; 925 } 926 927 rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL); 928 if ((error = route_arp_conflict(rt, info))) { 929 rtfree(rt); 930 rt = NULL; 931 break; 932 } 933 934 /* 935 * We cannot go through a delete/create/insert cycle for 936 * cached route because this can lead to races in the 937 * receive path. Instead we update the L2 cache. 938 */ 939 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) { 940 ifp = if_get(rt->rt_ifidx); 941 if (ifp == NULL) { 942 rtfree(rt); 943 rt = NULL; 944 error = ESRCH; 945 break; 946 } 947 948 goto change; 949 } 950 951 rtfree(rt); 952 rt = NULL; 953 954 NET_LOCK(); 955 if ((error = rtm_getifa(info, tableid)) != 0) { 956 NET_UNLOCK(); 957 break; 958 } 959 error = rtrequest(RTM_ADD, info, prio, &rt, tableid); 960 NET_UNLOCK(); 961 if (error == 0) 962 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, 963 &rt->rt_rmx); 964 break; 965 case RTM_DELETE: 966 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 967 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 968 prio); 969 if (rt == NULL) { 970 error = ESRCH; 971 break; 972 } 973 974 /* 975 * If we got multipath routes, we require users to specify 976 * a matching gateway. 
977 */ 978 if (ISSET(rt->rt_flags, RTF_MPATH) && 979 info->rti_info[RTAX_GATEWAY] == NULL) { 980 error = ESRCH; 981 break; 982 } 983 984 ifp = if_get(rt->rt_ifidx); 985 if (ifp == NULL) { 986 rtfree(rt); 987 rt = NULL; 988 error = ESRCH; 989 break; 990 } 991 992 /* 993 * Invalidate the cache of automagically created and 994 * referenced L2 entries to make sure that ``rt_gwroute'' 995 * pointer stays valid for other CPUs. 996 */ 997 if ((ISSET(rt->rt_flags, RTF_CACHED))) { 998 NET_LOCK(); 999 ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt); 1000 /* Reset the MTU of the gateway route. */ 1001 rtable_walk(tableid, rt_key(rt)->sa_family, NULL, 1002 route_cleargateway, rt); 1003 NET_UNLOCK(); 1004 break; 1005 } 1006 1007 /* 1008 * Make sure that local routes are only modified by the 1009 * kernel. 1010 */ 1011 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1012 error = EINVAL; 1013 break; 1014 } 1015 1016 rtfree(rt); 1017 rt = NULL; 1018 1019 NET_LOCK(); 1020 error = rtrequest_delete(info, prio, ifp, &rt, tableid); 1021 NET_UNLOCK(); 1022 break; 1023 case RTM_CHANGE: 1024 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1025 info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY], 1026 prio); 1027 /* 1028 * If we got multipath routes, we require users to specify 1029 * a matching gateway. 1030 */ 1031 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) && 1032 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1033 rtfree(rt); 1034 rt = NULL; 1035 } 1036 1037 /* 1038 * If RTAX_GATEWAY is the argument we're trying to 1039 * change, try to find a compatible route. 1040 */ 1041 if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) { 1042 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1043 info->rti_info[RTAX_NETMASK], NULL, prio); 1044 /* Ensure we don't pick a multipath one. 
*/ 1045 if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) { 1046 rtfree(rt); 1047 rt = NULL; 1048 } 1049 } 1050 1051 if (rt == NULL) { 1052 error = ESRCH; 1053 break; 1054 } 1055 1056 /* 1057 * Make sure that local routes are only modified by the 1058 * kernel. 1059 */ 1060 if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { 1061 error = EINVAL; 1062 break; 1063 } 1064 1065 ifp = if_get(rt->rt_ifidx); 1066 if (ifp == NULL) { 1067 rtfree(rt); 1068 rt = NULL; 1069 error = ESRCH; 1070 break; 1071 } 1072 1073 /* 1074 * RTM_CHANGE needs a perfect match. 1075 */ 1076 plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family, 1077 info->rti_info[RTAX_NETMASK]); 1078 if (rt_plen(rt) != plen) { 1079 error = ESRCH; 1080 break; 1081 } 1082 1083 if (info->rti_info[RTAX_GATEWAY] != NULL) 1084 if (rt->rt_gateway == NULL || 1085 bcmp(rt->rt_gateway, 1086 info->rti_info[RTAX_GATEWAY], 1087 info->rti_info[RTAX_GATEWAY]->sa_len)) { 1088 newgate = 1; 1089 } 1090 /* 1091 * Check reachable gateway before changing the route. 1092 * New gateway could require new ifaddr, ifp; 1093 * flags may also be different; ifp may be specified 1094 * by ll sockaddr when protocol address is ambiguous. 1095 */ 1096 if (newgate || info->rti_info[RTAX_IFP] != NULL || 1097 info->rti_info[RTAX_IFA] != NULL) { 1098 struct ifaddr *ifa = NULL; 1099 1100 NET_LOCK(); 1101 if ((error = rtm_getifa(info, tableid)) != 0) { 1102 NET_UNLOCK(); 1103 break; 1104 } 1105 ifa = info->rti_ifa; 1106 if (rt->rt_ifa != ifa) { 1107 ifp->if_rtrequest(ifp, RTM_DELETE, rt); 1108 ifafree(rt->rt_ifa); 1109 1110 rt->rt_ifa = ifaref(ifa); 1111 rt->rt_ifidx = ifa->ifa_ifp->if_index; 1112 /* recheck link state after ifp change */ 1113 rt_if_linkstate_change(rt, ifa->ifa_ifp, 1114 tableid); 1115 } 1116 NET_UNLOCK(); 1117 } 1118 change: 1119 if (info->rti_info[RTAX_GATEWAY] != NULL) { 1120 /* When updating the gateway, make sure it is valid. 
*/ 1121 if (!newgate && rt->rt_gateway->sa_family != 1122 info->rti_info[RTAX_GATEWAY]->sa_family) { 1123 error = EINVAL; 1124 break; 1125 } 1126 1127 NET_LOCK(); 1128 error = rt_setgate(rt, 1129 info->rti_info[RTAX_GATEWAY], tableid); 1130 NET_UNLOCK(); 1131 if (error) 1132 break; 1133 } 1134 #ifdef MPLS 1135 if (rtm->rtm_flags & RTF_MPLS) { 1136 NET_LOCK(); 1137 error = rt_mpls_set(rt, 1138 info->rti_info[RTAX_SRC], info->rti_mpls); 1139 NET_UNLOCK(); 1140 if (error) 1141 break; 1142 } else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) { 1143 NET_LOCK(); 1144 /* if gateway changed remove MPLS information */ 1145 rt_mpls_clear(rt); 1146 NET_UNLOCK(); 1147 } 1148 #endif 1149 1150 #ifdef BFD 1151 if (ISSET(rtm->rtm_flags, RTF_BFD)) { 1152 KERNEL_LOCK(); 1153 error = bfdset(rt); 1154 KERNEL_UNLOCK(); 1155 if (error) 1156 break; 1157 } else if (!ISSET(rtm->rtm_flags, RTF_BFD) && 1158 ISSET(rtm->rtm_fmask, RTF_BFD)) { 1159 KERNEL_LOCK(); 1160 bfdclear(rt); 1161 KERNEL_UNLOCK(); 1162 } 1163 #endif 1164 1165 NET_LOCK(); 1166 /* Hack to allow some flags to be toggled */ 1167 if (rtm->rtm_fmask) { 1168 /* MPLS flag it is set by rt_mpls_set() */ 1169 rtm->rtm_fmask &= ~RTF_MPLS; 1170 rtm->rtm_flags &= ~RTF_MPLS; 1171 rt->rt_flags = 1172 (rt->rt_flags & ~rtm->rtm_fmask) | 1173 (rtm->rtm_flags & rtm->rtm_fmask); 1174 } 1175 rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); 1176 1177 ifp->if_rtrequest(ifp, RTM_ADD, rt); 1178 1179 if (info->rti_info[RTAX_LABEL] != NULL) { 1180 const char *rtlabel = ((const struct sockaddr_rtlabel *) 1181 info->rti_info[RTAX_LABEL])->sr_label; 1182 rtlabel_unref(rt->rt_labelid); 1183 rt->rt_labelid = rtlabel_name2id(rtlabel); 1184 } 1185 if_group_routechange(info->rti_info[RTAX_DST], 1186 info->rti_info[RTAX_NETMASK]); 1187 rt->rt_locks &= ~(rtm->rtm_inits); 1188 rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); 1189 NET_UNLOCK(); 1190 break; 1191 case RTM_GET: 1192 rt = rtable_lookup(tableid, info->rti_info[RTAX_DST], 1193 
info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	if_put(ifp);
	*prt = rt;
	return (error);
}

/*
 * Pick the interface address to use for a route to `dst' via `gateway'
 * in routing table `rtableid'.
 *
 * Without RTF_GATEWAY, a host route first tries `dst' as the far end of
 * a point-to-point link, then `gateway' as a local address.  With
 * RTF_GATEWAY, `gateway' is tried as the far end of a point-to-point
 * link.  If that finds nothing, an AF_LINK gateway selects the interface
 * by its index, and any other gateway is resolved with rtalloc().
 *
 * If the address found is of a different family than `dst', an address
 * on the same interface that suits `dst' is preferred when one exists.
 *
 * Returns NULL if no address could be found.
 */
struct ifaddr *
ifa_ifwithroute(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int rtableid)
{
	struct ifaddr *ifa;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL) {
		if (gateway->sa_family == AF_LINK) {
			const struct sockaddr_dl *sdl;
			struct ifnet *ifp;

			/* A link-level gateway names the interface by index. */
			sdl = satosdl_const(gateway);
			ifp = if_get(sdl->sdl_index);
			if (ifp != NULL)
				ifa = ifaof_ifpforaddr(dst, ifp);
			if_put(ifp);
		} else {
			struct rtentry *rt;

			/*
			 * Use the address of the route that leads to the
			 * gateway.  Only the pointer is kept; the route
			 * itself is released again right away.
			 */
			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
			if (rt != NULL)
				ifa = rt->rt_ifa;
			rtfree(rt);
		}
	}
	if (ifa == NULL)
		return (NULL);
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		/* Prefer an address matching dst, keep the old one if none. */
		struct ifaddr *oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}

/*
 * Fill in info->rti_ifa from the addresses found in `info'.
 *
 * An address already present in info->rti_ifa is kept.  Otherwise an
 * exact match on RTAX_IFA is tried first, then the first of RTAX_IFA,
 * RTAX_GATEWAY and RTAX_DST that is set is looked up on the interface
 * named by RTAX_IFP, or through ifa_ifwithroute() when no interface was
 * given.
 *
 * Must be called with the NET_LOCK() held.
 * Returns 0 on success, ENETUNREACH if no address was found.
 */
int
rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
{
	struct ifnet *ifp = NULL;

	/*
	 * The "returned" `ifa' is guaranteed to be alive only if
	 * the NET_LOCK() is held.
	 */
	NET_ASSERT_LOCKED();

	/*
	 * ifp may be specified by sockaddr_dl when protocol address
	 * is ambiguous
	 */
	if (info->rti_info[RTAX_IFP] != NULL) {
		const struct sockaddr_dl *sdl;

		sdl = satosdl_const(info->rti_info[RTAX_IFP]);
		ifp = if_get(sdl->sdl_index);
	}

#ifdef IPSEC
	/*
	 * If the destination is a PF_KEY address, we'll look
	 * for the existence of an encap interface number or address
	 * in the options list of the gateway. By default, we'll return
	 * enc0.
	 */
	if (info->rti_info[RTAX_DST] &&
	    info->rti_info[RTAX_DST]->sa_family == PF_KEY)
		info->rti_ifa = enc_getifa(rtid, 0);
#endif

	if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL)
		info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid);

	if (info->rti_ifa == NULL) {
		const struct sockaddr *sa;

		/* Best available hint: IFA, then GATEWAY, then DST. */
		if ((sa = info->rti_info[RTAX_IFA]) == NULL)
			if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL)
				sa = info->rti_info[RTAX_DST];

		if (sa != NULL && ifp != NULL)
			info->rti_ifa = ifaof_ifpforaddr(sa, ifp);
		else if (info->rti_info[RTAX_DST] != NULL &&
		    info->rti_info[RTAX_GATEWAY] != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_GATEWAY],
			    rtid);
		else if (sa != NULL)
			info->rti_ifa = ifa_ifwithroute(info->rti_flags,
			    sa, sa, rtid);
	}

	/* ifp is NULL when RTAX_IFP was not given. */
	if_put(ifp);

	if (info->rti_ifa == NULL)
		return (ENETUNREACH);

	return (0);
}

/*
 * Reset the MTU of `rt' to 0 if it is a gateway route whose next-hop
 * route is `arg' and whose MTU is not locked (RTV_MTU).
 *
 * `rtableid' is unused.  Always returns 0.
 * NOTE(review): the signature suggests a routing table walker callback;
 * confirm against the caller.
 */
int
route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid)
{
	struct rtentry *nhrt = arg;

	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
	    !ISSET(rt->rt_locks, RTV_MTU))
		rt->rt_mtu = 0;

	return (0);
}

/*
 * Check if the user request to insert an ARP entry does not conflict
 * with
existing ones. 1342 * 1343 * Only two entries are allowed for a given IP address: a private one 1344 * (priv) and a public one (pub). 1345 */ 1346 int 1347 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1348 { 1349 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1350 1351 if ((info->rti_flags & RTF_LLINFO) == 0 || 1352 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1353 return (0); 1354 1355 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1356 return (0); 1357 1358 /* If the entry is cached, it can be updated. */ 1359 if (ISSET(rt->rt_flags, RTF_CACHED)) 1360 return (0); 1361 1362 /* 1363 * Same destination, not cached and both "priv" or "pub" conflict. 1364 * If a second entry exists, it always conflict. 1365 */ 1366 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1367 ISSET(rt->rt_flags, RTF_MPATH)) 1368 return (EEXIST); 1369 1370 /* No conflict but an entry exist so we need to force mpath. */ 1371 info->rti_flags |= RTF_MPATH; 1372 return (0); 1373 } 1374 1375 void 1376 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1377 struct rt_kmetrics *out) 1378 { 1379 int64_t expire; 1380 1381 if (which & RTV_MTU) 1382 out->rmx_mtu = in->rmx_mtu; 1383 if (which & RTV_EXPIRE) { 1384 expire = in->rmx_expire; 1385 if (expire != 0) { 1386 expire -= gettime(); 1387 expire += getuptime(); 1388 } 1389 1390 out->rmx_expire = expire; 1391 } 1392 } 1393 1394 void 1395 rtm_getmetrics(const struct rtentry *rt, struct rt_metrics *out) 1396 { 1397 const struct rt_kmetrics *in = &rt->rt_rmx; 1398 int64_t expire; 1399 1400 expire = in->rmx_expire; 1401 if (expire == 0) 1402 expire = rt_timer_get_expire(rt); 1403 if (expire != 0) { 1404 expire -= getuptime(); 1405 expire += gettime(); 1406 } 1407 1408 bzero(out, sizeof(*out)); 1409 out->rmx_locks = in->rmx_locks; 1410 out->rmx_mtu = in->rmx_mtu; 1411 out->rmx_expire = expire; 1412 out->rmx_pksent = in->rmx_pksent; 1413 } 1414 1415 #define ROUNDUP(a) \ 1416 ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1417 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1418 1419 int 1420 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1421 { 1422 int i; 1423 1424 /* 1425 * Parse address bits, split address storage in chunks, and 1426 * set info pointers. Use sa_len for traversing the memory 1427 * and check that we stay within in the limit. 1428 */ 1429 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1430 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1431 struct sockaddr *sa; 1432 1433 if ((rtinfo->rti_addrs & (1U << i)) == 0) 1434 continue; 1435 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1436 return (EINVAL); 1437 sa = (struct sockaddr *)cp; 1438 if (cp + sa->sa_len > cplim) 1439 return (EINVAL); 1440 rtinfo->rti_info[i] = sa; 1441 ADVANCE(cp, sa); 1442 } 1443 /* 1444 * Check that the address family is suitable for the route address 1445 * type. Check that each address has a size that fits its family 1446 * and its length is within the size. Strings within addresses must 1447 * be NUL terminated. 1448 */ 1449 for (i = 0; i < RTAX_MAX; i++) { 1450 const struct sockaddr *sa; 1451 size_t len, maxlen, size; 1452 1453 sa = rtinfo->rti_info[i]; 1454 if (sa == NULL) 1455 continue; 1456 maxlen = size = 0; 1457 switch (i) { 1458 case RTAX_DST: 1459 case RTAX_GATEWAY: 1460 case RTAX_SRC: 1461 switch (sa->sa_family) { 1462 case AF_INET: 1463 size = sizeof(struct sockaddr_in); 1464 break; 1465 case AF_LINK: 1466 size = sizeof(struct sockaddr_dl); 1467 break; 1468 #ifdef INET6 1469 case AF_INET6: 1470 size = sizeof(struct sockaddr_in6); 1471 break; 1472 #endif 1473 #ifdef MPLS 1474 case AF_MPLS: 1475 size = sizeof(struct sockaddr_mpls); 1476 break; 1477 #endif 1478 } 1479 break; 1480 case RTAX_IFP: 1481 if (sa->sa_family != AF_LINK) 1482 return (EAFNOSUPPORT); 1483 /* 1484 * XXX Should be sizeof(struct sockaddr_dl), but 1485 * route(8) has a bug and provides less memory. 
1486 * arp(8) has another bug and uses sizeof pointer. 1487 */ 1488 size = 4; 1489 break; 1490 case RTAX_IFA: 1491 switch (sa->sa_family) { 1492 case AF_INET: 1493 size = sizeof(struct sockaddr_in); 1494 break; 1495 #ifdef INET6 1496 case AF_INET6: 1497 size = sizeof(struct sockaddr_in6); 1498 break; 1499 #endif 1500 default: 1501 return (EAFNOSUPPORT); 1502 } 1503 break; 1504 case RTAX_LABEL: 1505 if (sa->sa_family != AF_UNSPEC) 1506 return (EAFNOSUPPORT); 1507 maxlen = RTLABEL_LEN; 1508 size = sizeof(struct sockaddr_rtlabel); 1509 break; 1510 #ifdef BFD 1511 case RTAX_BFD: 1512 if (sa->sa_family != AF_UNSPEC) 1513 return (EAFNOSUPPORT); 1514 size = sizeof(struct sockaddr_bfd); 1515 break; 1516 #endif 1517 case RTAX_DNS: 1518 /* more validation in rtm_validate_proposal */ 1519 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1520 return (EINVAL); 1521 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1522 sr_dns)) 1523 return (EINVAL); 1524 switch (sa->sa_family) { 1525 case AF_INET: 1526 #ifdef INET6 1527 case AF_INET6: 1528 #endif 1529 break; 1530 default: 1531 return (EAFNOSUPPORT); 1532 } 1533 break; 1534 case RTAX_STATIC: 1535 switch (sa->sa_family) { 1536 case AF_INET: 1537 #ifdef INET6 1538 case AF_INET6: 1539 #endif 1540 break; 1541 default: 1542 return (EAFNOSUPPORT); 1543 } 1544 maxlen = RTSTATIC_LEN; 1545 size = sizeof(struct sockaddr_rtstatic); 1546 break; 1547 case RTAX_SEARCH: 1548 if (sa->sa_family != AF_UNSPEC) 1549 return (EAFNOSUPPORT); 1550 maxlen = RTSEARCH_LEN; 1551 size = sizeof(struct sockaddr_rtsearch); 1552 break; 1553 } 1554 if (size) { 1555 /* memory for the full struct must be provided */ 1556 if (sa->sa_len < size) 1557 return (EINVAL); 1558 } 1559 if (maxlen) { 1560 /* this should not happen */ 1561 if (2 + maxlen > size) 1562 return (EINVAL); 1563 /* strings must be NUL terminated within the struct */ 1564 len = strnlen(sa->sa_data, maxlen); 1565 if (len >= maxlen || 2 + len >= sa->sa_len) 1566 return (EINVAL); 1567 break; 1568 } 1569 
} 1570 return (0); 1571 } 1572 1573 struct mbuf * 1574 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1575 { 1576 struct rt_msghdr *rtm; 1577 struct mbuf *m; 1578 int i; 1579 const struct sockaddr *sa; 1580 int len, dlen, hlen; 1581 1582 switch (type) { 1583 case RTM_DELADDR: 1584 case RTM_NEWADDR: 1585 hlen = sizeof(struct ifa_msghdr); 1586 break; 1587 case RTM_IFINFO: 1588 hlen = sizeof(struct if_msghdr); 1589 break; 1590 case RTM_IFANNOUNCE: 1591 hlen = sizeof(struct if_announcemsghdr); 1592 break; 1593 #ifdef BFD 1594 case RTM_BFD: 1595 hlen = sizeof(struct bfd_msghdr); 1596 break; 1597 #endif 1598 case RTM_80211INFO: 1599 hlen = sizeof(struct if_ieee80211_msghdr); 1600 break; 1601 default: 1602 hlen = sizeof(struct rt_msghdr); 1603 break; 1604 } 1605 len = hlen; 1606 for (i = 0; i < RTAX_MAX; i++) { 1607 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1608 continue; 1609 len += ROUNDUP(sa->sa_len); 1610 } 1611 if (len > MCLBYTES) 1612 panic("rtm_msg1"); 1613 m = m_gethdr(M_DONTWAIT, MT_DATA); 1614 if (m && len > MHLEN) { 1615 MCLGET(m, M_DONTWAIT); 1616 if ((m->m_flags & M_EXT) == 0) { 1617 m_free(m); 1618 m = NULL; 1619 } 1620 } 1621 if (m == NULL) 1622 return (m); 1623 m->m_pkthdr.len = m->m_len = len; 1624 m->m_pkthdr.ph_ifidx = 0; 1625 rtm = mtod(m, struct rt_msghdr *); 1626 bzero(rtm, len); 1627 len = hlen; 1628 for (i = 0; i < RTAX_MAX; i++) { 1629 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1630 continue; 1631 rtinfo->rti_addrs |= (1U << i); 1632 dlen = ROUNDUP(sa->sa_len); 1633 if (m_copyback(m, len, sa->sa_len, sa, M_NOWAIT)) { 1634 m_freem(m); 1635 return (NULL); 1636 } 1637 len += dlen; 1638 } 1639 rtm->rtm_msglen = len; 1640 rtm->rtm_hdrlen = hlen; 1641 rtm->rtm_version = RTM_VERSION; 1642 rtm->rtm_type = type; 1643 return (m); 1644 } 1645 1646 int 1647 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1648 struct walkarg *w) 1649 { 1650 int i; 1651 int len, dlen, hlen, second_time = 0; 1652 caddr_t cp0; 
1653 1654 rtinfo->rti_addrs = 0; 1655 again: 1656 switch (type) { 1657 case RTM_DELADDR: 1658 case RTM_NEWADDR: 1659 len = sizeof(struct ifa_msghdr); 1660 break; 1661 case RTM_IFINFO: 1662 len = sizeof(struct if_msghdr); 1663 break; 1664 default: 1665 len = sizeof(struct rt_msghdr); 1666 break; 1667 } 1668 hlen = len; 1669 if ((cp0 = cp) != NULL) 1670 cp += len; 1671 for (i = 0; i < RTAX_MAX; i++) { 1672 const struct sockaddr *sa; 1673 1674 if ((sa = rtinfo->rti_info[i]) == NULL) 1675 continue; 1676 rtinfo->rti_addrs |= (1U << i); 1677 dlen = ROUNDUP(sa->sa_len); 1678 if (cp) { 1679 bcopy(sa, cp, sa->sa_len); 1680 bzero(cp + sa->sa_len, dlen - sa->sa_len); 1681 cp += dlen; 1682 } 1683 len += dlen; 1684 } 1685 /* align message length to the next natural boundary */ 1686 len = ALIGN(len); 1687 if (cp == 0 && w != NULL && !second_time) { 1688 w->w_needed += len; 1689 if (w->w_needed <= w->w_given && w->w_where) { 1690 if (w->w_tmemsize < len) { 1691 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1692 w->w_tmem = malloc(len, M_RTABLE, 1693 M_NOWAIT | M_ZERO); 1694 if (w->w_tmem) 1695 w->w_tmemsize = len; 1696 } 1697 if (w->w_tmem) { 1698 cp = w->w_tmem; 1699 second_time = 1; 1700 goto again; 1701 } else 1702 w->w_where = 0; 1703 } 1704 } 1705 if (cp && w) /* clear the message header */ 1706 bzero(cp0, hlen); 1707 1708 if (cp) { 1709 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1710 1711 rtm->rtm_version = RTM_VERSION; 1712 rtm->rtm_type = type; 1713 rtm->rtm_msglen = len; 1714 rtm->rtm_hdrlen = hlen; 1715 } 1716 return (len); 1717 } 1718 1719 void 1720 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1721 { 1722 struct rt_addrinfo info; 1723 struct ifnet *ifp; 1724 struct sockaddr_rtlabel sa_rl; 1725 struct sockaddr_in6 sa_mask; 1726 1727 memset(&info, 0, sizeof(info)); 1728 info.rti_info[RTAX_DST] = rt_key(rt); 1729 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1730 if (!ISSET(rt->rt_flags, RTF_HOST)) 1731 info.rti_info[RTAX_NETMASK] = 
rt_plen2mask(rt, &sa_mask); 1732 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1733 ifp = if_get(rt->rt_ifidx); 1734 if (ifp != NULL) { 1735 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1736 info.rti_info[RTAX_IFA] = rtable_getsource(rtableid, 1737 info.rti_info[RTAX_DST]->sa_family); 1738 if (info.rti_info[RTAX_IFA] == NULL) 1739 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1740 } 1741 1742 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1743 rtableid); 1744 if_put(ifp); 1745 } 1746 1747 /* 1748 * This routine is called to generate a message from the routing 1749 * socket indicating that a redirect has occurred, a routing lookup 1750 * has failed, or that a protocol has detected timeouts to a particular 1751 * destination. 1752 */ 1753 void 1754 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1755 u_int ifidx, int error, u_int tableid) 1756 { 1757 struct rt_msghdr *rtm; 1758 struct mbuf *m; 1759 const struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1760 1761 if (rtptable.rtp_count == 0) 1762 return; 1763 m = rtm_msg1(type, rtinfo); 1764 if (m == NULL) 1765 return; 1766 rtm = mtod(m, struct rt_msghdr *); 1767 rtm->rtm_flags = RTF_DONE | flags; 1768 rtm->rtm_priority = prio; 1769 rtm->rtm_errno = error; 1770 rtm->rtm_tableid = tableid; 1771 rtm->rtm_addrs = rtinfo->rti_addrs; 1772 rtm->rtm_index = ifidx; 1773 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1774 } 1775 1776 /* 1777 * This routine is called to generate a message from the routing 1778 * socket indicating that the status of a network interface has changed. 
 */
/*
 * rtm_ifchg() sends an RTM_IFINFO message carrying the link-level
 * address, index, rdomain, flags and interface data of `ifp'.  Nothing
 * is sent if there are no routing socket listeners or no mbuf is
 * available.
 */
void
rtm_ifchg(struct ifnet *ifp)
{
	struct rt_addrinfo info;
	struct if_msghdr *ifm;
	struct mbuf *m;

	if (rtptable.rtp_count == 0)
		return;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	m = rtm_msg1(RTM_IFINFO, &info);
	if (m == NULL)
		return;
	ifm = mtod(m, struct if_msghdr *);
	ifm->ifm_index = ifp->if_index;
	ifm->ifm_tableid = ifp->if_rdomain;
	ifm->ifm_flags = ifp->if_flags;
	ifm->ifm_xflags = ifp->if_xflags;
	if_getdata(ifp, &ifm->ifm_data);
	ifm->ifm_addrs = info.rti_addrs;
	route_input(m, NULL, AF_UNSPEC);
}

/*
 * This is called to generate messages from the routing socket
 * indicating a network interface has had addresses associated with it.
 * if we ever reverse the logic and replace messages TO the routing
 * socket indicate a request to configure interfaces, then it will
 * be unnecessary as the routing socket will automatically generate
 * copies of it.
 *
 * `cmd' is the message type passed on to rtm_msg1().  The message
 * carries the address, netmask and destination address of `ifa'
 * together with the link-level address of its interface, and is
 * delivered for the family of the address, or AF_UNSPEC if the ifa has
 * no address.
 */
void
rtm_addr(int cmd, struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct mbuf *m;
	struct rt_addrinfo info;
	struct ifa_msghdr *ifam;

	if (rtptable.rtp_count == 0)
		return;

	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_IFA] = ifa->ifa_addr;
	info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
	info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
	if ((m = rtm_msg1(cmd, &info)) == NULL)
		return;
	ifam = mtod(m, struct ifa_msghdr *);
	ifam->ifam_index = ifp->if_index;
	ifam->ifam_metric = ifa->ifa_metric;
	ifam->ifam_flags = ifa->ifa_flags;
	ifam->ifam_addrs = info.rti_addrs;
	ifam->ifam_tableid = ifp->if_rdomain;

	route_input(m, NULL,
	    ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC);
}

/*
 * This is called to generate routing socket messages indicating
 * network interface arrival and departure.
 *
 * `what' is stored in ifan_what unchanged.  The message carries no
 * addresses, only the index and name of the interface.
 */
void
rtm_ifannounce(struct ifnet *ifp, int what)
{
	struct if_announcemsghdr *ifan;
	struct mbuf *m;

	if (rtptable.rtp_count == 0)
		return;
	m = rtm_msg1(RTM_IFANNOUNCE, NULL);
	if (m == NULL)
		return;
	ifan = mtod(m, struct if_announcemsghdr *);
	ifan->ifan_index = ifp->if_index;
	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
	ifan->ifan_what = what;
	route_input(m, NULL, AF_UNSPEC);
}

#ifdef BFD
/*
 * This is used to generate routing socket messages indicating
 * the state of a BFD session.
 *
 * The message carries the destination of the route the session is
 * attached to and the address of that route's ifa; the session state
 * is copied into bm_sa.  The kernel lock must be held.
 */
void
rtm_bfd(struct bfd_config *bfd)
{
	struct bfd_msghdr *bfdm;
	struct sockaddr_bfd sa_bfd;
	struct mbuf *m;
	struct rt_addrinfo info;

	if (rtptable.rtp_count == 0)
		return;
	memset(&info, 0, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt);
	info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr;

	m = rtm_msg1(RTM_BFD, &info);
	if (m == NULL)
		return;
	bfdm = mtod(m, struct bfd_msghdr *);
	bfdm->bm_addrs = info.rti_addrs;

	KERNEL_ASSERT_LOCKED();
	bfd2sa(bfd->bc_rt, &sa_bfd);
	memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd));

	route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family);
}
#endif /* BFD */

/*
 * This is used to generate routing socket messages indicating
 * the state of an ieee80211 interface.
1899 */ 1900 void 1901 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1902 { 1903 struct if_ieee80211_msghdr *ifim; 1904 struct mbuf *m; 1905 1906 if (rtptable.rtp_count == 0) 1907 return; 1908 m = rtm_msg1(RTM_80211INFO, NULL); 1909 if (m == NULL) 1910 return; 1911 ifim = mtod(m, struct if_ieee80211_msghdr *); 1912 ifim->ifim_index = ifp->if_index; 1913 ifim->ifim_tableid = ifp->if_rdomain; 1914 1915 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1916 route_input(m, NULL, AF_UNSPEC); 1917 } 1918 1919 /* 1920 * This is used to generate routing socket messages indicating 1921 * the address selection proposal from an interface. 1922 */ 1923 void 1924 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1925 uint8_t prio) 1926 { 1927 struct rt_msghdr *rtm; 1928 struct mbuf *m; 1929 1930 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1931 if (m == NULL) 1932 return; 1933 rtm = mtod(m, struct rt_msghdr *); 1934 rtm->rtm_flags = RTF_DONE | flags; 1935 rtm->rtm_priority = prio; 1936 rtm->rtm_tableid = ifp->if_rdomain; 1937 rtm->rtm_index = ifp->if_index; 1938 rtm->rtm_addrs = rtinfo->rti_addrs; 1939 1940 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1941 } 1942 1943 /* 1944 * This is used in dumping the kernel table via sysctl(). 
1945 */ 1946 int 1947 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1948 { 1949 struct walkarg *w = v; 1950 int error = 0, size; 1951 struct rt_addrinfo info; 1952 struct ifnet *ifp; 1953 #ifdef BFD 1954 struct sockaddr_bfd sa_bfd; 1955 #endif 1956 struct sockaddr_rtlabel sa_rl; 1957 struct sockaddr_in6 sa_mask; 1958 1959 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1960 return 0; 1961 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1962 u_int8_t prio = w->w_arg & RTP_MASK; 1963 if (w->w_arg < 0) { 1964 prio = (-w->w_arg) & RTP_MASK; 1965 /* Show all routes that are not this priority */ 1966 if (prio == (rt->rt_priority & RTP_MASK)) 1967 return 0; 1968 } else { 1969 if (prio != (rt->rt_priority & RTP_MASK) && 1970 prio != RTP_ANY) 1971 return 0; 1972 } 1973 } 1974 bzero(&info, sizeof(info)); 1975 info.rti_info[RTAX_DST] = rt_key(rt); 1976 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1977 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1978 ifp = if_get(rt->rt_ifidx); 1979 if (ifp != NULL) { 1980 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1981 info.rti_info[RTAX_IFA] = 1982 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1983 if (info.rti_info[RTAX_IFA] == NULL) 1984 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1985 if (ifp->if_flags & IFF_POINTOPOINT) 1986 info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; 1987 } 1988 if_put(ifp); 1989 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1990 #ifdef BFD 1991 if (rt->rt_flags & RTF_BFD) { 1992 KERNEL_ASSERT_LOCKED(); 1993 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1994 } 1995 #endif 1996 #ifdef MPLS 1997 if (rt->rt_flags & RTF_MPLS) { 1998 struct sockaddr_mpls sa_mpls; 1999 2000 bzero(&sa_mpls, sizeof(sa_mpls)); 2001 sa_mpls.smpls_family = AF_MPLS; 2002 sa_mpls.smpls_len = sizeof(sa_mpls); 2003 sa_mpls.smpls_label = ((struct rt_mpls *) 2004 rt->rt_llinfo)->mpls_label; 2005 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 2006 
info.rti_mpls = ((struct rt_mpls *) 2007 rt->rt_llinfo)->mpls_operation; 2008 } 2009 #endif 2010 2011 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 2012 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2013 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 2014 2015 rtm->rtm_pid = curproc->p_p->ps_pid; 2016 rtm->rtm_flags = RTF_DONE | rt->rt_flags; 2017 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 2018 rtm_getmetrics(rt, &rtm->rtm_rmx); 2019 /* Do not account the routing table's reference. */ 2020 rtm->rtm_rmx.rmx_refcnt = refcnt_read(&rt->rt_refcnt) - 1; 2021 rtm->rtm_index = rt->rt_ifidx; 2022 rtm->rtm_addrs = info.rti_addrs; 2023 rtm->rtm_tableid = id; 2024 #ifdef MPLS 2025 rtm->rtm_mpls = info.rti_mpls; 2026 #endif 2027 if ((error = copyout(rtm, w->w_where, size)) != 0) 2028 w->w_where = NULL; 2029 else 2030 w->w_where += size; 2031 } 2032 return (error); 2033 } 2034 2035 int 2036 sysctl_iflist(int af, struct walkarg *w) 2037 { 2038 struct ifnet *ifp; 2039 struct ifaddr *ifa; 2040 struct rt_addrinfo info; 2041 int len, error = 0; 2042 2043 bzero(&info, sizeof(info)); 2044 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2045 if (w->w_arg && w->w_arg != ifp->if_index) 2046 continue; 2047 /* Copy the link-layer address first */ 2048 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 2049 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 2050 if (w->w_where && w->w_tmem && w->w_needed <= w->w_given) { 2051 struct if_msghdr *ifm; 2052 2053 ifm = (struct if_msghdr *)w->w_tmem; 2054 ifm->ifm_index = ifp->if_index; 2055 ifm->ifm_tableid = ifp->if_rdomain; 2056 ifm->ifm_flags = ifp->if_flags; 2057 if_getdata(ifp, &ifm->ifm_data); 2058 ifm->ifm_addrs = info.rti_addrs; 2059 error = copyout(ifm, w->w_where, len); 2060 if (error) 2061 return (error); 2062 w->w_where += len; 2063 } 2064 info.rti_info[RTAX_IFP] = NULL; 2065 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2066 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2067 if (af && af != 
ifa->ifa_addr->sa_family) 2068 continue; 2069 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2070 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2071 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2072 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2073 if (w->w_where && w->w_tmem && 2074 w->w_needed <= w->w_given) { 2075 struct ifa_msghdr *ifam; 2076 2077 ifam = (struct ifa_msghdr *)w->w_tmem; 2078 ifam->ifam_index = ifa->ifa_ifp->if_index; 2079 ifam->ifam_flags = ifa->ifa_flags; 2080 ifam->ifam_metric = ifa->ifa_metric; 2081 ifam->ifam_addrs = info.rti_addrs; 2082 error = copyout(w->w_tmem, w->w_where, len); 2083 if (error) 2084 return (error); 2085 w->w_where += len; 2086 } 2087 } 2088 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2089 info.rti_info[RTAX_BRD] = NULL; 2090 } 2091 return (0); 2092 } 2093 2094 int 2095 sysctl_ifnames(struct walkarg *w) 2096 { 2097 struct if_nameindex_msg ifn; 2098 struct ifnet *ifp; 2099 int error = 0; 2100 2101 /* XXX ignore tableid for now */ 2102 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 2103 if (w->w_arg && w->w_arg != ifp->if_index) 2104 continue; 2105 w->w_needed += sizeof(ifn); 2106 if (w->w_where && w->w_needed <= w->w_given) { 2107 2108 memset(&ifn, 0, sizeof(ifn)); 2109 ifn.if_index = ifp->if_index; 2110 strlcpy(ifn.if_name, ifp->if_xname, 2111 sizeof(ifn.if_name)); 2112 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2113 if (error) 2114 return (error); 2115 w->w_where += sizeof(ifn); 2116 } 2117 } 2118 2119 return (0); 2120 } 2121 2122 int 2123 sysctl_source(int af, u_int tableid, struct walkarg *w) 2124 { 2125 struct sockaddr *sa; 2126 int size, error = 0; 2127 2128 sa = rtable_getsource(tableid, af); 2129 if (sa) { 2130 switch (sa->sa_family) { 2131 case AF_INET: 2132 size = sizeof(struct sockaddr_in); 2133 break; 2134 #ifdef INET6 2135 case AF_INET6: 2136 size = sizeof(struct sockaddr_in6); 2137 break; 2138 #endif 2139 default: 2140 return (0); 2141 } 2142 w->w_needed += size; 2143 if (w->w_where && 
w->w_needed <= w->w_given) { 2144 if ((error = copyout(sa, w->w_where, size))) 2145 return (error); 2146 w->w_where += size; 2147 } 2148 } 2149 return (0); 2150 } 2151 2152 int 2153 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2154 size_t newlen) 2155 { 2156 int i, error = EINVAL; 2157 u_char af; 2158 struct walkarg w; 2159 struct rt_tableinfo tableinfo; 2160 u_int tableid = 0; 2161 2162 if (new) 2163 return (EPERM); 2164 if (namelen < 3 || namelen > 4) 2165 return (EINVAL); 2166 af = name[0]; 2167 bzero(&w, sizeof(w)); 2168 w.w_where = where; 2169 w.w_given = *given; 2170 w.w_op = name[1]; 2171 w.w_arg = name[2]; 2172 2173 if (namelen == 4) { 2174 tableid = name[3]; 2175 if (!rtable_exists(tableid)) 2176 return (ENOENT); 2177 } else 2178 tableid = curproc->p_p->ps_rtableid; 2179 2180 switch (w.w_op) { 2181 case NET_RT_DUMP: 2182 case NET_RT_FLAGS: 2183 NET_LOCK_SHARED(); 2184 for (i = 1; i <= AF_MAX; i++) { 2185 if (af != 0 && af != i) 2186 continue; 2187 2188 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2189 &w); 2190 if (error == EAFNOSUPPORT) 2191 error = 0; 2192 if (error) 2193 break; 2194 } 2195 NET_UNLOCK_SHARED(); 2196 break; 2197 2198 case NET_RT_IFLIST: 2199 NET_LOCK_SHARED(); 2200 error = sysctl_iflist(af, &w); 2201 NET_UNLOCK_SHARED(); 2202 break; 2203 2204 case NET_RT_STATS: 2205 return (sysctl_rtable_rtstat(where, given, new)); 2206 case NET_RT_TABLE: 2207 tableid = w.w_arg; 2208 if (!rtable_exists(tableid)) 2209 return (ENOENT); 2210 memset(&tableinfo, 0, sizeof tableinfo); 2211 tableinfo.rti_tableid = tableid; 2212 tableinfo.rti_domainid = rtable_l2(tableid); 2213 error = sysctl_rdstruct(where, given, new, 2214 &tableinfo, sizeof(tableinfo)); 2215 return (error); 2216 case NET_RT_IFNAMES: 2217 NET_LOCK_SHARED(); 2218 error = sysctl_ifnames(&w); 2219 NET_UNLOCK_SHARED(); 2220 break; 2221 case NET_RT_SOURCE: 2222 tableid = w.w_arg; 2223 if (!rtable_exists(tableid)) 2224 return (ENOENT); 2225 
NET_LOCK_SHARED(); 2226 for (i = 1; i <= AF_MAX; i++) { 2227 if (af != 0 && af != i) 2228 continue; 2229 2230 error = sysctl_source(i, tableid, &w); 2231 if (error == EAFNOSUPPORT) 2232 error = 0; 2233 if (error) 2234 break; 2235 } 2236 NET_UNLOCK_SHARED(); 2237 break; 2238 } 2239 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2240 if (where) { 2241 *given = w.w_where - (caddr_t)where; 2242 if (w.w_needed > w.w_given) 2243 return (ENOMEM); 2244 } else if (w.w_needed == 0) { 2245 *given = 0; 2246 } else { 2247 *given = roundup(w.w_needed + MAX(w.w_needed / 10, 1024), 2248 PAGE_SIZE); 2249 } 2250 return (error); 2251 } 2252 2253 int 2254 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2255 { 2256 extern struct cpumem *rtcounters; 2257 uint64_t counters[rts_ncounters]; 2258 struct rtstat rtstat; 2259 uint32_t *words = (uint32_t *)&rtstat; 2260 int i; 2261 2262 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2263 memset(&rtstat, 0, sizeof rtstat); 2264 counters_read(rtcounters, counters, nitems(counters), NULL); 2265 2266 for (i = 0; i < nitems(counters); i++) 2267 words[i] = (uint32_t)counters[i]; 2268 2269 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2270 } 2271 2272 int 2273 rtm_validate_proposal(struct rt_addrinfo *info) 2274 { 2275 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2276 RTA_SEARCH)) { 2277 return -1; 2278 } 2279 2280 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2281 const struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2282 if (sa == NULL) 2283 return -1; 2284 switch (sa->sa_family) { 2285 case AF_INET: 2286 if (sa->sa_len != sizeof(struct sockaddr_in)) 2287 return -1; 2288 break; 2289 case AF_INET6: 2290 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2291 return -1; 2292 break; 2293 default: 2294 return -1; 2295 } 2296 } 2297 2298 if (ISSET(info->rti_addrs, RTA_IFA)) { 2299 const struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2300 if (sa == NULL) 2301 return -1; 2302 
switch (sa->sa_family) { 2303 case AF_INET: 2304 if (sa->sa_len != sizeof(struct sockaddr_in)) 2305 return -1; 2306 break; 2307 case AF_INET6: 2308 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2309 return -1; 2310 break; 2311 default: 2312 return -1; 2313 } 2314 } 2315 2316 if (ISSET(info->rti_addrs, RTA_DNS)) { 2317 const struct sockaddr_rtdns *rtdns = 2318 (const struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2319 if (rtdns == NULL) 2320 return -1; 2321 if (rtdns->sr_len > sizeof(*rtdns)) 2322 return -1; 2323 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2324 return -1; 2325 switch (rtdns->sr_family) { 2326 case AF_INET: 2327 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2328 sr_dns)) % sizeof(struct in_addr) != 0) 2329 return -1; 2330 break; 2331 #ifdef INET6 2332 case AF_INET6: 2333 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2334 sr_dns)) % sizeof(struct in6_addr) != 0) 2335 return -1; 2336 break; 2337 #endif 2338 default: 2339 return -1; 2340 } 2341 } 2342 2343 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2344 const struct sockaddr_rtstatic *rtstatic = (const struct 2345 sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2346 if (rtstatic == NULL) 2347 return -1; 2348 if (rtstatic->sr_len > sizeof(*rtstatic)) 2349 return -1; 2350 if (rtstatic->sr_len <= 2351 offsetof(struct sockaddr_rtstatic, sr_static)) 2352 return -1; 2353 } 2354 2355 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2356 const struct sockaddr_rtsearch *rtsearch = (const struct 2357 sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2358 if (rtsearch == NULL) 2359 return -1; 2360 if (rtsearch->sr_len > sizeof(*rtsearch)) 2361 return -1; 2362 if (rtsearch->sr_len <= 2363 offsetof(struct sockaddr_rtsearch, sr_search)) 2364 return -1; 2365 } 2366 2367 return 0; 2368 } 2369 2370 int 2371 rt_setsource(unsigned int rtableid, const struct sockaddr *src) 2372 { 2373 struct ifaddr *ifa; 2374 /* 2375 * If source address is 0.0.0.0 or :: 2376 * use automatic source selection 2377 
*/ 2378 switch(src->sa_family) { 2379 case AF_INET: 2380 if(satosin_const(src)->sin_addr.s_addr == INADDR_ANY) { 2381 rtable_setsource(rtableid, AF_INET, NULL); 2382 return (0); 2383 } 2384 break; 2385 #ifdef INET6 2386 case AF_INET6: 2387 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6_const(src)->sin6_addr)) { 2388 rtable_setsource(rtableid, AF_INET6, NULL); 2389 return (0); 2390 } 2391 break; 2392 #endif 2393 default: 2394 return (EAFNOSUPPORT); 2395 } 2396 2397 /* 2398 * Check if source address is assigned to an interface in the 2399 * same rdomain 2400 */ 2401 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) 2402 return (EINVAL); 2403 2404 return rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 2405 } 2406 2407 /* 2408 * Definitions of protocols supported in the ROUTE domain. 2409 */ 2410 2411 const struct pr_usrreqs route_usrreqs = { 2412 .pru_attach = route_attach, 2413 .pru_detach = route_detach, 2414 .pru_disconnect = route_disconnect, 2415 .pru_shutdown = route_shutdown, 2416 .pru_rcvd = route_rcvd, 2417 .pru_send = route_send, 2418 .pru_sockaddr = route_sockaddr, 2419 .pru_peeraddr = route_peeraddr, 2420 }; 2421 2422 const struct protosw routesw[] = { 2423 { 2424 .pr_type = SOCK_RAW, 2425 .pr_domain = &routedomain, 2426 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2427 .pr_ctloutput = route_ctloutput, 2428 .pr_usrreqs = &route_usrreqs, 2429 .pr_init = route_prinit, 2430 .pr_sysctl = sysctl_rtable 2431 } 2432 }; 2433 2434 const struct domain routedomain = { 2435 .dom_family = PF_ROUTE, 2436 .dom_name = "route", 2437 .dom_init = route_init, 2438 .dom_protosw = routesw, 2439 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2440 }; 2441