1 /* $OpenBSD: rtsock.c,v 1.323 2021/12/16 09:33:56 claudio Exp $ */ 2 /* $NetBSD: rtsock.c,v 1.18 1996/03/29 00:32:10 cgd Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1988, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 
36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 
*
 *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/domain.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/srp.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>

#include <netinet/in.h>

#ifdef MPLS
#include <netmpls/mpls.h>
#endif
#ifdef IPSEC
#include <netinet/ip_ipsp.h>
#include <net/if_enc.h>
#endif
#ifdef BFD
#include <net/bfd.h>
#endif

#include <sys/stdarg.h>
#include <sys/kernel.h>
#include <sys/timeout.h>

/* Buffer sizes handed to soreserve() by route_attach(). */
#define	ROUTESNDQ	8192
#define	ROUTERCVQ	8192

/*
 * Fixed source address of every routing message: it is copied out for
 * PRU_PEERADDR and passed to sbappendaddr() when a message is queued.
 */
const struct sockaddr route_src = { 2, PF_ROUTE, };

/* Argument bundle taken by rtm_msg2() and the sysctl_if*() helpers. */
struct walkarg {
	int	w_op, w_arg, w_given, w_needed, w_tmemsize;
	caddr_t	w_where, w_tmem;
};

void	route_prinit(void);
void	rcb_ref(void *, void *);
void	rcb_unref(void *, void *);
int	route_output(struct mbuf *, struct socket *, struct sockaddr *,
	    struct mbuf *);
int	route_ctloutput(int, struct socket *, int, int, struct mbuf *);
int	route_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
	    struct mbuf *, struct proc *);
void	route_input(struct mbuf *m0, struct socket *, sa_family_t);
int	route_arp_conflict(struct rtentry *, struct rt_addrinfo *);
int	route_cleargateway(struct rtentry *, void *, unsigned int);
void	rtm_senddesync_timer(void *);
void	rtm_senddesync(struct socket *);
int	rtm_sendup(struct socket *, struct mbuf *);

int	rtm_getifa(struct rt_addrinfo *, unsigned int);
int	rtm_output(struct rt_msghdr *, struct rtentry **, struct rt_addrinfo *,
	    uint8_t, unsigned int);
struct rt_msghdr *rtm_report(struct rtentry *, u_char, int, int);
struct mbuf	*rtm_msg1(int, struct rt_addrinfo *);
int		 rtm_msg2(int, int, struct rt_addrinfo *, caddr_t,
		     struct walkarg *);
int		 rtm_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
int		 rtm_validate_proposal(struct rt_addrinfo *);
void		 rtm_setmetrics(u_long, const struct rt_metrics *,
		     struct rt_kmetrics *);
void		 rtm_getmetrics(const struct rt_kmetrics *,
		     struct rt_metrics *);

int		 sysctl_iflist(int, struct walkarg *);
int		 sysctl_ifnames(struct walkarg *);
int		 sysctl_rtable_rtstat(void *, size_t *, void *);

int		 rt_setsource(unsigned int, struct sockaddr *);

/*
 * Per-socket routing control block, hung off so_pcb by route_attach().
 *
 * Locks used to protect struct members
 *       I       immutable after creation
 *       s       solock
 */
struct rtpcb {
	struct socket		*rop_socket;		/* [I] owning socket */

	SRPL_ENTRY(rtpcb)	rop_list;	/* linkage on rtptable.rtp_list */
	struct refcnt		rop_refcnt;	/* taken/released by rcb_ref/unref */
	struct timeout		rop_timeout;	/* runs rtm_senddesync_timer() */
	unsigned int		rop_msgfilter;		/* [s] ROUTE_MSGFILTER */
	unsigned int		rop_flagfilter;		/* [s] ROUTE_FLAGFILTER */
	unsigned int		rop_flags;		/* [s] ROUTECB_FLAG_* */
	u_int			rop_rtableid;		/* [s] ROUTE_TABLEFILTER */
	unsigned short		rop_proto;		/* [I] attach-time proto */
	u_char			rop_priority;		/* [s] ROUTE_PRIOFILTER */
};
/* Fetch the routing control block of a socket. */
#define	sotortpcb(so)	((struct rtpcb *)(so)->so_pcb)

/* Global table of all attached routing sockets. */
struct rtptable {
	SRPL_HEAD(, rtpcb)	rtp_list;	/* walked by route_input() */
	struct srpl_rc		rtp_rc;		/* ref hooks: rcb_ref/rcb_unref */
	struct rwlock		rtp_lk;		/* held for list insert/remove */
	unsigned int		rtp_count;	/* number of entries on rtp_list */
};

struct pool rtpcb_pool;
struct rtptable rtptable;

/*
 * These flags and timeout are used for indicating to userland (via a
 * RTM_DESYNC msg) when the route socket has overflowed and messages
 * have been lost.
*/
#define ROUTECB_FLAG_DESYNC	0x1	/* Route socket out of memory */
#define ROUTECB_FLAG_FLUSH	0x2	/* Wait until socket is empty before
					   queueing more packets */

#define ROUTE_DESYNC_RESEND_TIMEOUT	200	/* In ms */

/*
 * One-time initialisation of the routing socket table: the SRP list and
 * its reference hooks, the lock guarding list updates, and the pool that
 * backs struct rtpcb.
 */
void
route_prinit(void)
{
	srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL);
	rw_init(&rtptable.rtp_lk, "rtsock");
	SRPL_INIT(&rtptable.rtp_list);
	pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0,
	    IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL);
}

/*
 * SRP list hook: take a reference on the rtpcb passed as `v'.
 * The first argument is unused.
 */
void
rcb_ref(void *null, void *v)
{
	struct rtpcb *rop = v;

	refcnt_take(&rop->rop_refcnt);
}

/*
 * SRP list hook: release a reference on the rtpcb passed as `v' and wake
 * a waiter if there is one.  The first argument is unused.
 */
void
rcb_unref(void *null, void *v)
{
	struct rtpcb *rop = v;

	refcnt_rele_wake(&rop->rop_refcnt);
}

/*
 * Handle a PRU_* user request on a routing socket.
 *
 * Returns 0 or an errno value.  Except for PRU_CONTROL (rejected before
 * anything else is looked at) and for PRU_RCVD, PRU_RCVOOB and PRU_SENSE,
 * both `m' and `control' are freed before returning.
 */
int
route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct rtpcb	*rop;
	int		 error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);

	soassertlocked(so);

	/* Control data is not supported on routing sockets. */
	if (control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}

	rop = sotortpcb(so);
	if (rop == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {
	/* no connect, bind, accept. Socket is connected from the start */
	case PRU_CONNECT:
	case PRU_BIND:
	case PRU_CONNECT2:
	case PRU_LISTEN:
	case PRU_ACCEPT:
		error = EOPNOTSUPP;
		break;

	case PRU_DISCONNECT:
	case PRU_ABORT:
		soisdisconnected(so);
		break;
	case PRU_SHUTDOWN:
		socantsendmore(so);
		break;
	case PRU_SENSE:
		/* stat: don't bother with a blocksize. */
		break;

	/* minimal support, just implement a fake peer address */
	case PRU_SOCKADDR:
		error = EINVAL;
		break;
	case PRU_PEERADDR:
		bcopy(&route_src, mtod(nam, caddr_t), route_src.sa_len);
		nam->m_len = route_src.sa_len;
		break;

	case PRU_RCVD:
		/*
		 * If we are in a FLUSH state, check if the buffer is
		 * empty so that we can clear the flag.
		 */
		if (((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0) &&
		    ((sbspace(rop->rop_socket, &rop->rop_socket->so_rcv) ==
		    rop->rop_socket->so_rcv.sb_hiwat)))
			rop->rop_flags &= ~ROUTECB_FLAG_FLUSH;
		break;

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;
	case PRU_SEND:
		if (nam) {
			error = EISCONN;
			break;
		}
		error = (*so->so_proto->pr_output)(m, so, NULL, NULL);
		/* cleared so the release path below does not free it */
		m = NULL;
		break;
	default:
		panic("route_usrreq");
	}

 release:
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Attach a routing control block to a new socket.
 *
 * Reserves the socket buffers, allocates and initialises the rtpcb, marks
 * the socket connected with SO_USELOOPBACK set, and links the rtpcb onto
 * the global list.  Returns 0, or the error from soreserve().
 */
int
route_attach(struct socket *so, int proto)
{
	struct rtpcb	*rop;
	int		 error;

	error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
	if (error)
		return (error);
	/*
	 * use the rawcb but allocate a rtpcb, this
	 * code does not care about the additional fields
	 * and works directly on the raw socket.
*/
	rop = pool_get(&rtpcb_pool, PR_WAITOK|PR_ZERO);
	so->so_pcb = rop;
	/* Init the timeout structure */
	timeout_set_proc(&rop->rop_timeout, rtm_senddesync_timer, so);
	refcnt_init(&rop->rop_refcnt);

	rop->rop_socket = so;
	rop->rop_proto = proto;

	/* start out filtering on the routing table of the creating process */
	rop->rop_rtableid = curproc->p_p->ps_rtableid;

	soisconnected(so);
	so->so_options |= SO_USELOOPBACK;

	/* publish the new control block on the global list */
	rw_enter(&rtptable.rtp_lk, RW_WRITE);
	SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop,
	    rop_list);
	rtptable.rtp_count++;
	rw_exit(&rtptable.rtp_lk);

	return (0);
}

/*
 * Detach and free the routing control block of a socket.
 *
 * Called with the socket locked.  The lock is dropped while waiting for
 * outstanding references and the desync timeout to finish, then taken
 * again before so_pcb is cleared.  Returns EINVAL if the socket has no
 * control block, 0 otherwise.
 */
int
route_detach(struct socket *so)
{
	struct rtpcb	*rop;

	soassertlocked(so);

	rop = sotortpcb(so);
	if (rop == NULL)
		return (EINVAL);

	/* unlink first so no new references can be obtained via the list */
	rw_enter(&rtptable.rtp_lk, RW_WRITE);

	rtptable.rtp_count--;
	SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb,
	    rop_list);
	rw_exit(&rtptable.rtp_lk);

	sounlock(so, SL_LOCKED);

	/* wait for all references to drop */
	refcnt_finalize(&rop->rop_refcnt, "rtsockrefs");
	timeout_del_barrier(&rop->rop_timeout);

	solock(so);

	so->so_pcb = NULL;
	KASSERT((so->so_state & SS_NOFDREF) == 0);
	pool_put(&rtpcb_pool, rop);

	return (0);
}

/*
 * Get or set the per-socket message filters.
 *
 * Only level AF_ROUTE is accepted (EINVAL otherwise).  Every option value
 * is a single unsigned int carried in `m'.  Unknown option names yield
 * ENOPROTOOPT.
 *
 * NOTE(review): `rop' is not checked for NULL here, unlike in
 * route_usrreq(), and the PRCO_GETOPT path does not check `m' --
 * presumably the caller guarantees both; confirm.
 */
int
route_ctloutput(int op, struct socket *so, int level, int optname,
    struct mbuf *m)
{
	struct rtpcb *rop = sotortpcb(so);
	int error = 0;
	unsigned int tid, prio;

	if (level != AF_ROUTE)
		return (EINVAL);

	switch (op) {
	case PRCO_SETOPT:
		switch (optname) {
		case ROUTE_MSGFILTER:
			if (m == NULL || m->m_len != sizeof(unsigned int))
				error = EINVAL;
			else
				rop->rop_msgfilter = *mtod(m, unsigned int *);
			break;
		case ROUTE_TABLEFILTER:
			if (m == NULL || m->m_len != sizeof(unsigned int)) {
				error = EINVAL;
				break;
			}
			tid = *mtod(m, unsigned int *);
			/* RTABLE_ANY is accepted without an existence check */
			if (tid != RTABLE_ANY && !rtable_exists(tid))
				error = ENOENT;
			else
				rop->rop_rtableid = tid;
			break;
		case ROUTE_PRIOFILTER:
			if (m == NULL || m->m_len != sizeof(unsigned int)) {
				error = EINVAL;
				break;
			}
			prio = *mtod(m, unsigned int *);
			if (prio > RTP_MAX)
				error = EINVAL;
			else
				rop->rop_priority = prio;
			break;
		case ROUTE_FLAGFILTER:
			if (m == NULL || m->m_len != sizeof(unsigned int))
				error = EINVAL;
			else
				rop->rop_flagfilter = *mtod(m, unsigned int *);
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	case PRCO_GETOPT:
		switch (optname) {
		case ROUTE_MSGFILTER:
			m->m_len = sizeof(unsigned int);
			*mtod(m, unsigned int *) = rop->rop_msgfilter;
			break;
		case ROUTE_TABLEFILTER:
			m->m_len = sizeof(unsigned int);
			*mtod(m, unsigned int *) = rop->rop_rtableid;
			break;
		case ROUTE_PRIOFILTER:
			m->m_len = sizeof(unsigned int);
			*mtod(m, unsigned int *) = rop->rop_priority;
			break;
		case ROUTE_FLAGFILTER:
			m->m_len = sizeof(unsigned int);
			*mtod(m, unsigned int *) = rop->rop_flagfilter;
			break;
		default:
			error = ENOPROTOOPT;
			break;
		}
	}
	return (error);
}

/*
 * Timeout callback armed by rtm_senddesync(): take the socket lock and
 * retry sending the RTM_DESYNC message.  `xso' is the socket registered
 * with timeout_set_proc() in route_attach().
 */
void
rtm_senddesync_timer(void *xso)
{
	struct socket	*so = xso;
	int		 s;

	s = solock(so);
	rtm_senddesync(so);
	sounlock(so, s);
}

/*
 * Try to queue an RTM_DESYNC message on a socket flagged as desynced.
 * On success the DESYNC flag is cleared and the reader is woken up;
 * otherwise the timeout is re-armed to try again later.  Must be called
 * with the socket locked.
 */
void
rtm_senddesync(struct socket *so)
{
	struct rtpcb	*rop = sotortpcb(so);
	struct mbuf	*desync_mbuf;

	soassertlocked(so);

	/*
	 * Dying socket is disconnected by upper layer and there is
	 * no reason to send packet. Also we shouldn't reschedule
	 * timeout(9), otherwise timeout_del_barrier(9) can't help us.
*/
	if ((so->so_state & SS_ISCONNECTED) == 0 ||
	    (so->so_state & SS_CANTRCVMORE))
		return;

	/* If we are in a DESYNC state, try to send a RTM_DESYNC packet */
	if ((rop->rop_flags & ROUTECB_FLAG_DESYNC) == 0)
		return;

	/*
	 * If we fail to alloc memory or if sbappendaddr()
	 * fails, re-add timeout and try again.
	 */
	desync_mbuf = rtm_msg1(RTM_DESYNC, NULL);
	if (desync_mbuf != NULL) {
		/* a non-zero return means the message was queued */
		if (sbappendaddr(so, &so->so_rcv, &route_src,
		    desync_mbuf, NULL) != 0) {
			rop->rop_flags &= ~ROUTECB_FLAG_DESYNC;
			sorwakeup(rop->rop_socket);
			return;
		}
		m_freem(desync_mbuf);
	}
	/* Re-add timeout to try sending msg again */
	timeout_add_msec(&rop->rop_timeout, ROUTE_DESYNC_RESEND_TIMEOUT);
}

/*
 * Deliver a routing message to every attached routing socket that wants
 * it.
 *
 * `m0' is the message; it is always freed before returning, each
 * receiver gets its own copy via rtm_sendup().  `so0' is the originating
 * socket, which is skipped unless it has SO_USELOOPBACK set.
 * `sa_family' restricts delivery to sockets attached with a matching
 * protocol, with AF_UNSPEC on either side matching everything.
 */
void
route_input(struct mbuf *m0, struct socket *so0, sa_family_t sa_family)
{
	struct socket *so;
	struct rtpcb *rop;
	struct rt_msghdr *rtm;
	struct mbuf *m = m0;
	struct srp_ref sr;
	int s;

	/* ensure that we can access the rtm_type via mtod() */
	if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) {
		m_freem(m);
		return;
	}

	SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) {
		/*
		 * If route socket is bound to an address family only send
		 * messages that match the address family. Address family
		 * agnostic messages are always sent.
		 */
		if (sa_family != AF_UNSPEC && rop->rop_proto != AF_UNSPEC &&
		    rop->rop_proto != sa_family)
			continue;


		so = rop->rop_socket;
		s = solock(so);

		/*
		 * Check to see if we don't want our own messages and
		 * if we can receive anything.
		 */
		if ((so0 == so && !(so0->so_options & SO_USELOOPBACK)) ||
		    !(so->so_state & SS_ISCONNECTED) ||
		    (so->so_state & SS_CANTRCVMORE))
			goto next;

		/* filter messages that the process does not want */
		rtm = mtod(m, struct rt_msghdr *);
		/* but RTM_DESYNC can't be filtered */
		if (rtm->rtm_type != RTM_DESYNC) {
			/*
			 * NOTE(review): the shift is only well defined for
			 * small rtm_type values; presumably only known RTM_*
			 * types reach this point -- confirm.
			 */
			if (rop->rop_msgfilter != 0 &&
			    !(rop->rop_msgfilter & (1 << rtm->rtm_type)))
				goto next;
			if (ISSET(rop->rop_flagfilter, rtm->rtm_flags))
				goto next;
		}
		switch (rtm->rtm_type) {
		case RTM_IFANNOUNCE:
		case RTM_DESYNC:
			/* no tableid */
			break;
		case RTM_RESOLVE:
		case RTM_NEWADDR:
		case RTM_DELADDR:
		case RTM_IFINFO:
		case RTM_80211INFO:
		case RTM_BFD:
			/* check against rdomain id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rtable_l2(rop->rop_rtableid) != rtm->rtm_tableid)
				goto next;
			break;
		default:
			/* a priority filter of 0 lets every priority through */
			if (rop->rop_priority != 0 &&
			    rop->rop_priority < rtm->rtm_priority)
				goto next;
			/* check against rtable id */
			if (rop->rop_rtableid != RTABLE_ANY &&
			    rop->rop_rtableid != rtm->rtm_tableid)
				goto next;
			break;
		}

		/*
		 * Check to see if the flush flag is set. If so, don't queue
		 * any more messages until the flag is cleared.
		 */
		if ((rop->rop_flags & ROUTECB_FLAG_FLUSH) != 0)
			goto next;

		rtm_sendup(so, m);
next:
		sounlock(so, s);
	}
	SRPL_LEAVE(&sr);

	m_freem(m);
}

/*
 * Queue a copy of message `m0' on the receive buffer of `so'.
 *
 * `m0' itself is left untouched.  Returns 0 on success, ENOMEM if the
 * copy could not be allocated, or ENOBUFS if the buffer had no room; in
 * the ENOBUFS case the socket is flagged DESYNC|FLUSH and a desync
 * notification is attempted.  Must be called with the socket locked.
 */
int
rtm_sendup(struct socket *so, struct mbuf *m0)
{
	struct rtpcb *rop = sotortpcb(so);
	struct mbuf *m;

	soassertlocked(so);

	m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
	if (m == NULL)
		return (ENOMEM);

	/* a zero return from sbappendaddr() is treated as failure */
	if (sbspace(so, &so->so_rcv) < (2 * MSIZE) ||
	    sbappendaddr(so, &so->so_rcv, &route_src, m, NULL) == 0) {
		/* Flag socket as desync'ed and flush required */
		rop->rop_flags |= ROUTECB_FLAG_DESYNC | ROUTECB_FLAG_FLUSH;
		rtm_senddesync(so);
		m_freem(m);
		return (ENOBUFS);
	}

	sorwakeup(so);
	return (0);
}

/*
 * Build a routing message describing route `rt'.
 *
 * The message is allocated with malloc(9) from M_RTABLE (M_WAITOK) and
 * its header is filled in from `type', `seq', `tableid' and the route.
 * route_output() releases it with free(rtm, M_RTABLE, rtm->rtm_msglen).
 */
struct rt_msghdr *
rtm_report(struct rtentry *rt, u_char type, int seq, int tableid)
{
	struct rt_msghdr	*rtm;
	struct rt_addrinfo	 info;
	struct sockaddr_rtlabel	 sa_rl;
	struct sockaddr_in6	 sa_mask;
#ifdef BFD
	struct sockaddr_bfd	 sa_bfd;
#endif
	struct ifnet		*ifp = NULL;
	int			 len;

	/* collect the addresses that go into the message */
	bzero(&info, sizeof(info));
	info.rti_info[RTAX_DST] = rt_key(rt);
	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
	info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask);
	info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl);
#ifdef BFD
	if (rt->rt_flags & RTF_BFD) {
		KERNEL_LOCK();
		info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd);
		KERNEL_UNLOCK();
	}
#endif
#ifdef MPLS
	if (rt->rt_flags & RTF_MPLS) {
		struct sockaddr_mpls	 sa_mpls;

		bzero(&sa_mpls, sizeof(sa_mpls));
		sa_mpls.smpls_family = AF_MPLS;
		sa_mpls.smpls_len = sizeof(sa_mpls);
		sa_mpls.smpls_label = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_label;
		info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls;
		info.rti_mpls = ((struct rt_mpls *)
		    rt->rt_llinfo)->mpls_operation;
	}
#endif
	ifp = if_get(rt->rt_ifidx);
	if (ifp != NULL) {
		info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl);
		/* prefer the table's source address, else the route's ifa */
		info.rti_info[RTAX_IFA] =
		    rtable_getsource(tableid, info.rti_info[RTAX_DST]->sa_family);
		if (info.rti_info[RTAX_IFA] == NULL)
			info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
		if (ifp->if_flags & IFF_POINTOPOINT)
			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
	}
	if_put(ifp);
	/* RTAX_GENMASK, RTAX_AUTHOR, RTAX_SRCMASK ignored */

	/* build new route message */
	/* first call only computes the length, second one fills the buffer */
	len = rtm_msg2(type, RTM_VERSION, &info, NULL, NULL);
	rtm = malloc(len, M_RTABLE, M_WAITOK | M_ZERO);

	rtm_msg2(type, RTM_VERSION, &info, (caddr_t)rtm, NULL);
	rtm->rtm_type = type;
	rtm->rtm_index = rt->rt_ifidx;
	rtm->rtm_tableid = tableid;
	rtm->rtm_priority = rt->rt_priority & RTP_MASK;
	rtm->rtm_flags = rt->rt_flags;
	rtm->rtm_pid = curproc->p_p->ps_pid;
	rtm->rtm_seq = seq;
	rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
	rtm->rtm_addrs = info.rti_addrs;
#ifdef MPLS
	rtm->rtm_mpls = info.rti_mpls;
#endif
	return rtm;
}

/*
 * Process a routing message written to a routing socket.
 *
 * The message in `m' is validated, acted upon, and then passed to
 * route_input() with the result (rtm_errno, or RTF_DONE on success)
 * filled in.  `m' is consumed on every path.  The socket lock is dropped
 * for the duration of the work and held again on return.  Returns 0 or
 * an errno value.
 */
int
route_output(struct mbuf *m, struct socket *so, struct sockaddr *dstaddr,
    struct mbuf *control)
{
	struct rt_msghdr	*rtm = NULL;
	struct rtentry		*rt = NULL;
	struct rt_addrinfo	 info;
	struct ifnet		*ifp;
	int			 len, seq, useloopback, error = 0;
	u_int			 tableid;
	u_int8_t		 prio;
	u_char			 vers, type;

	if (m == NULL || ((m->m_len < sizeof(int32_t)) &&
	    (m = m_pullup(m, sizeof(int32_t))) == 0))
		return (ENOBUFS);
	if ((m->m_flags & M_PKTHDR) == 0)
		panic("route_output");

	/* sampled now, while the socket lock is still held */
	useloopback = so->so_options & SO_USELOOPBACK;

	/*
	 * The socket can't be closed concurrently because the file
	 * descriptor reference is still held.
*/

	sounlock(so, SL_LOCKED);

	/* the length in the header must match the packet length exactly */
	len = m->m_pkthdr.len;
	if (len < offsetof(struct rt_msghdr, rtm_hdrlen) + 1 ||
	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
		error = EINVAL;
		goto fail;
	}
	vers = mtod(m, struct rt_msghdr *)->rtm_version;
	switch (vers) {
	case RTM_VERSION:
		if (len < sizeof(struct rt_msghdr)) {
			error = EINVAL;
			goto fail;
		}
		if (len > RTM_MAXSIZE) {
			error = EMSGSIZE;
			goto fail;
		}
		/* work on a contiguous private copy of the message */
		rtm = malloc(len, M_RTABLE, M_WAITOK);
		m_copydata(m, 0, len, rtm);
		break;
	default:
		error = EPROTONOSUPPORT;
		goto fail;
	}

	/* Verify that the caller is sending an appropriate message early */
	switch (rtm->rtm_type) {
	case RTM_ADD:
	case RTM_DELETE:
	case RTM_GET:
	case RTM_CHANGE:
	case RTM_PROPOSAL:
	case RTM_SOURCE:
		break;
	default:
		error = EOPNOTSUPP;
		goto fail;
	}
	/*
	 * Verify that the header length is valid.
	 * All messages from userland start with a struct rt_msghdr.
	 */
	if (rtm->rtm_hdrlen == 0)	/* old client */
		rtm->rtm_hdrlen = sizeof(struct rt_msghdr);
	if (rtm->rtm_hdrlen < sizeof(struct rt_msghdr) ||
	    len < rtm->rtm_hdrlen) {
		error = EINVAL;
		goto fail;
	}

	/* the pid is always that of the sender, whatever userland wrote */
	rtm->rtm_pid = curproc->p_p->ps_pid;

	/*
	 * Verify that the caller has the appropriate privilege; RTM_GET
	 * is the only operation the non-superuser is allowed.
	 */
	if (rtm->rtm_type != RTM_GET && suser(curproc) != 0) {
		error = EACCES;
		goto fail;
	}
	/* only RTM_ADD may create a routing table that does not exist yet */
	tableid = rtm->rtm_tableid;
	if (!rtable_exists(tableid)) {
		if (rtm->rtm_type == RTM_ADD) {
			if ((error = rtable_add(tableid)) != 0)
				goto fail;
		} else {
			error = EINVAL;
			goto fail;
		}
	}

	/* Do not let userland play with kernel-only flags. */
	if ((rtm->rtm_flags & (RTF_LOCAL|RTF_BROADCAST)) != 0) {
		error = EINVAL;
		goto fail;
	}

	/* make sure that kernel-only bits are not set */
	rtm->rtm_priority &= RTP_MASK;
	rtm->rtm_flags &= ~(RTF_DONE|RTF_CLONED|RTF_CACHED);
	rtm->rtm_fmask &= RTF_FMASK;

	/*
	 * Pick the priority handed to rtm_output(): an explicit one if
	 * given, otherwise RTP_ANY for anything but RTM_ADD, 0 for a new
	 * RTF_STATIC route and RTP_DEFAULT for any other new route.
	 */
	if (rtm->rtm_priority != 0) {
		if (rtm->rtm_priority > RTP_MAX ||
		    rtm->rtm_priority == RTP_LOCAL) {
			error = EINVAL;
			goto fail;
		}
		prio = rtm->rtm_priority;
	} else if (rtm->rtm_type != RTM_ADD)
		prio = RTP_ANY;
	else if (rtm->rtm_flags & RTF_STATIC)
		prio = 0;
	else
		prio = RTP_DEFAULT;

	/* parse the sockaddrs that follow the header into `info' */
	bzero(&info, sizeof(info));
	info.rti_addrs = rtm->rtm_addrs;
	if ((error = rtm_xaddrs(rtm->rtm_hdrlen + (caddr_t)rtm,
	    len + (caddr_t)rtm, &info)) != 0)
		goto fail;

	info.rti_flags = rtm->rtm_flags;

	/*
	 * Apart from RTM_SOURCE and RTM_PROPOSAL, a destination is
	 * mandatory, address families must be in range and RTAX_GENMASK
	 * is rejected.
	 */
	if (rtm->rtm_type != RTM_SOURCE &&
	    rtm->rtm_type != RTM_PROPOSAL &&
	    (info.rti_info[RTAX_DST] == NULL ||
	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
	    (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX) ||
	    info.rti_info[RTAX_GENMASK] != NULL)) {
		error = EINVAL;
		goto fail;
	}
#ifdef MPLS
	info.rti_mpls = rtm->rtm_mpls;
#endif

	/* a link-level gateway on a non-cloning route implies RTF_LLINFO */
	if (info.rti_info[RTAX_GATEWAY] != NULL &&
	    info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK &&
	    (info.rti_flags & RTF_CLONING) == 0) {
		info.rti_flags |= RTF_LLINFO;
	}

	/*
	 * Validate RTM_PROPOSAL and pass it along or error out.
	 */
	if (rtm->rtm_type == RTM_PROPOSAL) {
		if (rtm_validate_proposal(&info) == -1) {
			error = EINVAL;
			goto fail;
		}
		/*
		 * If this is a solicitation proposal forward request to
		 * all interfaces. Most handlers will ignore it but at least
		 * umb(4) will send a response to this event.
		 */
		if (rtm->rtm_priority == RTP_PROPOSAL_SOLICIT) {
			NET_LOCK();
			TAILQ_FOREACH(ifp, &ifnet, if_list) {
				ifp->if_rtrequest(ifp, RTM_PROPOSAL, NULL);
			}
			NET_UNLOCK();
		}
	} else if (rtm->rtm_type == RTM_SOURCE) {
		if (info.rti_info[RTAX_IFA] == NULL) {
			error = EINVAL;
			goto fail;
		}
		if ((error =
		    rt_setsource(tableid, info.rti_info[RTAX_IFA])) != 0)
			goto fail;
	} else {
		error = rtm_output(rtm, &rt, &info, prio, tableid);
		if (!error) {
			/*
			 * Replace the request with a fresh report of the
			 * resulting route, keeping the type and sequence
			 * number of the request.
			 */
			type = rtm->rtm_type;
			seq = rtm->rtm_seq;
			free(rtm, M_RTABLE, len);
			rtm = rtm_report(rt, type, seq, tableid);
			len = rtm->rtm_msglen;
		}
	}

	rtfree(rt);
	if (error) {
		rtm->rtm_errno = error;
	} else {
		rtm->rtm_flags |= RTF_DONE;
	}

	/*
	 * Check to see if we don't want our own messages.
	 */
	if (!useloopback) {
		if (rtptable.rtp_count == 0) {
			/* no other listener and no loopback of messages */
			goto fail;
		}
	}
	/* write the reply back into the mbuf chain and trim any excess */
	if (m_copyback(m, 0, len, rtm, M_NOWAIT)) {
		m_freem(m);
		m = NULL;
	} else if (m->m_pkthdr.len > len)
		m_adj(m, len - m->m_pkthdr.len);
	free(rtm, M_RTABLE, len);
	if (m)
		route_input(m, so, info.rti_info[RTAX_DST] ?
		    info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC);
	solock(so);

	return (error);
fail:
	/* common exit: release the copy and the mbuf, then relock */
	free(rtm, M_RTABLE, len);
	m_freem(m);
	solock(so);

	return (error);
}

/*
 * Carry out an RTM_ADD, RTM_DELETE, RTM_CHANGE or RTM_GET request.
 *
 * `rtm' is the request header, `info' the parsed addresses, `prio' the
 * priority chosen by route_output() and `tableid' the routing table.
 * Whatever route the operation ends up holding is stored in `*prt',
 * also on error paths that leave one set, and route_output() passes it
 * to rtfree().  Returns 0 or an errno value.
 */
int
rtm_output(struct rt_msghdr *rtm, struct rtentry **prt,
    struct rt_addrinfo *info, uint8_t prio, unsigned int tableid)
{
	struct rtentry		*rt = *prt;
	struct ifnet		*ifp = NULL;
	int			 plen, newgate = 0, error = 0;

	switch (rtm->rtm_type) {
	case RTM_ADD:
		if (info->rti_info[RTAX_GATEWAY] == NULL) {
			error = EINVAL;
			break;
		}

		rt = rtable_match(tableid, info->rti_info[RTAX_DST], NULL);
		if ((error = route_arp_conflict(rt, info))) {
			rtfree(rt);
			rt = NULL;
			break;
		}

		/*
		 * We cannot go through a delete/create/insert cycle for
		 * cached route because this can lead to races in the
		 * receive path.  Instead we update the L2 cache.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_CACHED)) {
			ifp = if_get(rt->rt_ifidx);
			if (ifp == NULL) {
				rtfree(rt);
				rt = NULL;
				error = ESRCH;
				break;
			}

			/* continue in the shared part of RTM_CHANGE below */
			goto change;
		}

		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		if ((error = rtm_getifa(info, tableid)) != 0) {
			NET_UNLOCK();
			break;
		}
		error = rtrequest(RTM_ADD, info, prio, &rt, tableid);
		NET_UNLOCK();
		if (error == 0)
			rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
			    &rt->rt_rmx);
		break;
	case RTM_DELETE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
*/
		if (ISSET(rt->rt_flags, RTF_MPATH) &&
		    info->rti_info[RTAX_GATEWAY] == NULL) {
			error = ESRCH;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * Invalidate the cache of automagically created and
		 * referenced L2 entries to make sure that ``rt_gwroute''
		 * pointer stays valid for other CPUs.
		 */
		if ((ISSET(rt->rt_flags, RTF_CACHED))) {
			NET_LOCK();
			ifp->if_rtrequest(ifp, RTM_INVALIDATE, rt);
			/* Reset the MTU of the gateway route. */
			rtable_walk(tableid, rt_key(rt)->sa_family, NULL,
			    route_cleargateway, rt);
			NET_UNLOCK();
			/* the entry is only invalidated here, not deleted */
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		/* drop the lookup reference; rtrequest_delete() hands back rt */
		rtfree(rt);
		rt = NULL;

		NET_LOCK();
		error = rtrequest_delete(info, prio, ifp, &rt, tableid);
		NET_UNLOCK();
		break;
	case RTM_CHANGE:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		/*
		 * If we got multipath routes, we require users to specify
		 * a matching gateway.
		 */
		if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH) &&
		    (info->rti_info[RTAX_GATEWAY] == NULL)) {
			rtfree(rt);
			rt = NULL;
		}

		/*
		 * If RTAX_GATEWAY is the argument we're trying to
		 * change, try to find a compatible route.
		 */
		if ((rt == NULL) && (info->rti_info[RTAX_GATEWAY] != NULL)) {
			rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
			    info->rti_info[RTAX_NETMASK], NULL, prio);
			/* Ensure we don't pick a multipath one. */
			if ((rt != NULL) && ISSET(rt->rt_flags, RTF_MPATH)) {
				rtfree(rt);
				rt = NULL;
			}
		}

		if (rt == NULL) {
			error = ESRCH;
			break;
		}

		/*
		 * Make sure that local routes are only modified by the
		 * kernel.
		 */
		if (ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) {
			error = EINVAL;
			break;
		}

		ifp = if_get(rt->rt_ifidx);
		if (ifp == NULL) {
			rtfree(rt);
			rt = NULL;
			error = ESRCH;
			break;
		}

		/*
		 * RTM_CHANGE needs a perfect match.
		 */
		plen = rtable_satoplen(info->rti_info[RTAX_DST]->sa_family,
		    info->rti_info[RTAX_NETMASK]);
		if (rt_plen(rt) != plen) {
			error = ESRCH;
			break;
		}

		/* the gateway counts as new if it was unset or differs */
		if (info->rti_info[RTAX_GATEWAY] != NULL)
			if (rt->rt_gateway == NULL ||
			    bcmp(rt->rt_gateway,
			    info->rti_info[RTAX_GATEWAY],
			    info->rti_info[RTAX_GATEWAY]->sa_len)) {
				newgate = 1;
			}
		/*
		 * Check reachable gateway before changing the route.
		 * New gateway could require new ifaddr, ifp;
		 * flags may also be different; ifp may be specified
		 * by ll sockaddr when protocol address is ambiguous.
		 */
		if (newgate || info->rti_info[RTAX_IFP] != NULL ||
		    info->rti_info[RTAX_IFA] != NULL) {
			struct ifaddr	*ifa = NULL;

			NET_LOCK();
			if ((error = rtm_getifa(info, tableid)) != 0) {
				NET_UNLOCK();
				break;
			}
			ifa = info->rti_ifa;
			if (rt->rt_ifa != ifa) {
				/* move the route over to the new ifaddr */
				ifp->if_rtrequest(ifp, RTM_DELETE, rt);
				ifafree(rt->rt_ifa);

				ifa->ifa_refcnt++;
				rt->rt_ifa = ifa;
				rt->rt_ifidx = ifa->ifa_ifp->if_index;
				/* recheck link state after ifp change */
				rt_if_linkstate_change(rt, ifa->ifa_ifp,
				    tableid);
			}
			NET_UNLOCK();
		}
change:
		/* also reached from RTM_ADD for an existing RTF_CACHED route */
		if (info->rti_info[RTAX_GATEWAY] != NULL) {
			/* When updating the gateway, make sure it is valid. */
			if (!newgate && rt->rt_gateway->sa_family !=
			    info->rti_info[RTAX_GATEWAY]->sa_family) {
				error = EINVAL;
				break;
			}

			NET_LOCK();
			error = rt_setgate(rt,
			    info->rti_info[RTAX_GATEWAY], tableid);
			NET_UNLOCK();
			if (error)
				break;
		}
#ifdef MPLS
		if (rtm->rtm_flags & RTF_MPLS) {
			NET_LOCK();
			error = rt_mpls_set(rt,
			    info->rti_info[RTAX_SRC], info->rti_mpls);
			NET_UNLOCK();
			if (error)
				break;
		} else if (newgate || (rtm->rtm_fmask & RTF_MPLS)) {
			NET_LOCK();
			/* if gateway changed remove MPLS information */
			rt_mpls_clear(rt);
			NET_UNLOCK();
		}
#endif

#ifdef BFD
		if (ISSET(rtm->rtm_flags, RTF_BFD)) {
			KERNEL_LOCK();
			error = bfdset(rt);
			KERNEL_UNLOCK();
			if (error)
				break;
		} else if (!ISSET(rtm->rtm_flags, RTF_BFD) &&
		    ISSET(rtm->rtm_fmask, RTF_BFD)) {
			KERNEL_LOCK();
			bfdclear(rt);
			KERNEL_UNLOCK();
		}
#endif

		NET_LOCK();
		/* Hack to allow some flags to be toggled */
		if (rtm->rtm_fmask) {
			/* MPLS flag it is set by rt_mpls_set() */
			rtm->rtm_fmask &= ~RTF_MPLS;
			rtm->rtm_flags &= ~RTF_MPLS;
			rt->rt_flags =
			    (rt->rt_flags & ~rtm->rtm_fmask) |
			    (rtm->rtm_flags & rtm->rtm_fmask);
		}
		rtm_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx);

		ifp->if_rtrequest(ifp, RTM_ADD, rt);

		/* swap the route label: drop the old id, look up the new one */
		if (info->rti_info[RTAX_LABEL] != NULL) {
			char *rtlabel = ((struct sockaddr_rtlabel *)
			    info->rti_info[RTAX_LABEL])->sr_label;
			rtlabel_unref(rt->rt_labelid);
			rt->rt_labelid = rtlabel_name2id(rtlabel);
		}
		if_group_routechange(info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK]);
		/* update the lock bits of every metric named in rtm_inits */
		rt->rt_locks &= ~(rtm->rtm_inits);
		rt->rt_locks |= (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
		NET_UNLOCK();
		break;
	case RTM_GET:
		rt = rtable_lookup(tableid, info->rti_info[RTAX_DST],
		    info->rti_info[RTAX_NETMASK], info->rti_info[RTAX_GATEWAY],
		    prio);
		if (rt == NULL)
			error = ESRCH;
		break;
	}

	/* common exit: ifp is NULL unless one of the cases looked it up */
	if_put(ifp);
	*prt = rt;
	return (error);
}

/*
 * Find the interface address to use for a route to `dst' via `gateway'
 * in routing table `rtableid'.  `flags' are the RTF_* flags of the route.
 * Returns NULL if nothing suitable was found.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway,
    unsigned int rtableid)
{
	struct ifaddr	*ifa;

	if ((flags & RTF_GATEWAY) == 0) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		ifa = NULL;
		if (flags & RTF_HOST)
			ifa = ifa_ifwithdstaddr(dst, rtableid);
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway, rtableid);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway, rtableid);
	}
	if (ifa == NULL) {
		if (gateway->sa_family == AF_LINK) {
			/* link-level gateway: it names the interface itself */
			struct sockaddr_dl *sdl = satosdl(gateway);
			struct ifnet *ifp = if_get(sdl->sdl_index);

			if (ifp != NULL)
				ifa = ifaof_ifpforaddr(dst, ifp);
			if_put(ifp);
		} else {
			/* otherwise use the ifa of the route to the gateway */
			struct rtentry *rt;

			rt = rtalloc(gateway, RT_RESOLVE, rtable_l2(rtableid));
			/*
			 * NOTE(review): `ifa' is used after rtfree(rt);
			 * presumably the caller's NET_LOCK() keeps it
			 * alive -- confirm.
			 */
			if (rt != NULL)
				ifa = rt->rt_ifa;
			rtfree(rt);
		}
	}
	if (ifa == NULL)
		return (NULL);
	/* prefer an address of dst's family on the same interface */
	if (ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr	*oifa = ifa;
		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (ifa == NULL)
			ifa = oifa;
	}
	return (ifa);
}

/*
 * Fill in info->rti_ifa from the addresses supplied in `info', looking
 * them up in routing table `rtid'.  Returns 0 on success or ENETUNREACH
 * if no interface address could be determined.
 */
int
rtm_getifa(struct rt_addrinfo *info, unsigned int rtid)
{
	struct ifnet	*ifp = NULL;

	/*
	 * The "returned" `ifa' is guaranteed to be alive only if
	 * the NET_LOCK() is held.
1270 */ 1271 NET_ASSERT_LOCKED(); 1272 1273 /* 1274 * ifp may be specified by sockaddr_dl when protocol address 1275 * is ambiguous 1276 */ 1277 if (info->rti_info[RTAX_IFP] != NULL) { 1278 struct sockaddr_dl *sdl; 1279 1280 sdl = satosdl(info->rti_info[RTAX_IFP]); 1281 ifp = if_get(sdl->sdl_index); 1282 } 1283 1284 #ifdef IPSEC 1285 /* 1286 * If the destination is a PF_KEY address, we'll look 1287 * for the existence of a encap interface number or address 1288 * in the options list of the gateway. By default, we'll return 1289 * enc0. 1290 */ 1291 if (info->rti_info[RTAX_DST] && 1292 info->rti_info[RTAX_DST]->sa_family == PF_KEY) 1293 info->rti_ifa = enc_getifa(rtid, 0); 1294 #endif 1295 1296 if (info->rti_ifa == NULL && info->rti_info[RTAX_IFA] != NULL) 1297 info->rti_ifa = ifa_ifwithaddr(info->rti_info[RTAX_IFA], rtid); 1298 1299 if (info->rti_ifa == NULL) { 1300 struct sockaddr *sa; 1301 1302 if ((sa = info->rti_info[RTAX_IFA]) == NULL) 1303 if ((sa = info->rti_info[RTAX_GATEWAY]) == NULL) 1304 sa = info->rti_info[RTAX_DST]; 1305 1306 if (sa != NULL && ifp != NULL) 1307 info->rti_ifa = ifaof_ifpforaddr(sa, ifp); 1308 else if (info->rti_info[RTAX_DST] != NULL && 1309 info->rti_info[RTAX_GATEWAY] != NULL) 1310 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1311 info->rti_info[RTAX_DST], 1312 info->rti_info[RTAX_GATEWAY], 1313 rtid); 1314 else if (sa != NULL) 1315 info->rti_ifa = ifa_ifwithroute(info->rti_flags, 1316 sa, sa, rtid); 1317 } 1318 1319 if_put(ifp); 1320 1321 if (info->rti_ifa == NULL) 1322 return (ENETUNREACH); 1323 1324 return (0); 1325 } 1326 1327 int 1328 route_cleargateway(struct rtentry *rt, void *arg, unsigned int rtableid) 1329 { 1330 struct rtentry *nhrt = arg; 1331 1332 if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt && 1333 !ISSET(rt->rt_locks, RTV_MTU)) 1334 rt->rt_mtu = 0; 1335 1336 return (0); 1337 } 1338 1339 /* 1340 * Check if the user request to insert an ARP entry does not conflict 1341 * with existing ones. 
1342 * 1343 * Only two entries are allowed for a given IP address: a private one 1344 * (priv) and a public one (pub). 1345 */ 1346 int 1347 route_arp_conflict(struct rtentry *rt, struct rt_addrinfo *info) 1348 { 1349 int proxy = (info->rti_flags & RTF_ANNOUNCE); 1350 1351 if ((info->rti_flags & RTF_LLINFO) == 0 || 1352 (info->rti_info[RTAX_DST]->sa_family != AF_INET)) 1353 return (0); 1354 1355 if (rt == NULL || !ISSET(rt->rt_flags, RTF_LLINFO)) 1356 return (0); 1357 1358 /* If the entry is cached, it can be updated. */ 1359 if (ISSET(rt->rt_flags, RTF_CACHED)) 1360 return (0); 1361 1362 /* 1363 * Same destination, not cached and both "priv" or "pub" conflict. 1364 * If a second entry exists, it always conflict. 1365 */ 1366 if ((ISSET(rt->rt_flags, RTF_ANNOUNCE) == proxy) || 1367 ISSET(rt->rt_flags, RTF_MPATH)) 1368 return (EEXIST); 1369 1370 /* No conflict but an entry exist so we need to force mpath. */ 1371 info->rti_flags |= RTF_MPATH; 1372 return (0); 1373 } 1374 1375 void 1376 rtm_setmetrics(u_long which, const struct rt_metrics *in, 1377 struct rt_kmetrics *out) 1378 { 1379 int64_t expire; 1380 1381 if (which & RTV_MTU) 1382 out->rmx_mtu = in->rmx_mtu; 1383 if (which & RTV_EXPIRE) { 1384 expire = in->rmx_expire; 1385 if (expire != 0) { 1386 expire -= gettime(); 1387 expire += getuptime(); 1388 } 1389 1390 out->rmx_expire = expire; 1391 } 1392 } 1393 1394 void 1395 rtm_getmetrics(const struct rt_kmetrics *in, struct rt_metrics *out) 1396 { 1397 int64_t expire; 1398 1399 expire = in->rmx_expire; 1400 if (expire != 0) { 1401 expire -= getuptime(); 1402 expire += gettime(); 1403 } 1404 1405 bzero(out, sizeof(*out)); 1406 out->rmx_locks = in->rmx_locks; 1407 out->rmx_mtu = in->rmx_mtu; 1408 out->rmx_expire = expire; 1409 out->rmx_pksent = in->rmx_pksent; 1410 } 1411 1412 #define ROUNDUP(a) \ 1413 ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) 1414 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len)) 1415 1416 int 1417 rtm_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) 1418 { 1419 struct sockaddr *sa; 1420 int i; 1421 1422 /* 1423 * Parse address bits, split address storage in chunks, and 1424 * set info pointers. Use sa_len for traversing the memory 1425 * and check that we stay within in the limit. 1426 */ 1427 bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); 1428 for (i = 0; i < sizeof(rtinfo->rti_addrs) * 8; i++) { 1429 if ((rtinfo->rti_addrs & (1 << i)) == 0) 1430 continue; 1431 if (i >= RTAX_MAX || cp + sizeof(socklen_t) > cplim) 1432 return (EINVAL); 1433 sa = (struct sockaddr *)cp; 1434 if (cp + sa->sa_len > cplim) 1435 return (EINVAL); 1436 rtinfo->rti_info[i] = sa; 1437 ADVANCE(cp, sa); 1438 } 1439 /* 1440 * Check that the address family is suitable for the route address 1441 * type. Check that each address has a size that fits its family 1442 * and its length is within the size. Strings within addresses must 1443 * be NUL terminated. 1444 */ 1445 for (i = 0; i < RTAX_MAX; i++) { 1446 size_t len, maxlen, size; 1447 1448 sa = rtinfo->rti_info[i]; 1449 if (sa == NULL) 1450 continue; 1451 maxlen = size = 0; 1452 switch (i) { 1453 case RTAX_DST: 1454 case RTAX_GATEWAY: 1455 case RTAX_SRC: 1456 switch (sa->sa_family) { 1457 case AF_INET: 1458 size = sizeof(struct sockaddr_in); 1459 break; 1460 case AF_LINK: 1461 size = sizeof(struct sockaddr_dl); 1462 break; 1463 #ifdef INET6 1464 case AF_INET6: 1465 size = sizeof(struct sockaddr_in6); 1466 break; 1467 #endif 1468 #ifdef MPLS 1469 case AF_MPLS: 1470 size = sizeof(struct sockaddr_mpls); 1471 break; 1472 #endif 1473 } 1474 break; 1475 case RTAX_IFP: 1476 if (sa->sa_family != AF_LINK) 1477 return (EAFNOSUPPORT); 1478 /* 1479 * XXX Should be sizeof(struct sockaddr_dl), but 1480 * route(8) has a bug and provides less memory. 1481 * arp(8) has another bug and uses sizeof pointer. 
1482 */ 1483 size = 4; 1484 break; 1485 case RTAX_IFA: 1486 switch (sa->sa_family) { 1487 case AF_INET: 1488 size = sizeof(struct sockaddr_in); 1489 break; 1490 #ifdef INET6 1491 case AF_INET6: 1492 size = sizeof(struct sockaddr_in6); 1493 break; 1494 #endif 1495 default: 1496 return (EAFNOSUPPORT); 1497 } 1498 break; 1499 case RTAX_LABEL: 1500 sa->sa_family = AF_UNSPEC; 1501 maxlen = RTLABEL_LEN; 1502 size = sizeof(struct sockaddr_rtlabel); 1503 break; 1504 #ifdef BFD 1505 case RTAX_BFD: 1506 sa->sa_family = AF_UNSPEC; 1507 size = sizeof(struct sockaddr_bfd); 1508 break; 1509 #endif 1510 case RTAX_DNS: 1511 /* more validation in rtm_validate_proposal */ 1512 if (sa->sa_len > sizeof(struct sockaddr_rtdns)) 1513 return (EINVAL); 1514 if (sa->sa_len < offsetof(struct sockaddr_rtdns, 1515 sr_dns)) 1516 return (EINVAL); 1517 switch (sa->sa_family) { 1518 case AF_INET: 1519 #ifdef INET6 1520 case AF_INET6: 1521 #endif 1522 break; 1523 default: 1524 return (EAFNOSUPPORT); 1525 } 1526 break; 1527 case RTAX_STATIC: 1528 sa->sa_family = AF_UNSPEC; 1529 maxlen = RTSTATIC_LEN; 1530 size = sizeof(struct sockaddr_rtstatic); 1531 break; 1532 case RTAX_SEARCH: 1533 sa->sa_family = AF_UNSPEC; 1534 maxlen = RTSEARCH_LEN; 1535 size = sizeof(struct sockaddr_rtsearch); 1536 break; 1537 } 1538 if (size) { 1539 /* memory for the full struct must be provided */ 1540 if (sa->sa_len < size) 1541 return (EINVAL); 1542 } 1543 if (maxlen) { 1544 /* this should not happen */ 1545 if (2 + maxlen > size) 1546 return (EINVAL); 1547 /* strings must be NUL terminated within the struct */ 1548 len = strnlen(sa->sa_data, maxlen); 1549 if (len >= maxlen || 2 + len >= sa->sa_len) 1550 return (EINVAL); 1551 break; 1552 } 1553 } 1554 return (0); 1555 } 1556 1557 struct mbuf * 1558 rtm_msg1(int type, struct rt_addrinfo *rtinfo) 1559 { 1560 struct rt_msghdr *rtm; 1561 struct mbuf *m; 1562 int i; 1563 struct sockaddr *sa; 1564 int len, dlen, hlen; 1565 1566 switch (type) { 1567 case RTM_DELADDR: 1568 case 
RTM_NEWADDR: 1569 len = sizeof(struct ifa_msghdr); 1570 break; 1571 case RTM_IFINFO: 1572 len = sizeof(struct if_msghdr); 1573 break; 1574 case RTM_IFANNOUNCE: 1575 len = sizeof(struct if_announcemsghdr); 1576 break; 1577 #ifdef BFD 1578 case RTM_BFD: 1579 len = sizeof(struct bfd_msghdr); 1580 break; 1581 #endif 1582 case RTM_80211INFO: 1583 len = sizeof(struct if_ieee80211_msghdr); 1584 break; 1585 default: 1586 len = sizeof(struct rt_msghdr); 1587 break; 1588 } 1589 if (len > MCLBYTES) 1590 panic("rtm_msg1"); 1591 m = m_gethdr(M_DONTWAIT, MT_DATA); 1592 if (m && len > MHLEN) { 1593 MCLGET(m, M_DONTWAIT); 1594 if ((m->m_flags & M_EXT) == 0) { 1595 m_free(m); 1596 m = NULL; 1597 } 1598 } 1599 if (m == NULL) 1600 return (m); 1601 m->m_pkthdr.len = m->m_len = hlen = len; 1602 m->m_pkthdr.ph_ifidx = 0; 1603 rtm = mtod(m, struct rt_msghdr *); 1604 bzero(rtm, len); 1605 for (i = 0; i < RTAX_MAX; i++) { 1606 if (rtinfo == NULL || (sa = rtinfo->rti_info[i]) == NULL) 1607 continue; 1608 rtinfo->rti_addrs |= (1 << i); 1609 dlen = ROUNDUP(sa->sa_len); 1610 if (m_copyback(m, len, dlen, sa, M_NOWAIT)) { 1611 m_freem(m); 1612 return (NULL); 1613 } 1614 len += dlen; 1615 } 1616 rtm->rtm_msglen = len; 1617 rtm->rtm_hdrlen = hlen; 1618 rtm->rtm_version = RTM_VERSION; 1619 rtm->rtm_type = type; 1620 return (m); 1621 } 1622 1623 int 1624 rtm_msg2(int type, int vers, struct rt_addrinfo *rtinfo, caddr_t cp, 1625 struct walkarg *w) 1626 { 1627 int i; 1628 int len, dlen, hlen, second_time = 0; 1629 caddr_t cp0; 1630 1631 rtinfo->rti_addrs = 0; 1632 again: 1633 switch (type) { 1634 case RTM_DELADDR: 1635 case RTM_NEWADDR: 1636 len = sizeof(struct ifa_msghdr); 1637 break; 1638 case RTM_IFINFO: 1639 len = sizeof(struct if_msghdr); 1640 break; 1641 default: 1642 len = sizeof(struct rt_msghdr); 1643 break; 1644 } 1645 hlen = len; 1646 if ((cp0 = cp) != NULL) 1647 cp += len; 1648 for (i = 0; i < RTAX_MAX; i++) { 1649 struct sockaddr *sa; 1650 1651 if ((sa = rtinfo->rti_info[i]) == NULL) 1652 
continue; 1653 rtinfo->rti_addrs |= (1 << i); 1654 dlen = ROUNDUP(sa->sa_len); 1655 if (cp) { 1656 bcopy(sa, cp, (size_t)dlen); 1657 cp += dlen; 1658 } 1659 len += dlen; 1660 } 1661 /* align message length to the next natural boundary */ 1662 len = ALIGN(len); 1663 if (cp == 0 && w != NULL && !second_time) { 1664 w->w_needed += len; 1665 if (w->w_needed <= 0 && w->w_where) { 1666 if (w->w_tmemsize < len) { 1667 free(w->w_tmem, M_RTABLE, w->w_tmemsize); 1668 w->w_tmem = malloc(len, M_RTABLE, 1669 M_NOWAIT | M_ZERO); 1670 if (w->w_tmem) 1671 w->w_tmemsize = len; 1672 } 1673 if (w->w_tmem) { 1674 cp = w->w_tmem; 1675 second_time = 1; 1676 goto again; 1677 } else 1678 w->w_where = 0; 1679 } 1680 } 1681 if (cp && w) /* clear the message header */ 1682 bzero(cp0, hlen); 1683 1684 if (cp) { 1685 struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; 1686 1687 rtm->rtm_version = RTM_VERSION; 1688 rtm->rtm_type = type; 1689 rtm->rtm_msglen = len; 1690 rtm->rtm_hdrlen = hlen; 1691 } 1692 return (len); 1693 } 1694 1695 void 1696 rtm_send(struct rtentry *rt, int cmd, int error, unsigned int rtableid) 1697 { 1698 struct rt_addrinfo info; 1699 struct ifnet *ifp; 1700 struct sockaddr_rtlabel sa_rl; 1701 struct sockaddr_in6 sa_mask; 1702 1703 memset(&info, 0, sizeof(info)); 1704 info.rti_info[RTAX_DST] = rt_key(rt); 1705 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1706 if (!ISSET(rt->rt_flags, RTF_HOST)) 1707 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1708 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1709 ifp = if_get(rt->rt_ifidx); 1710 if (ifp != NULL) { 1711 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1712 info.rti_info[RTAX_IFA] = 1713 rtable_getsource(rtableid, info.rti_info[RTAX_DST]->sa_family); 1714 if (info.rti_info[RTAX_IFA] == NULL) 1715 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1716 } 1717 1718 rtm_miss(cmd, &info, rt->rt_flags, rt->rt_priority, rt->rt_ifidx, error, 1719 rtableid); 1720 if_put(ifp); 1721 } 1722 1723 /* 1724 * 
This routine is called to generate a message from the routing 1725 * socket indicating that a redirect has occurred, a routing lookup 1726 * has failed, or that a protocol has detected timeouts to a particular 1727 * destination. 1728 */ 1729 void 1730 rtm_miss(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio, 1731 u_int ifidx, int error, u_int tableid) 1732 { 1733 struct rt_msghdr *rtm; 1734 struct mbuf *m; 1735 struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; 1736 1737 if (rtptable.rtp_count == 0) 1738 return; 1739 m = rtm_msg1(type, rtinfo); 1740 if (m == NULL) 1741 return; 1742 rtm = mtod(m, struct rt_msghdr *); 1743 rtm->rtm_flags = RTF_DONE | flags; 1744 rtm->rtm_priority = prio; 1745 rtm->rtm_errno = error; 1746 rtm->rtm_tableid = tableid; 1747 rtm->rtm_addrs = rtinfo->rti_addrs; 1748 rtm->rtm_index = ifidx; 1749 route_input(m, NULL, sa ? sa->sa_family : AF_UNSPEC); 1750 } 1751 1752 /* 1753 * This routine is called to generate a message from the routing 1754 * socket indicating that the status of a network interface has changed. 1755 */ 1756 void 1757 rtm_ifchg(struct ifnet *ifp) 1758 { 1759 struct rt_addrinfo info; 1760 struct if_msghdr *ifm; 1761 struct mbuf *m; 1762 1763 if (rtptable.rtp_count == 0) 1764 return; 1765 memset(&info, 0, sizeof(info)); 1766 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1767 m = rtm_msg1(RTM_IFINFO, &info); 1768 if (m == NULL) 1769 return; 1770 ifm = mtod(m, struct if_msghdr *); 1771 ifm->ifm_index = ifp->if_index; 1772 ifm->ifm_tableid = ifp->if_rdomain; 1773 ifm->ifm_flags = ifp->if_flags; 1774 ifm->ifm_xflags = ifp->if_xflags; 1775 if_getdata(ifp, &ifm->ifm_data); 1776 ifm->ifm_addrs = info.rti_addrs; 1777 route_input(m, NULL, AF_UNSPEC); 1778 } 1779 1780 /* 1781 * This is called to generate messages from the routing socket 1782 * indicating a network interface has had addresses associated with it. 
1783 * if we ever reverse the logic and replace messages TO the routing 1784 * socket indicate a request to configure interfaces, then it will 1785 * be unnecessary as the routing socket will automatically generate 1786 * copies of it. 1787 */ 1788 void 1789 rtm_addr(int cmd, struct ifaddr *ifa) 1790 { 1791 struct ifnet *ifp = ifa->ifa_ifp; 1792 struct mbuf *m; 1793 struct rt_addrinfo info; 1794 struct ifa_msghdr *ifam; 1795 1796 if (rtptable.rtp_count == 0) 1797 return; 1798 1799 memset(&info, 0, sizeof(info)); 1800 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 1801 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1802 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 1803 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 1804 if ((m = rtm_msg1(cmd, &info)) == NULL) 1805 return; 1806 ifam = mtod(m, struct ifa_msghdr *); 1807 ifam->ifam_index = ifp->if_index; 1808 ifam->ifam_metric = ifa->ifa_metric; 1809 ifam->ifam_flags = ifa->ifa_flags; 1810 ifam->ifam_addrs = info.rti_addrs; 1811 ifam->ifam_tableid = ifp->if_rdomain; 1812 1813 route_input(m, NULL, 1814 ifa->ifa_addr ? ifa->ifa_addr->sa_family : AF_UNSPEC); 1815 } 1816 1817 /* 1818 * This is called to generate routing socket messages indicating 1819 * network interface arrival and departure. 1820 */ 1821 void 1822 rtm_ifannounce(struct ifnet *ifp, int what) 1823 { 1824 struct if_announcemsghdr *ifan; 1825 struct mbuf *m; 1826 1827 if (rtptable.rtp_count == 0) 1828 return; 1829 m = rtm_msg1(RTM_IFANNOUNCE, NULL); 1830 if (m == NULL) 1831 return; 1832 ifan = mtod(m, struct if_announcemsghdr *); 1833 ifan->ifan_index = ifp->if_index; 1834 strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); 1835 ifan->ifan_what = what; 1836 route_input(m, NULL, AF_UNSPEC); 1837 } 1838 1839 #ifdef BFD 1840 /* 1841 * This is used to generate routing socket messages indicating 1842 * the state of a BFD session. 
1843 */ 1844 void 1845 rtm_bfd(struct bfd_config *bfd) 1846 { 1847 struct bfd_msghdr *bfdm; 1848 struct sockaddr_bfd sa_bfd; 1849 struct mbuf *m; 1850 struct rt_addrinfo info; 1851 1852 if (rtptable.rtp_count == 0) 1853 return; 1854 memset(&info, 0, sizeof(info)); 1855 info.rti_info[RTAX_DST] = rt_key(bfd->bc_rt); 1856 info.rti_info[RTAX_IFA] = bfd->bc_rt->rt_ifa->ifa_addr; 1857 1858 m = rtm_msg1(RTM_BFD, &info); 1859 if (m == NULL) 1860 return; 1861 bfdm = mtod(m, struct bfd_msghdr *); 1862 bfdm->bm_addrs = info.rti_addrs; 1863 1864 KERNEL_ASSERT_LOCKED(); 1865 bfd2sa(bfd->bc_rt, &sa_bfd); 1866 memcpy(&bfdm->bm_sa, &sa_bfd, sizeof(sa_bfd)); 1867 1868 route_input(m, NULL, info.rti_info[RTAX_DST]->sa_family); 1869 } 1870 #endif /* BFD */ 1871 1872 /* 1873 * This is used to generate routing socket messages indicating 1874 * the state of an ieee80211 interface. 1875 */ 1876 void 1877 rtm_80211info(struct ifnet *ifp, struct if_ieee80211_data *ifie) 1878 { 1879 struct if_ieee80211_msghdr *ifim; 1880 struct mbuf *m; 1881 1882 if (rtptable.rtp_count == 0) 1883 return; 1884 m = rtm_msg1(RTM_80211INFO, NULL); 1885 if (m == NULL) 1886 return; 1887 ifim = mtod(m, struct if_ieee80211_msghdr *); 1888 ifim->ifim_index = ifp->if_index; 1889 ifim->ifim_tableid = ifp->if_rdomain; 1890 1891 memcpy(&ifim->ifim_ifie, ifie, sizeof(ifim->ifim_ifie)); 1892 route_input(m, NULL, AF_UNSPEC); 1893 } 1894 1895 /* 1896 * This is used to generate routing socket messages indicating 1897 * the address selection proposal from an interface. 
1898 */ 1899 void 1900 rtm_proposal(struct ifnet *ifp, struct rt_addrinfo *rtinfo, int flags, 1901 uint8_t prio) 1902 { 1903 struct rt_msghdr *rtm; 1904 struct mbuf *m; 1905 1906 m = rtm_msg1(RTM_PROPOSAL, rtinfo); 1907 if (m == NULL) 1908 return; 1909 rtm = mtod(m, struct rt_msghdr *); 1910 rtm->rtm_flags = RTF_DONE | flags; 1911 rtm->rtm_priority = prio; 1912 rtm->rtm_tableid = ifp->if_rdomain; 1913 rtm->rtm_index = ifp->if_index; 1914 rtm->rtm_addrs = rtinfo->rti_addrs; 1915 1916 route_input(m, NULL, rtinfo->rti_info[RTAX_DNS]->sa_family); 1917 } 1918 1919 /* 1920 * This is used in dumping the kernel table via sysctl(). 1921 */ 1922 int 1923 sysctl_dumpentry(struct rtentry *rt, void *v, unsigned int id) 1924 { 1925 struct walkarg *w = v; 1926 int error = 0, size; 1927 struct rt_addrinfo info; 1928 struct ifnet *ifp; 1929 #ifdef BFD 1930 struct sockaddr_bfd sa_bfd; 1931 #endif 1932 struct sockaddr_rtlabel sa_rl; 1933 struct sockaddr_in6 sa_mask; 1934 1935 if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) 1936 return 0; 1937 if (w->w_op == NET_RT_DUMP && w->w_arg) { 1938 u_int8_t prio = w->w_arg & RTP_MASK; 1939 if (w->w_arg < 0) { 1940 prio = (-w->w_arg) & RTP_MASK; 1941 /* Show all routes that are not this priority */ 1942 if (prio == (rt->rt_priority & RTP_MASK)) 1943 return 0; 1944 } else { 1945 if (prio != (rt->rt_priority & RTP_MASK) && 1946 prio != RTP_ANY) 1947 return 0; 1948 } 1949 } 1950 bzero(&info, sizeof(info)); 1951 info.rti_info[RTAX_DST] = rt_key(rt); 1952 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1953 info.rti_info[RTAX_NETMASK] = rt_plen2mask(rt, &sa_mask); 1954 ifp = if_get(rt->rt_ifidx); 1955 if (ifp != NULL) { 1956 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 1957 info.rti_info[RTAX_IFA] = 1958 rtable_getsource(id, info.rti_info[RTAX_DST]->sa_family); 1959 if (info.rti_info[RTAX_IFA] == NULL) 1960 info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; 1961 if (ifp->if_flags & IFF_POINTOPOINT) 1962 info.rti_info[RTAX_BRD] = 
rt->rt_ifa->ifa_dstaddr; 1963 } 1964 if_put(ifp); 1965 info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt->rt_labelid, &sa_rl); 1966 #ifdef BFD 1967 if (rt->rt_flags & RTF_BFD) { 1968 KERNEL_ASSERT_LOCKED(); 1969 info.rti_info[RTAX_BFD] = bfd2sa(rt, &sa_bfd); 1970 } 1971 #endif 1972 #ifdef MPLS 1973 if (rt->rt_flags & RTF_MPLS) { 1974 struct sockaddr_mpls sa_mpls; 1975 1976 bzero(&sa_mpls, sizeof(sa_mpls)); 1977 sa_mpls.smpls_family = AF_MPLS; 1978 sa_mpls.smpls_len = sizeof(sa_mpls); 1979 sa_mpls.smpls_label = ((struct rt_mpls *) 1980 rt->rt_llinfo)->mpls_label; 1981 info.rti_info[RTAX_SRC] = (struct sockaddr *)&sa_mpls; 1982 info.rti_mpls = ((struct rt_mpls *) 1983 rt->rt_llinfo)->mpls_operation; 1984 } 1985 #endif 1986 1987 size = rtm_msg2(RTM_GET, RTM_VERSION, &info, NULL, w); 1988 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 1989 struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; 1990 1991 rtm->rtm_pid = curproc->p_p->ps_pid; 1992 rtm->rtm_flags = rt->rt_flags; 1993 rtm->rtm_priority = rt->rt_priority & RTP_MASK; 1994 rtm_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); 1995 /* Do not account the routing table's reference. 
*/ 1996 rtm->rtm_rmx.rmx_refcnt = rt->rt_refcnt - 1; 1997 rtm->rtm_index = rt->rt_ifidx; 1998 rtm->rtm_addrs = info.rti_addrs; 1999 rtm->rtm_tableid = id; 2000 #ifdef MPLS 2001 rtm->rtm_mpls = info.rti_mpls; 2002 #endif 2003 if ((error = copyout(rtm, w->w_where, size)) != 0) 2004 w->w_where = NULL; 2005 else 2006 w->w_where += size; 2007 } 2008 return (error); 2009 } 2010 2011 int 2012 sysctl_iflist(int af, struct walkarg *w) 2013 { 2014 struct ifnet *ifp; 2015 struct ifaddr *ifa; 2016 struct rt_addrinfo info; 2017 int len, error = 0; 2018 2019 bzero(&info, sizeof(info)); 2020 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2021 if (w->w_arg && w->w_arg != ifp->if_index) 2022 continue; 2023 /* Copy the link-layer address first */ 2024 info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); 2025 len = rtm_msg2(RTM_IFINFO, RTM_VERSION, &info, 0, w); 2026 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 2027 struct if_msghdr *ifm; 2028 2029 ifm = (struct if_msghdr *)w->w_tmem; 2030 ifm->ifm_index = ifp->if_index; 2031 ifm->ifm_tableid = ifp->if_rdomain; 2032 ifm->ifm_flags = ifp->if_flags; 2033 if_getdata(ifp, &ifm->ifm_data); 2034 ifm->ifm_addrs = info.rti_addrs; 2035 error = copyout(ifm, w->w_where, len); 2036 if (error) 2037 return (error); 2038 w->w_where += len; 2039 } 2040 info.rti_info[RTAX_IFP] = NULL; 2041 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 2042 KASSERT(ifa->ifa_addr->sa_family != AF_LINK); 2043 if (af && af != ifa->ifa_addr->sa_family) 2044 continue; 2045 info.rti_info[RTAX_IFA] = ifa->ifa_addr; 2046 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 2047 info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; 2048 len = rtm_msg2(RTM_NEWADDR, RTM_VERSION, &info, 0, w); 2049 if (w->w_where && w->w_tmem && w->w_needed <= 0) { 2050 struct ifa_msghdr *ifam; 2051 2052 ifam = (struct ifa_msghdr *)w->w_tmem; 2053 ifam->ifam_index = ifa->ifa_ifp->if_index; 2054 ifam->ifam_flags = ifa->ifa_flags; 2055 ifam->ifam_metric = ifa->ifa_metric; 2056 ifam->ifam_addrs = info.rti_addrs; 2057 
error = copyout(w->w_tmem, w->w_where, len); 2058 if (error) 2059 return (error); 2060 w->w_where += len; 2061 } 2062 } 2063 info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = 2064 info.rti_info[RTAX_BRD] = NULL; 2065 } 2066 return (0); 2067 } 2068 2069 int 2070 sysctl_ifnames(struct walkarg *w) 2071 { 2072 struct if_nameindex_msg ifn; 2073 struct ifnet *ifp; 2074 int error = 0; 2075 2076 /* XXX ignore tableid for now */ 2077 TAILQ_FOREACH(ifp, &ifnet, if_list) { 2078 if (w->w_arg && w->w_arg != ifp->if_index) 2079 continue; 2080 w->w_needed += sizeof(ifn); 2081 if (w->w_where && w->w_needed <= 0) { 2082 2083 memset(&ifn, 0, sizeof(ifn)); 2084 ifn.if_index = ifp->if_index; 2085 strlcpy(ifn.if_name, ifp->if_xname, 2086 sizeof(ifn.if_name)); 2087 error = copyout(&ifn, w->w_where, sizeof(ifn)); 2088 if (error) 2089 return (error); 2090 w->w_where += sizeof(ifn); 2091 } 2092 } 2093 2094 return (0); 2095 } 2096 2097 int 2098 sysctl_source(int af, u_int tableid, struct walkarg *w) 2099 { 2100 struct sockaddr *sa; 2101 int size, error = 0; 2102 2103 sa = rtable_getsource(tableid, af); 2104 if (sa) { 2105 switch (sa->sa_family) { 2106 case AF_INET: 2107 size = sizeof(struct sockaddr_in); 2108 break; 2109 #ifdef INET6 2110 case AF_INET6: 2111 size = sizeof(struct sockaddr_in6); 2112 break; 2113 #endif 2114 default: 2115 return (0); 2116 } 2117 w->w_needed += size; 2118 if (w->w_where && w->w_needed <= 0) { 2119 if ((error = copyout(sa, w->w_where, size))) 2120 return (error); 2121 w->w_where += size; 2122 } 2123 } 2124 return (0); 2125 } 2126 2127 int 2128 sysctl_rtable(int *name, u_int namelen, void *where, size_t *given, void *new, 2129 size_t newlen) 2130 { 2131 int i, error = EINVAL; 2132 u_char af; 2133 struct walkarg w; 2134 struct rt_tableinfo tableinfo; 2135 u_int tableid = 0; 2136 2137 if (new) 2138 return (EPERM); 2139 if (namelen < 3 || namelen > 4) 2140 return (EINVAL); 2141 af = name[0]; 2142 bzero(&w, sizeof(w)); 2143 w.w_where = where; 2144 w.w_given = 
*given; 2145 w.w_needed = 0 - w.w_given; 2146 w.w_op = name[1]; 2147 w.w_arg = name[2]; 2148 2149 if (namelen == 4) { 2150 tableid = name[3]; 2151 if (!rtable_exists(tableid)) 2152 return (ENOENT); 2153 } else 2154 tableid = curproc->p_p->ps_rtableid; 2155 2156 switch (w.w_op) { 2157 case NET_RT_DUMP: 2158 case NET_RT_FLAGS: 2159 NET_LOCK(); 2160 for (i = 1; i <= AF_MAX; i++) { 2161 if (af != 0 && af != i) 2162 continue; 2163 2164 error = rtable_walk(tableid, i, NULL, sysctl_dumpentry, 2165 &w); 2166 if (error == EAFNOSUPPORT) 2167 error = 0; 2168 if (error) 2169 break; 2170 } 2171 NET_UNLOCK(); 2172 break; 2173 2174 case NET_RT_IFLIST: 2175 NET_LOCK(); 2176 error = sysctl_iflist(af, &w); 2177 NET_UNLOCK(); 2178 break; 2179 2180 case NET_RT_STATS: 2181 return (sysctl_rtable_rtstat(where, given, new)); 2182 case NET_RT_TABLE: 2183 tableid = w.w_arg; 2184 if (!rtable_exists(tableid)) 2185 return (ENOENT); 2186 memset(&tableinfo, 0, sizeof tableinfo); 2187 tableinfo.rti_tableid = tableid; 2188 tableinfo.rti_domainid = rtable_l2(tableid); 2189 error = sysctl_rdstruct(where, given, new, 2190 &tableinfo, sizeof(tableinfo)); 2191 return (error); 2192 case NET_RT_IFNAMES: 2193 NET_LOCK(); 2194 error = sysctl_ifnames(&w); 2195 NET_UNLOCK(); 2196 break; 2197 case NET_RT_SOURCE: 2198 tableid = w.w_arg; 2199 if (!rtable_exists(tableid)) 2200 return (ENOENT); 2201 NET_LOCK(); 2202 for (i = 1; i <= AF_MAX; i++) { 2203 if (af != 0 && af != i) 2204 continue; 2205 2206 error = sysctl_source(i, tableid, &w); 2207 if (error == EAFNOSUPPORT) 2208 error = 0; 2209 if (error) 2210 break; 2211 } 2212 NET_UNLOCK(); 2213 break; 2214 } 2215 free(w.w_tmem, M_RTABLE, w.w_tmemsize); 2216 w.w_needed += w.w_given; 2217 if (where) { 2218 *given = w.w_where - (caddr_t)where; 2219 if (*given < w.w_needed) 2220 return (ENOMEM); 2221 } else 2222 *given = w.w_needed + w.w_needed / 10; 2223 2224 return (error); 2225 } 2226 2227 int 2228 sysctl_rtable_rtstat(void *oldp, size_t *oldlenp, void *newp) 2229 
{ 2230 extern struct cpumem *rtcounters; 2231 uint64_t counters[rts_ncounters]; 2232 struct rtstat rtstat; 2233 uint32_t *words = (uint32_t *)&rtstat; 2234 int i; 2235 2236 CTASSERT(sizeof(rtstat) == (nitems(counters) * sizeof(uint32_t))); 2237 memset(&rtstat, 0, sizeof rtstat); 2238 counters_read(rtcounters, counters, nitems(counters)); 2239 2240 for (i = 0; i < nitems(counters); i++) 2241 words[i] = (uint32_t)counters[i]; 2242 2243 return (sysctl_rdstruct(oldp, oldlenp, newp, &rtstat, sizeof(rtstat))); 2244 } 2245 2246 int 2247 rtm_validate_proposal(struct rt_addrinfo *info) 2248 { 2249 if (info->rti_addrs & ~(RTA_NETMASK | RTA_IFA | RTA_DNS | RTA_STATIC | 2250 RTA_SEARCH)) { 2251 return -1; 2252 } 2253 2254 if (ISSET(info->rti_addrs, RTA_NETMASK)) { 2255 struct sockaddr *sa = info->rti_info[RTAX_NETMASK]; 2256 if (sa == NULL) 2257 return -1; 2258 switch (sa->sa_family) { 2259 case AF_INET: 2260 if (sa->sa_len != sizeof(struct sockaddr_in)) 2261 return -1; 2262 break; 2263 case AF_INET6: 2264 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2265 return -1; 2266 break; 2267 default: 2268 return -1; 2269 } 2270 } 2271 2272 if (ISSET(info->rti_addrs, RTA_IFA)) { 2273 struct sockaddr *sa = info->rti_info[RTAX_IFA]; 2274 if (sa == NULL) 2275 return -1; 2276 switch (sa->sa_family) { 2277 case AF_INET: 2278 if (sa->sa_len != sizeof(struct sockaddr_in)) 2279 return -1; 2280 break; 2281 case AF_INET6: 2282 if (sa->sa_len != sizeof(struct sockaddr_in6)) 2283 return -1; 2284 break; 2285 default: 2286 return -1; 2287 } 2288 } 2289 2290 if (ISSET(info->rti_addrs, RTA_DNS)) { 2291 struct sockaddr_rtdns *rtdns = 2292 (struct sockaddr_rtdns *)info->rti_info[RTAX_DNS]; 2293 if (rtdns == NULL) 2294 return -1; 2295 if (rtdns->sr_len > sizeof(*rtdns)) 2296 return -1; 2297 if (rtdns->sr_len < offsetof(struct sockaddr_rtdns, sr_dns)) 2298 return -1; 2299 switch (rtdns->sr_family) { 2300 case AF_INET: 2301 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2302 sr_dns)) % sizeof(struct 
in_addr) != 0) 2303 return -1; 2304 break; 2305 #ifdef INET6 2306 case AF_INET6: 2307 if ((rtdns->sr_len - offsetof(struct sockaddr_rtdns, 2308 sr_dns)) % sizeof(struct in6_addr) != 0) 2309 return -1; 2310 break; 2311 #endif 2312 default: 2313 return -1; 2314 } 2315 } 2316 2317 if (ISSET(info->rti_addrs, RTA_STATIC)) { 2318 struct sockaddr_rtstatic *rtstatic = 2319 (struct sockaddr_rtstatic *)info->rti_info[RTAX_STATIC]; 2320 if (rtstatic == NULL) 2321 return -1; 2322 if (rtstatic->sr_len > sizeof(*rtstatic)) 2323 return -1; 2324 if (rtstatic->sr_len <= 2325 offsetof(struct sockaddr_rtstatic, sr_static)) 2326 return -1; 2327 } 2328 2329 if (ISSET(info->rti_addrs, RTA_SEARCH)) { 2330 struct sockaddr_rtsearch *rtsearch = 2331 (struct sockaddr_rtsearch *)info->rti_info[RTAX_SEARCH]; 2332 if (rtsearch == NULL) 2333 return -1; 2334 if (rtsearch->sr_len > sizeof(*rtsearch)) 2335 return -1; 2336 if (rtsearch->sr_len <= 2337 offsetof(struct sockaddr_rtsearch, sr_search)) 2338 return -1; 2339 } 2340 2341 return 0; 2342 } 2343 2344 int 2345 rt_setsource(unsigned int rtableid, struct sockaddr *src) 2346 { 2347 struct ifaddr *ifa; 2348 int error; 2349 /* 2350 * If source address is 0.0.0.0 or :: 2351 * use automatic source selection 2352 */ 2353 switch(src->sa_family) { 2354 case AF_INET: 2355 if(satosin(src)->sin_addr.s_addr == INADDR_ANY) { 2356 rtable_setsource(rtableid, AF_INET, NULL); 2357 return (0); 2358 } 2359 break; 2360 #ifdef INET6 2361 case AF_INET6: 2362 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 2363 rtable_setsource(rtableid, AF_INET6, NULL); 2364 return (0); 2365 } 2366 break; 2367 #endif 2368 default: 2369 return (EAFNOSUPPORT); 2370 } 2371 2372 KERNEL_LOCK(); 2373 /* 2374 * Check if source address is assigned to an interface in the 2375 * same rdomain 2376 */ 2377 if ((ifa = ifa_ifwithaddr(src, rtableid)) == NULL) { 2378 KERNEL_UNLOCK(); 2379 return (EINVAL); 2380 } 2381 2382 error = rtable_setsource(rtableid, src->sa_family, ifa->ifa_addr); 
2383 KERNEL_UNLOCK(); 2384 2385 return (error); 2386 } 2387 2388 /* 2389 * Definitions of protocols supported in the ROUTE domain. 2390 */ 2391 2392 const struct protosw routesw[] = { 2393 { 2394 .pr_type = SOCK_RAW, 2395 .pr_domain = &routedomain, 2396 .pr_flags = PR_ATOMIC|PR_ADDR|PR_WANTRCVD, 2397 .pr_output = route_output, 2398 .pr_ctloutput = route_ctloutput, 2399 .pr_usrreq = route_usrreq, 2400 .pr_attach = route_attach, 2401 .pr_detach = route_detach, 2402 .pr_init = route_prinit, 2403 .pr_sysctl = sysctl_rtable 2404 } 2405 }; 2406 2407 const struct domain routedomain = { 2408 .dom_family = PF_ROUTE, 2409 .dom_name = "route", 2410 .dom_init = route_init, 2411 .dom_protosw = routesw, 2412 .dom_protoswNPROTOSW = &routesw[nitems(routesw)] 2413 }; 2414