1 /* LWIP service - rtsock.c - routing sockets and route sysctl support */ 2 /* 3 * In a nutshell, the intended abstraction is that only this module deals with 4 * route messages, message headers, and RTA arrays, whereas other modules 5 * (ifaddr, route) are responsible for parsing and providing sockaddr_* type 6 * addresses, with the exception of compression and expansion which is 7 * particular to routing sockets. Concretely, there should be no reference to 8 * (e.g.) rt_msghdr outside this module, and no mention of ip_addr_t inside it. 9 */ 10 11 #include "lwip.h" 12 #include "ifaddr.h" 13 #include "rtsock.h" 14 #include "route.h" 15 #include "lldata.h" 16 17 /* The number of routing sockets. */ 18 #define NR_RTSOCK 8 19 20 /* 21 * The send buffer maximum determines the maximum size of requests. The 22 * maximum possible request size is the size of the routing message header plus 23 * RTAX_MAX times the maximum socket address size, including alignment. That 24 * currently works out to a number in the low 400s, so 512 should be fine for 25 * now. At this time we do not support changing the send buffer size, because 26 * there really is no point in doing so. Hence also no RT_SNDBUF_{MIN,DEF}. 27 */ 28 #define RT_SNDBUF_MAX 512 /* maximum RT send buffer size */ 29 30 #define RT_RCVBUF_MIN 0 /* minimum RT receive buffer size */ 31 #define RT_RCVBUF_DEF 16384 /* default RT receive buffer size */ 32 #define RT_RCVBUF_MAX 65536 /* maximum RT receive buffer size */ 33 34 /* Address length of routing socket address structures; two bytes only. */ 35 #define RTSOCK_ADDR_LEN offsetof(struct sockaddr, sa_data) 36 37 struct rtsock_rta { 38 const void *rta_ptr[RTAX_MAX]; 39 socklen_t rta_len[RTAX_MAX]; 40 }; 41 42 static const char rtsock_padbuf[RT_ROUNDUP(0)]; 43 44 static struct rtsock { 45 struct sock rt_sock; /* socket object, MUST be first */ 46 int rt_family; /* address family filter if not zero */ 47 unsigned int rt_flags; /* routing socket flags (RTF_) */ 48 struct pbuf *rt_rcvhead; /* receive buffer, first packet */ 49 struct pbuf **rt_rcvtailp; /* receive buffer, last ptr-ptr */ 50 size_t rt_rcvlen; /* receive buffer, length in bytes */ 51 size_t rt_rcvbuf; /* receive buffer, maximum size */ 52 TAILQ_ENTRY(rtsock) rt_next; /* next in active or free list */ 53 } rt_array[NR_RTSOCK]; 54 55 #define RTF_NOLOOPBACK 0x1 /* suppress reply messages */ 56 57 static TAILQ_HEAD(, rtsock) rt_freelist; /* free routing sockets */ 58 static TAILQ_HEAD(, rtsock) rt_activelist; /* active routing sockets */ 59 60 struct rtsock_request { 61 struct rtsock *rtr_src; /* source socket of the request */ 62 pid_t rtr_pid; /* process ID of requesting process */ 63 int rtr_seq; /* sequence number from the request */ 64 int rtr_getif; /* RTM_GET only: get interface info */ 65 }; 66 67 static const struct sockevent_ops rtsock_ops; 68 69 static ssize_t rtsock_info(struct rmib_call *, struct rmib_node *, 70 struct rmib_oldp *, struct rmib_newp *); 71 72 /* The CTL_NET PF_ROUTE subtree. */ 73 static struct rmib_node net_route_table[] = { 74 [0] = RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, rtsock_info, 75 "rtable", "Routing table information"), 76 }; 77 78 /* The CTL_NET PF_ROUTE node. */ 79 static struct rmib_node net_route_node = 80 RMIB_NODE(RMIB_RO, net_route_table, "route", "PF_ROUTE information"); 81 82 /* 83 * Initialize the routing sockets module. 84 */ 85 void 86 rtsock_init(void) 87 { 88 const int mib[] = { CTL_NET, PF_ROUTE }; 89 unsigned int slot; 90 int r; 91 92 /* Initialize the list of free routing sockets. */ 93 TAILQ_INIT(&rt_freelist); 94 95 for (slot = 0; slot < __arraycount(rt_array); slot++) 96 TAILQ_INSERT_TAIL(&rt_freelist, &rt_array[slot], rt_next); 97 98 /* Initialize the list of acive routing sockets. */ 99 TAILQ_INIT(&rt_activelist); 100 101 /* Register the "net.route" subtree with the MIB service. */ 102 if ((r = rmib_register(mib, __arraycount(mib), &net_route_node)) != OK) 103 panic("unable to register net.route RMIB tree: %d", r); 104 } 105 106 /* 107 * Allocate a pbuf suitable for storing a routing message of 'size' bytes. 108 * Return the allocated pbuf on success, or NULL on memory allocation failure. 109 */ 110 static struct pbuf * 111 rtsock_alloc(size_t size) 112 { 113 struct pbuf *pbuf; 114 115 /* 116 * The data will currently always fit in a single pool buffer. Just in 117 * case this changes in the future, warn and fail cleanly. The rest of 118 * the code is not able to deal with buffer chains as it is, although 119 * that can be changed if necessary. 120 */ 121 if (size > MEMPOOL_BUFSIZE) { 122 printf("LWIP: routing socket packet too large (%zu)\n", size); 123 124 return NULL; 125 } 126 127 pbuf = pbuf_alloc(PBUF_RAW, size, PBUF_RAM); 128 129 assert(pbuf == NULL || pbuf->tot_len == pbuf->len); 130 131 return pbuf; 132 } 133 134 /* 135 * Initialize a routing addresses map. 136 */ 137 static void 138 rtsock_rta_init(struct rtsock_rta * rta) 139 { 140 141 memset(rta, 0, sizeof(*rta)); 142 } 143 144 /* 145 * Set an entry in a routing addresses map. When computing sizes, 'ptr' may be 146 * NULL. 147 */ 148 static void 149 rtsock_rta_set(struct rtsock_rta * rta, unsigned int rtax, const void * ptr, 150 socklen_t len) 151 { 152 153 assert(rtax < RTAX_MAX); 154 155 rta->rta_ptr[rtax] = ptr; 156 rta->rta_len[rtax] = len; 157 } 158 159 /* 160 * Copy out a message with a header and any entries in a routing addresses map, 161 * either into a pbuf allocated for this purpose, or to a RMIB (sysctl) caller, 162 * at the given offset. If no destination is given ('pbuf ' and 'oldp' are 163 * both NULL), compute just the size of the resulting data. Otherwise, set the 164 * length and address mask fields in the header as a side effect. Return the 165 * number of bytes copied on success, and if 'pbuf' is not NULL, it is filled 166 * with a pointer to the newly allocated pbuf. Return a negative error code on 167 * failure. Note that when computing the size only, any actual data pointers 168 * ('hdr', 'msglen', 'addrs', and the pointers in 'rta') may be NULL or even 169 * invalid, even though the corresponding sizes should still be supplied. 170 */ 171 static ssize_t 172 rtsock_rta_finalize(void * hdr, size_t hdrlen, u_short * msglen, int * addrs, 173 const struct rtsock_rta * rta, struct pbuf ** pbuf, 174 struct rmib_oldp * oldp, ssize_t off) 175 { 176 iovec_t iov[1 + RTAX_MAX * 2]; 177 size_t len, padlen, totallen; 178 unsigned int i, iovcnt; 179 int mask; 180 181 assert(pbuf == NULL || oldp == NULL); 182 assert(pbuf == NULL || off == 0); 183 assert(RT_ROUNDUP(hdrlen) == hdrlen); 184 185 iov[0].iov_addr = (vir_bytes)hdr; 186 iov[0].iov_size = hdrlen; 187 iovcnt = 1; 188 189 totallen = hdrlen; 190 mask = 0; 191 192 /* 193 * The addresses in the given RTA map, as present, should be stored in 194 * the numbering order of the map. 195 */ 196 for (i = 0; i < RTAX_MAX; i++) { 197 if (rta->rta_ptr[i] == NULL) 198 continue; 199 200 if ((len = rta->rta_len[i]) > 0) { 201 assert(iovcnt < __arraycount(iov)); 202 iov[iovcnt].iov_addr = (vir_bytes)rta->rta_ptr[i]; 203 iov[iovcnt++].iov_size = len; 204 } 205 206 /* Note that RT_ROUNDUP(0) is not 0.. */ 207 if ((padlen = RT_ROUNDUP(len) - len) > 0) { 208 assert(iovcnt < __arraycount(iov)); 209 iov[iovcnt].iov_addr = (vir_bytes)rtsock_padbuf; 210 iov[iovcnt++].iov_size = padlen; 211 } 212 213 totallen += len + padlen; 214 mask |= (1 << i); /* convert RTAX_ to RTA_ */ 215 } 216 217 /* If only the length was requested, return it now. */ 218 if (pbuf == NULL && oldp == NULL) 219 return totallen; 220 221 /* 222 * Casting 'hdr' would violate C99 strict aliasing rules, but the 223 * address mask is not always at the same location anyway. 224 */ 225 *msglen = totallen; 226 *addrs = mask; 227 228 if (pbuf != NULL) { 229 if ((*pbuf = rtsock_alloc(totallen)) == NULL) 230 return ENOMEM; 231 232 return util_coalesce((char *)(*pbuf)->payload, totallen, iov, 233 iovcnt); 234 } else 235 return rmib_vcopyout(oldp, off, iov, iovcnt); 236 } 237 238 /* 239 * Reduce the size of a network mask to the bytes actually used. It is highly 240 * doubtful that this extra complexity pays off in any form, but it is what the 241 * BSDs historically do. We currently implement compression for IPv4 only. 242 */ 243 static void 244 rtsock_compress_netmask(struct sockaddr * sa) 245 { 246 struct sockaddr_in sin; 247 uint32_t addr; 248 249 if (sa->sa_family != AF_INET) 250 return; /* nothing to do */ 251 252 memcpy(&sin, sa, sizeof(sin)); /* no type punning.. (sigh) */ 253 254 addr = htonl(sin.sin_addr.s_addr); 255 256 if (addr & 0x000000ff) 257 sa->sa_len = 8; 258 else if (addr & 0x0000ffff) 259 sa->sa_len = 7; 260 else if (addr & 0x00ffffff) 261 sa->sa_len = 6; 262 else if (addr != 0) 263 sa->sa_len = 5; 264 else 265 sa->sa_len = 0; 266 } 267 268 /* 269 * Expand a possibly compressed IPv4 or IPv6 network mask, given as 'sa', into 270 * 'mask'. Return TRUE if expansion succeeded. In that case, the resulting 271 * mask must have sa.sa_len and sa.sa_family filled in correctly, and have the 272 * appropriate size for its address family. Return FALSE if expansion failed 273 * and an error should be returned to the caller. 274 */ 275 static int 276 rtsock_expand_netmask(union sockaddr_any * mask, const struct sockaddr * sa) 277 { 278 279 if (sa->sa_len > sizeof(*mask)) 280 return FALSE; 281 282 memset(mask, 0, sizeof(*mask)); 283 memcpy(mask, sa, sa->sa_len); 284 285 /* 286 * Amazingly, even the address family may be chopped off, in which case 287 * an IPv4 address is implied. 288 */ 289 if (sa->sa_len >= offsetof(struct sockaddr, sa_data) && 290 sa->sa_family == AF_INET6) { 291 if (sa->sa_len > sizeof(struct sockaddr_in6)) 292 return FALSE; 293 294 mask->sa.sa_len = sizeof(struct sockaddr_in6); 295 mask->sa.sa_family = AF_INET6; 296 } else { 297 if (sa->sa_len > sizeof(struct sockaddr_in)) 298 return FALSE; 299 300 mask->sa.sa_len = sizeof(struct sockaddr_in); 301 mask->sa.sa_family = AF_INET; 302 } 303 304 return TRUE; 305 } 306 307 /* 308 * Create a routing socket. 309 */ 310 sockid_t 311 rtsock_socket(int type, int protocol, struct sock ** sockp, 312 const struct sockevent_ops ** ops) 313 { 314 struct rtsock *rt; 315 316 /* 317 * There is no superuser check here: regular users are allowed to issue 318 * (only) RTM_GET requests on routing sockets. 319 */ 320 if (type != SOCK_RAW) 321 return EPROTOTYPE; 322 323 /* We could accept only the protocols we know, but this is fine too. */ 324 if (protocol < 0 || protocol >= AF_MAX) 325 return EPROTONOSUPPORT; 326 327 if (TAILQ_EMPTY(&rt_freelist)) 328 return ENOBUFS; 329 330 rt = TAILQ_FIRST(&rt_freelist); 331 TAILQ_REMOVE(&rt_freelist, rt, rt_next); 332 333 rt->rt_flags = 0; 334 rt->rt_family = protocol; 335 rt->rt_rcvhead = NULL; 336 rt->rt_rcvtailp = &rt->rt_rcvhead; 337 rt->rt_rcvlen = 0; 338 rt->rt_rcvbuf = RT_RCVBUF_DEF; 339 340 TAILQ_INSERT_HEAD(&rt_activelist, rt, rt_next); 341 342 *sockp = &rt->rt_sock; 343 *ops = &rtsock_ops; 344 return SOCKID_RT | (sockid_t)(rt - rt_array); 345 } 346 347 /* 348 * Enqueue data on the receive queue of a routing socket. The caller must have 349 * checked whether the receive buffer size allows for the receipt of the data. 350 */ 351 static void 352 rtsock_enqueue(struct rtsock * rt, struct pbuf * pbuf) 353 { 354 355 *rt->rt_rcvtailp = pbuf; 356 rt->rt_rcvtailp = pchain_end(pbuf); 357 rt->rt_rcvlen += pchain_size(pbuf); 358 359 sockevent_raise(&rt->rt_sock, SEV_RECV); 360 } 361 362 /* 363 * Determine whether a routing message for address family 'family', originated 364 * from routing socket 'rtsrc' if not NULL, should be sent to routing socket 365 * 'rt'. Return TRUE if the message should be sent to this socket, or FALSE 366 * if it should not. 367 */ 368 static int 369 rtsock_can_send(struct rtsock *rt, struct rtsock *rtsrc, int family) 370 { 371 372 /* Do not send anything on sockets shut down for reading. */ 373 if (sockevent_is_shutdown(&rt->rt_sock, SFL_SHUT_RD)) 374 return FALSE; 375 376 /* 377 * Do not send a reply message to the source of the request if the 378 * source is not interested in replies to its own requests. 379 */ 380 if (rt == rtsrc && (rt->rt_flags & RTF_NOLOOPBACK)) 381 return FALSE; 382 383 /* 384 * For address family specific messages, make sure the routing socket 385 * is interested in that family. Make an exception if the socket was 386 * the source of the request, though: we currently do not prevent user 387 * processes from issuing commands for the "wrong" family. 388 */ 389 if (rt->rt_family != AF_UNSPEC && family != AF_UNSPEC && 390 rt->rt_family != family && rt != rtsrc) 391 return FALSE; 392 393 /* 394 * See whether the receive queue of the socket is already full. We do 395 * not consider the size of the current request, in order to not drop 396 * larger messages and then enqueue smaller ones. 397 */ 398 if (rt->rt_rcvlen >= rt->rt_rcvbuf) 399 return FALSE; 400 401 /* All is well: go on and deliver the message. */ 402 return TRUE; 403 } 404 405 /* 406 * Send the routing message in 'pbuf' to the given routing socket if possible, 407 * or check whether such a message could be sent to that socket if 'pbuf' is 408 * NULL. In the former case, the function takes ownership of 'pbuf'. The 409 * given routing socket is assumed to be the source of the routing request that 410 * generated this message. In the latter case, the function returns TRUE if 411 * the socket would take the message or FALSE if not. If 'family' is not 412 * AF_UNSPEC, it is to be the address family of the message. 413 */ 414 static int 415 rtsock_msg_one(struct rtsock * rt, int family, struct pbuf * pbuf) 416 { 417 418 if (rtsock_can_send(rt, rt, family)) { 419 if (pbuf != NULL) 420 rtsock_enqueue(rt, pbuf); 421 422 return TRUE; 423 } else { 424 if (pbuf != NULL) 425 pbuf_free(pbuf); 426 427 return FALSE; 428 } 429 } 430 431 /* 432 * Send the routing message in 'pbuf' to all matching routing sockets, or check 433 * whether there are any such matching routing sockets if 'pbuf' is NULL. In 434 * the former case, the function takes ownership of 'pbuf'. In the latter 435 * case, the function returns TRUE if there are any matching sockets or FALSE 436 * if there are none. If 'rtsrc' is not NULL, it is to be the routing socket 437 * that is the source of the message. If 'family' is not AF_UNSPEC, it is to 438 * be the address family of the message. 439 */ 440 static int 441 rtsock_msg_match(struct rtsock * rtsrc, int family, struct pbuf * pbuf) 442 { 443 struct rtsock *rt, *rtprev; 444 struct pbuf *pcopy; 445 446 rtprev = NULL; 447 448 TAILQ_FOREACH(rt, &rt_activelist, rt_next) { 449 if (!rtsock_can_send(rt, rtsrc, family)) 450 continue; 451 452 /* 453 * There is at least one routing socket that is interested in 454 * receiving this message, and able to receive it. 455 */ 456 if (pbuf == NULL) 457 return TRUE; 458 459 /* 460 * We need to make copies of the generated message for all but 461 * the last matching socket, which gets the original. If we're 462 * out of memory, free the original and stop: there are more 463 * important things to spend memory on than routing sockets. 464 */ 465 if (rtprev != NULL) { 466 if ((pcopy = rtsock_alloc(pbuf->tot_len)) == NULL) { 467 pbuf_free(pbuf); 468 469 return TRUE; 470 } 471 472 if (pbuf_copy(pcopy, pbuf) != ERR_OK) 473 panic("unexpected pbuf copy failure"); 474 475 rtsock_enqueue(rtprev, pcopy); 476 } 477 478 rtprev = rt; 479 } 480 481 if (rtprev != NULL) 482 rtsock_enqueue(rtprev, pbuf); 483 else if (pbuf != NULL) 484 pbuf_free(pbuf); 485 486 return (rtprev != NULL); 487 } 488 489 /* 490 * Dequeue and free the head of the receive queue of a routing socket. 491 */ 492 static void 493 rtsock_dequeue(struct rtsock * rt) 494 { 495 struct pbuf *pbuf, **pnext; 496 size_t size; 497 498 pbuf = rt->rt_rcvhead; 499 assert(pbuf != NULL); 500 501 pnext = pchain_end(pbuf); 502 size = pchain_size(pbuf); 503 504 if ((rt->rt_rcvhead = *pnext) == NULL) 505 rt->rt_rcvtailp = &rt->rt_rcvhead; 506 507 assert(rt->rt_rcvlen >= size); 508 rt->rt_rcvlen -= size; 509 510 *pnext = NULL; 511 pbuf_free(pbuf); 512 } 513 514 /* 515 * Process a routing message sent on a socket. Return OK on success, in which 516 * case the caller assumes that the processing routine has sent a reply to the 517 * user and possibly other routing sockets. Return a negative error code on 518 * failure, in which case the caller will send the reply to the user instead. 519 */ 520 static int 521 rtsock_process(struct rtsock *rt, struct rt_msghdr * rtm, char * buf, 522 size_t len, int is_root) 523 { 524 struct rtsock_request rtr; 525 struct rtsock_rta rta; 526 const struct sockaddr *netmask; 527 struct sockaddr sa; 528 union sockaddr_any mask; 529 size_t off; 530 int i; 531 532 if (rtm->rtm_msglen != len) 533 return EINVAL; 534 535 if (rtm->rtm_version != RTM_VERSION) { 536 printf("LWIP: PID %d uses routing sockets version %u\n", 537 rtm->rtm_pid, rtm->rtm_version); 538 539 return EPROTONOSUPPORT; 540 } 541 542 /* 543 * Make sure that we won't misinterpret the rest of the message. While 544 * looking at the message type, also make sure non-root users can only 545 * ever issue RTM_GET requests. 546 */ 547 switch (rtm->rtm_type) { 548 case RTM_ADD: 549 case RTM_DELETE: 550 case RTM_CHANGE: 551 case RTM_LOCK: 552 if (!is_root) 553 return EPERM; 554 555 /* FALLTHROUGH */ 556 case RTM_GET: 557 break; 558 559 default: 560 return EOPNOTSUPP; 561 } 562 563 /* 564 * Extract all given addresses. We do not actually support all types 565 * of entries, but we cannot skip the ones we do not need either. 566 */ 567 rtsock_rta_init(&rta); 568 569 off = sizeof(*rtm); 570 assert(off == RT_ROUNDUP(off)); 571 572 for (i = 0; i < RTAX_MAX; i++) { 573 if (!(rtm->rtm_addrs & (1 << i))) 574 continue; 575 576 if (off + offsetof(struct sockaddr, sa_data) > len) 577 return EINVAL; 578 579 /* 580 * It is safe to access sa_len and even sa_family in all cases, 581 * in particular even when the structure is of size zero. 582 */ 583 assert(offsetof(struct sockaddr, sa_data) <= RT_ROUNDUP(0)); 584 585 memcpy(&sa, &buf[off], offsetof(struct sockaddr, sa_data)); 586 587 if (off + sa.sa_len > len) 588 return EINVAL; 589 590 rtsock_rta_set(&rta, i, &buf[off], sa.sa_len); 591 592 off += RT_ROUNDUP((size_t)sa.sa_len); 593 } 594 595 /* 596 * Expand the given netmask if it is in compressed IPv4 form. We do 597 * this here because it is particular to routing sockets; we also do 598 * the compression in this module. Note how the compression may even 599 * strip off the address family; really, who came up with this ****? 600 */ 601 netmask = (const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK]; 602 603 if (netmask != NULL) { 604 if (!rtsock_expand_netmask(&mask, netmask)) 605 return EINVAL; 606 607 rtsock_rta_set(&rta, RTAX_NETMASK, &mask, mask.sa.sa_len); 608 } 609 610 /* 611 * Actually process the command. Pass on enough information so that a 612 * reply can be generated on success. The abstraction as sketched at 613 * the top of the file imposes that we pass quite a few parameters. 614 */ 615 rtr.rtr_src = rt; 616 rtr.rtr_pid = rtm->rtm_pid; 617 rtr.rtr_seq = rtm->rtm_seq; 618 rtr.rtr_getif = (rtm->rtm_type == RTM_GET && 619 (rta.rta_ptr[RTAX_IFP] != NULL || rta.rta_ptr[RTAX_IFA] != NULL)); 620 621 return route_process(rtm->rtm_type, 622 (const struct sockaddr *)rta.rta_ptr[RTAX_DST], 623 (const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK], 624 (const struct sockaddr *)rta.rta_ptr[RTAX_GATEWAY], 625 (const struct sockaddr *)rta.rta_ptr[RTAX_IFP], 626 (const struct sockaddr *)rta.rta_ptr[RTAX_IFA], 627 rtm->rtm_flags, rtm->rtm_inits, &rtm->rtm_rmx, &rtr); 628 } 629 630 /* 631 * Perform preliminary checks on a send request. 632 */ 633 static int 634 rtsock_pre_send(struct sock * sock __unused, size_t len, 635 socklen_t ctl_len __unused, const struct sockaddr * addr, 636 socklen_t addr_len __unused, endpoint_t user_endpt __unused, int flags) 637 { 638 639 if (flags != 0) 640 return EOPNOTSUPP; 641 642 if (addr != NULL) 643 return EISCONN; 644 645 /* 646 * For the most basic failures - that is, we cannot even manage to 647 * receive the request - we do not generate a reply message. 648 */ 649 if (len < sizeof(struct rt_msghdr)) 650 return ENOBUFS; 651 if (len > RT_SNDBUF_MAX) 652 return EMSGSIZE; 653 654 return OK; 655 } 656 657 /* 658 * Send data on a routing socket. 659 */ 660 static int 661 rtsock_send(struct sock * sock, const struct sockdriver_data * data, 662 size_t len, size_t * offp, const struct sockdriver_data * ctl __unused, 663 socklen_t ctl_len __unused, socklen_t * ctl_off __unused, 664 const struct sockaddr * addr __unused, socklen_t addr_len __unused, 665 endpoint_t user_endpt, int flags __unused, size_t min __unused) 666 { 667 struct rtsock *rt = (struct rtsock *)sock; 668 char buf[RT_SNDBUF_MAX] __aligned(4); 669 struct rt_msghdr rtm; 670 struct pbuf *pbuf; 671 uid_t euid; 672 int r, is_root; 673 674 /* Copy in the request, and adjust some fields right away. */ 675 assert(len >= sizeof(rtm)); 676 assert(len <= sizeof(buf)); 677 678 if ((r = sockdriver_copyin(data, 0, buf, len)) != OK) 679 return r; 680 681 memcpy(&rtm, buf, sizeof(rtm)); 682 rtm.rtm_errno = 0; 683 rtm.rtm_flags &= ~RTF_DONE; 684 rtm.rtm_pid = getepinfo(user_endpt, &euid, NULL /*gid*/); 685 686 is_root = (euid == ROOT_EUID); 687 688 /* Process the request. */ 689 r = rtsock_process(rt, &rtm, buf, len, is_root); 690 691 /* 692 * If the request has been processed successfully, a reply has been 693 * sent already, possibly also to other routing sockets. Here, we 694 * handle the case that the request has resulted in failure, in which 695 * case we send a reply to the caller only. This behavior is different 696 * from the traditional BSD behavior, which also sends failure replies 697 * to other sockets. Our motivation is that while other parties are 698 * never going to be interested in failures anyway, it is in fact easy 699 * for an unprivileged user process to abuse the failure-reply system 700 * in order to fake other types of routing messages (e.g., RTM_IFINFO) 701 * to other parties. By sending failure replies only to the requestor, 702 * we eliminate the need for security-sensitive request validation. 703 */ 704 if (r != OK && rtsock_can_send(rt, rt, AF_UNSPEC)) { 705 rtm.rtm_errno = -r; 706 707 if ((pbuf = rtsock_alloc(len)) == NULL) 708 return ENOMEM; 709 710 /* For the reply, reuse the request message largely as is. */ 711 memcpy(pbuf->payload, &rtm, sizeof(rtm)); 712 if (len > sizeof(rtm)) 713 memcpy((uint8_t *)pbuf->payload + sizeof(rtm), 714 buf + sizeof(rtm), len - sizeof(rtm)); 715 716 rtsock_enqueue(rt, pbuf); 717 } else if (r == OK) 718 *offp = len; 719 720 return r; 721 } 722 723 /* 724 * Perform preliminary checks on a receive request. 725 */ 726 static int 727 rtsock_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused, 728 int flags) 729 { 730 731 /* 732 * We accept the same flags across all socket types in LWIP, and then 733 * simply ignore the ones we do not support for routing sockets. 734 */ 735 if ((flags & ~(MSG_PEEK | MSG_WAITALL)) != 0) 736 return EOPNOTSUPP; 737 738 return OK; 739 } 740 741 /* 742 * Receive data on a routing socket. 743 */ 744 static int 745 rtsock_recv(struct sock * sock, const struct sockdriver_data * data, 746 size_t len, size_t * off, const struct sockdriver_data * ctl __unused, 747 socklen_t ctl_len __unused, socklen_t * ctl_off __unused, 748 struct sockaddr * addr, socklen_t * addr_len, 749 endpoint_t user_endpt __unused, int flags, size_t min __unused, 750 int * rflags) 751 { 752 struct rtsock *rt = (struct rtsock *)sock; 753 struct pbuf *pbuf; 754 int r; 755 756 if ((pbuf = rt->rt_rcvhead) == NULL) 757 return SUSPEND; 758 759 /* Copy out the data to the calling user process. */ 760 if (len >= pbuf->tot_len) 761 len = pbuf->tot_len; 762 else 763 *rflags |= MSG_TRUNC; 764 765 r = util_copy_data(data, len, 0, pbuf, 0, FALSE /*copy_in*/); 766 767 if (r != OK) 768 return r; 769 770 /* Generate a dummy source address. */ 771 addr->sa_len = RTSOCK_ADDR_LEN; 772 addr->sa_family = AF_ROUTE; 773 *addr_len = RTSOCK_ADDR_LEN; 774 775 /* Discard the data now, unless we were instructed to peek only. */ 776 if (!(flags & MSG_PEEK)) 777 rtsock_dequeue(rt); 778 779 /* Return the received part of the data length. */ 780 *off = len; 781 return OK; 782 } 783 784 /* 785 * Test whether data can be received on a routing socket, and if so, how many 786 * bytes of data. 787 */ 788 static int 789 rtsock_test_recv(struct sock * sock, size_t min __unused, size_t * size) 790 { 791 struct rtsock *rt = (struct rtsock *)sock; 792 793 if (rt->rt_rcvhead == NULL) 794 return SUSPEND; 795 796 if (size != NULL) 797 *size = rt->rt_rcvhead->tot_len; 798 return OK; 799 } 800 801 /* 802 * Set socket options on a routing socket. 803 */ 804 static int 805 rtsock_setsockopt(struct sock * sock, int level, int name, 806 const struct sockdriver_data * data, socklen_t len) 807 { 808 struct rtsock *rt = (struct rtsock *)sock; 809 int r, val; 810 811 if (level == SOL_SOCKET) { 812 switch (name) { 813 case SO_USELOOPBACK: 814 if ((r = sockdriver_copyin_opt(data, &val, sizeof(val), 815 len)) != OK) 816 return r; 817 818 if (!val) 819 rt->rt_flags |= RTF_NOLOOPBACK; 820 else 821 rt->rt_flags &= ~RTF_NOLOOPBACK; 822 823 return OK; 824 825 case SO_RCVBUF: 826 if ((r = sockdriver_copyin_opt(data, &val, sizeof(val), 827 len)) != OK) 828 return r; 829 830 if (val < RT_RCVBUF_MIN || val > RT_RCVBUF_MAX) 831 return EINVAL; 832 833 rt->rt_rcvbuf = (size_t)val; 834 835 return OK; 836 } 837 } 838 839 return ENOPROTOOPT; 840 } 841 842 /* 843 * Retrieve socket options on a routing socket. 844 */ 845 static int 846 rtsock_getsockopt(struct sock * sock, int level, int name, 847 const struct sockdriver_data * data, socklen_t * len) 848 { 849 struct rtsock *rt = (struct rtsock *)sock; 850 int val; 851 852 if (level == SOL_SOCKET) { 853 switch (name) { 854 case SO_USELOOPBACK: 855 val = !(rt->rt_flags & RTF_NOLOOPBACK); 856 857 return sockdriver_copyout_opt(data, &val, sizeof(val), 858 len); 859 860 case SO_RCVBUF: 861 val = rt->rt_rcvbuf; 862 863 return sockdriver_copyout_opt(data, &val, sizeof(val), 864 len); 865 } 866 } 867 868 return ENOPROTOOPT; 869 } 870 871 /* 872 * Retrieve the local or remote socket address of a routing socket. 873 */ 874 static int 875 rtsock_getname(struct sock * sock __unused, struct sockaddr * addr, 876 socklen_t * addr_len) 877 { 878 879 /* This is entirely useless but apparently common between OSes. */ 880 addr->sa_len = RTSOCK_ADDR_LEN; 881 addr->sa_family = AF_ROUTE; 882 *addr_len = RTSOCK_ADDR_LEN; 883 884 return OK; 885 } 886 887 /* 888 * Drain the receive queue of a routing socket. 889 */ 890 static void 891 rtsock_drain(struct rtsock * rt) 892 { 893 894 while (rt->rt_rcvhead != NULL) 895 rtsock_dequeue(rt); 896 } 897 898 /* 899 * Shut down a routing socket for reading and/or writing. 900 */ 901 static int 902 rtsock_shutdown(struct sock * sock, unsigned int mask) 903 { 904 struct rtsock *rt = (struct rtsock *)sock; 905 906 if (mask & SFL_SHUT_RD) 907 rtsock_drain(rt); 908 909 return OK; 910 } 911 912 /* 913 * Close a routing socket. 914 */ 915 static int 916 rtsock_close(struct sock * sock, int force __unused) 917 { 918 struct rtsock *rt = (struct rtsock *)sock; 919 920 rtsock_drain(rt); 921 922 return OK; 923 } 924 925 /* 926 * Free up a closed routing socket. 927 */ 928 static void 929 rtsock_free(struct sock * sock) 930 { 931 struct rtsock *rt = (struct rtsock *)sock; 932 933 TAILQ_REMOVE(&rt_activelist, rt, rt_next); 934 935 TAILQ_INSERT_HEAD(&rt_freelist, rt, rt_next); 936 } 937 938 static const struct sockevent_ops rtsock_ops = { 939 .sop_pre_send = rtsock_pre_send, 940 .sop_send = rtsock_send, 941 .sop_pre_recv = rtsock_pre_recv, 942 .sop_recv = rtsock_recv, 943 .sop_test_recv = rtsock_test_recv, 944 .sop_setsockopt = rtsock_setsockopt, 945 .sop_getsockopt = rtsock_getsockopt, 946 .sop_getsockname = rtsock_getname, 947 .sop_getpeername = rtsock_getname, 948 .sop_shutdown = rtsock_shutdown, 949 .sop_close = rtsock_close, 950 .sop_free = rtsock_free 951 }; 952 953 /* 954 * Send an interface announcement message about the given interface. If 955 * 'arrival' is set, the interface has just been created; otherwise, the 956 * interface is about to be destroyed. 957 */ 958 void 959 rtsock_msg_ifannounce(struct ifdev * ifdev, int arrival) 960 { 961 struct if_announcemsghdr ifan; 962 struct pbuf *pbuf; 963 964 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/)) 965 return; 966 967 memset(&ifan, 0, sizeof(ifan)); 968 ifan.ifan_msglen = sizeof(ifan); 969 ifan.ifan_version = RTM_VERSION; 970 ifan.ifan_type = RTM_IFANNOUNCE; 971 ifan.ifan_index = ifdev_get_index(ifdev); 972 strlcpy(ifan.ifan_name, ifdev_get_name(ifdev), sizeof(ifan.ifan_name)); 973 ifan.ifan_what = (arrival) ? IFAN_ARRIVAL : IFAN_DEPARTURE; 974 975 if ((pbuf = rtsock_alloc(sizeof(ifan))) == NULL) 976 return; 977 memcpy(pbuf->payload, &ifan, sizeof(ifan)); 978 979 rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf); 980 } 981 982 /* 983 * Send an interface information routing message. 984 */ 985 void 986 rtsock_msg_ifinfo(struct ifdev * ifdev) 987 { 988 struct if_msghdr ifm; 989 struct pbuf *pbuf; 990 991 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/)) 992 return; 993 994 memset(&ifm, 0, sizeof(ifm)); 995 ifm.ifm_msglen = sizeof(ifm); 996 ifm.ifm_version = RTM_VERSION; 997 ifm.ifm_type = RTM_IFINFO; 998 ifm.ifm_addrs = 0; 999 ifm.ifm_flags = ifdev_get_ifflags(ifdev); 1000 ifm.ifm_index = ifdev_get_index(ifdev); 1001 memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev), sizeof(ifm.ifm_data)); 1002 1003 if ((pbuf = rtsock_alloc(sizeof(ifm))) == NULL) 1004 return; 1005 memcpy(pbuf->payload, &ifm, sizeof(ifm)); 1006 1007 rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf); 1008 } 1009 1010 /* 1011 * Set up a RTA map and an interface address structure for use in a RTM_xxxADDR 1012 * routing message. 1013 */ 1014 static void 1015 rtsock_rta_init_ifam(struct rtsock_rta * rta, struct ifa_msghdr * ifam, 1016 struct ifdev * ifdev, unsigned int type, struct sockaddr_dlx * sdlx) 1017 { 1018 1019 memset(ifam, 0, sizeof(*ifam)); 1020 ifam->ifam_version = RTM_VERSION; 1021 ifam->ifam_type = type; 1022 ifam->ifam_flags = 0; 1023 ifam->ifam_index = ifdev_get_index(ifdev); 1024 ifam->ifam_metric = ifdev_get_metric(ifdev); 1025 1026 rtsock_rta_init(rta); 1027 1028 ifaddr_dl_get(ifdev, (ifaddr_dl_num_t)0, sdlx); 1029 1030 rtsock_rta_set(rta, RTAX_IFP, sdlx, sdlx->sdlx_len); 1031 } 1032 1033 /* 1034 * Add a specific link-layer address for an interface to the given RTA map. 1035 */ 1036 static void 1037 rtsock_rta_add_dl(struct rtsock_rta * rta, struct ifdev * ifdev, 1038 ifaddr_dl_num_t num, struct sockaddr_dlx * sdlx) 1039 { 1040 1041 /* Obtain the address data. */ 1042 ifaddr_dl_get(ifdev, num, sdlx); 1043 1044 /* Add the interface address. */ 1045 rtsock_rta_set(rta, RTAX_IFA, sdlx, sdlx->sdlx_len); 1046 1047 /* 1048 * NetBSD also adds a RTAX_NETMASK entry here. At this moment it is 1049 * not clear to me why, and it is a pain to make, so for now we do not. 1050 */ 1051 } 1052 1053 /* 1054 * Send a routing message about a new, changed, or deleted datalink address for 1055 * the given interface. 1056 */ 1057 void 1058 rtsock_msg_addr_dl(struct ifdev * ifdev, unsigned int type, 1059 ifaddr_dl_num_t num) 1060 { 1061 struct rtsock_rta rta; 1062 struct ifa_msghdr ifam; 1063 struct sockaddr_dlx name, addr; 1064 struct pbuf *pbuf; 1065 1066 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, NULL /*pbuf*/)) 1067 return; 1068 1069 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name); 1070 1071 rtsock_rta_add_dl(&rta, ifdev, num, &addr); 1072 1073 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen, 1074 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0) 1075 rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, pbuf); 1076 } 1077 1078 /* 1079 * Add a specific IPv4 address for an interface to the given RTA map. 1080 */ 1081 static void 1082 rtsock_rta_add_v4(struct rtsock_rta * rta, struct ifdev * ifdev, 1083 ifaddr_v4_num_t num, struct sockaddr_in sin[4]) 1084 { 1085 1086 /* Obtain the address data. */ 1087 (void)ifaddr_v4_get(ifdev, num, &sin[0], &sin[1], &sin[2], &sin[3]); 1088 1089 /* Add the interface address. */ 1090 rtsock_rta_set(rta, RTAX_IFA, &sin[0], sin[0].sin_len); 1091 1092 /* Add the netmask, after compressing it. */ 1093 rtsock_compress_netmask((struct sockaddr *)&sin[1]); 1094 1095 rtsock_rta_set(rta, RTAX_NETMASK, &sin[1], sin[1].sin_len); 1096 1097 /* Possibly add a broadcast or destination address. */ 1098 if (sin[2].sin_len != 0) 1099 rtsock_rta_set(rta, RTAX_BRD, &sin[2], sin[2].sin_len); 1100 else if (sin[3].sin_len != 0) 1101 rtsock_rta_set(rta, RTAX_DST, &sin[3], sin[3].sin_len); 1102 } 1103 1104 /* 1105 * Send a routing message about a new or deleted IPv4 address for the given 1106 * interface. 1107 */ 1108 void 1109 rtsock_msg_addr_v4(struct ifdev * ifdev, unsigned int type, 1110 ifaddr_v4_num_t num) 1111 { 1112 struct rtsock_rta rta; 1113 struct ifa_msghdr ifam; 1114 struct sockaddr_dlx name; 1115 struct sockaddr_in sin[4]; 1116 struct pbuf *pbuf; 1117 1118 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET, NULL /*pbuf*/)) 1119 return; 1120 1121 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name); 1122 1123 rtsock_rta_add_v4(&rta, ifdev, num, sin); 1124 1125 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen, 1126 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0) 1127 rtsock_msg_match(NULL /*rtsrc*/, AF_INET, pbuf); 1128 } 1129 1130 /* 1131 * Add a specific IPv6 address for an interface to the given RTA map. 1132 */ 1133 static void 1134 rtsock_rta_add_v6(struct rtsock_rta * rta, struct ifdev * ifdev, 1135 ifaddr_v6_num_t num, struct sockaddr_in6 sin6[3]) 1136 { 1137 1138 /* Obtain the address data. */ 1139 ifaddr_v6_get(ifdev, num, &sin6[0], &sin6[1], &sin6[2]); 1140 1141 /* Add the interface address. */ 1142 rtsock_rta_set(rta, RTAX_IFA, &sin6[0], sin6[0].sin6_len); 1143 1144 /* Add the netmask, after compressing it (a no-op at the moment). */ 1145 rtsock_compress_netmask((struct sockaddr *)&sin6[1]); 1146 1147 rtsock_rta_set(rta, RTAX_NETMASK, &sin6[1], sin6[1].sin6_len); 1148 1149 /* Possibly add a destination address. */ 1150 if (sin6[2].sin6_len != 0) 1151 rtsock_rta_set(rta, RTAX_DST, &sin6[2], sin6[2].sin6_len); 1152 } 1153 1154 /* 1155 * Send a routing message about a new or deleted IPv6 address for the given 1156 * interface. 1157 */ 1158 void 1159 rtsock_msg_addr_v6(struct ifdev * ifdev, unsigned int type, 1160 ifaddr_v6_num_t num) 1161 { 1162 struct rtsock_rta rta; 1163 struct ifa_msghdr ifam; 1164 struct sockaddr_dlx name; 1165 struct sockaddr_in6 sin6[3]; 1166 struct pbuf *pbuf; 1167 1168 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, NULL /*pbuf*/)) 1169 return; 1170 1171 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name); 1172 1173 rtsock_rta_add_v6(&rta, ifdev, num, sin6); 1174 1175 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen, 1176 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0) 1177 rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, pbuf); 1178 } 1179 1180 /* 1181 * Send an RTM_MISS routing message about an address for which no route was 1182 * found. The caller must provide the address in the appropriate form and 1183 * perform any per-address rate limiting. 1184 */ 1185 void 1186 rtsock_msg_miss(const struct sockaddr * addr) 1187 { 1188 struct rt_msghdr rtm; 1189 struct rtsock_rta rta; 1190 struct pbuf *pbuf; 1191 1192 /* 1193 * Unfortunately the destination address has already been generated (as 1194 * 'addr'), which is a big part of the work. Still, skip the rest if 1195 * there is no routing socket to deliver the message to. 1196 */ 1197 if (!rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, NULL /*pbuf*/)) 1198 return; 1199 1200 memset(&rtm, 0, sizeof(rtm)); 1201 rtm.rtm_version = RTM_VERSION; 1202 rtm.rtm_type = RTM_MISS; 1203 1204 rtsock_rta_init(&rta); 1205 1206 rtsock_rta_set(&rta, RTAX_DST, addr, addr->sa_len); 1207 1208 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen, 1209 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0) 1210 rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, pbuf); 1211 } 1212 1213 /* 1214 * Generate routing socket data for a route, for either routing socket 1215 * broadcasting or a sysctl(7) request. The route is given as 'route'. The 1216 * type of the message (RTM_) is given as 'type'. The resulting routing 1217 * message header is stored in 'rtm' and an address vector is stored in 'rta'. 1218 * The latter may point to addresses generated in 'addr', 'mask', 'gateway', 1219 * and optionally (if not NULL) 'ifp' and 'ifa'. The caller is responsible for 1220 * combining the results into an appropriate routing message. 1221 */ 1222 static void 1223 rtsock_get_route(struct rt_msghdr * rtm, struct rtsock_rta * rta, 1224 union sockaddr_any * addr, union sockaddr_any * mask, 1225 union sockaddr_any * gateway, union sockaddr_any * ifp, 1226 union sockaddr_any * ifa, const struct route_entry * route, 1227 unsigned int type) 1228 { 1229 struct ifdev *ifdev; 1230 unsigned int flags, use; 1231 1232 route_get(route, addr, mask, gateway, ifp, ifa, &ifdev, &flags, &use); 1233 1234 memset(rtm, 0, sizeof(*rtm)); 1235 rtm->rtm_version = RTM_VERSION; 1236 rtm->rtm_type = type; 1237 rtm->rtm_flags = flags; 1238 rtm->rtm_index = ifdev_get_index(ifdev); 1239 rtm->rtm_use = use; 1240 1241 rtsock_rta_init(rta); 1242 1243 rtsock_rta_set(rta, RTAX_DST, addr, addr->sa.sa_len); 1244 1245 if (!(flags & RTF_HOST)) { 1246 rtsock_compress_netmask(&mask->sa); 1247 1248 rtsock_rta_set(rta, RTAX_NETMASK, mask, mask->sa.sa_len); 1249 } 1250 1251 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sa.sa_len); 1252 1253 if (ifp != NULL) 1254 rtsock_rta_set(rta, RTAX_IFP, ifp, ifp->sa.sa_len); 1255 1256 if (ifa != NULL) 1257 rtsock_rta_set(rta, RTAX_IFA, ifa, ifa->sa.sa_len); 1258 } 1259 1260 /* 1261 * Send a routing message about a route, with the given type which may be one 1262 * of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The routing 1263 * socket request information 'rtr', if not NULL, provides additional 1264 * information about the routing socket that was the source of the request (if 1265 * any), various fields that should be echoed, and (for RTM_GET) whether to 1266 * add interface information to the output. 1267 */ 1268 void 1269 rtsock_msg_route(const struct route_entry * route, unsigned int type, 1270 const struct rtsock_request * rtr) 1271 { 1272 union sockaddr_any addr, mask, gateway, ifp, ifa; 1273 struct rt_msghdr rtm; 1274 struct rtsock_rta rta; 1275 struct rtsock *rtsrc; 1276 struct pbuf *pbuf; 1277 int family, getif; 1278 1279 rtsrc = (rtr != NULL) ? rtr->rtr_src : NULL; 1280 family = (route_is_ipv6(route)) ? AF_INET6 : AF_INET; 1281 1282 if (!rtsock_msg_match(rtsrc, family, NULL /*pbuf*/)) 1283 return; 1284 1285 getif = (rtr != NULL && rtr->rtr_getif); 1286 1287 rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway, 1288 (getif) ? &ifp : NULL, (getif) ? &ifa : NULL, route, type); 1289 1290 if (rtr != NULL) { 1291 rtm.rtm_flags |= RTF_DONE; 1292 rtm.rtm_pid = rtr->rtr_pid; 1293 rtm.rtm_seq = rtr->rtr_seq; 1294 } 1295 1296 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen, 1297 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0) 1298 rtsock_msg_match(rtsrc, family, pbuf); 1299 } 1300 1301 /* 1302 * Generate sysctl(7) output or length for the given routing table entry 1303 * 'route', provided that the route passes the flags filter 'filter'. The 1304 * address length 'addr_len' is used to compute a cheap length estimate. On 1305 * success, return the byte size of the output. If the route was not a match 1306 * for the filter, return zero. On failure, return a negative error code. 1307 */ 1308 static ssize_t 1309 rtsock_info_rtable_entry(const struct route_entry * route, unsigned int filter, 1310 socklen_t addr_len, struct rmib_oldp * oldp, size_t off) 1311 { 1312 union sockaddr_any addr, mask, gateway; 1313 struct rt_msghdr rtm; 1314 struct rtsock_rta rta; 1315 unsigned int flags; 1316 ssize_t len; 1317 1318 flags = route_get_flags(route); 1319 1320 /* Apparently, matching any of the flags (if given) is sufficient. */ 1321 if (filter != 0 && (filter & flags) != 0) 1322 return 0; 1323 1324 /* Size (over)estimation shortcut. */ 1325 if (oldp == NULL) { 1326 len = sizeof(rtm) + RT_ROUNDUP(addr_len) + 1327 RT_ROUNDUP(sizeof(gateway)); 1328 1329 if (!(flags & RTF_HOST)) 1330 len += RT_ROUNDUP(addr_len); 1331 1332 return len; 1333 } 1334 1335 rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway, NULL /*ifp*/, 1336 NULL /*ifa*/, route, RTM_GET); 1337 1338 return rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen, 1339 &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp, off); 1340 } 1341 1342 /* 1343 * Obtain routing table entries. 1344 */ 1345 static ssize_t 1346 rtsock_info_rtable(struct rmib_oldp * oldp, int family, int filter) 1347 { 1348 struct route_entry *route; 1349 ssize_t r, off; 1350 1351 off = 0; 1352 1353 if (family == AF_UNSPEC || family == AF_INET) { 1354 for (route = NULL; (route = route_enum_v4(route)) != NULL; ) { 1355 if ((r = rtsock_info_rtable_entry(route, 1356 (unsigned int)filter, sizeof(struct sockaddr_in), 1357 oldp, off)) < 0) 1358 return r; 1359 off += r; 1360 } 1361 } 1362 1363 if (family == AF_UNSPEC || family == AF_INET6) { 1364 for (route = NULL; (route = route_enum_v6(route)) != NULL; ) { 1365 if ((r = rtsock_info_rtable_entry(route, 1366 (unsigned int)filter, sizeof(struct sockaddr_in6), 1367 oldp, off)) < 0) 1368 return r; 1369 off += r; 1370 } 1371 } 1372 1373 /* TODO: should we add slack here? */ 1374 return off; 1375 } 1376 1377 /* 1378 * Generate routing socket data for an ARP table entry, for either routing 1379 * socket broadcasting or a sysctl(7) request. The ARP table entry number is 1380 * given as 'num'. The type of the message (RTM_) is given as 'type'. The 1381 * resulting routing message header is stored in 'rtm' and an address vector is 1382 * stored in 'rta'. The latter may point to addresses generated in 'addr' and 1383 * 'gateway'. The caller is responsible for combining the results into an 1384 * appropriate routing message. 1385 */ 1386 static void 1387 rtsock_get_arp(struct rt_msghdr * rtm, struct rtsock_rta * rta, 1388 struct sockaddr_in * addr, struct sockaddr_dlx * gateway, 1389 lldata_arp_num_t num, unsigned int type) 1390 { 1391 struct ifdev *ifdev; 1392 unsigned int flags; 1393 1394 lldata_arp_get(num, addr, gateway, &ifdev, &flags); 1395 1396 memset(rtm, 0, sizeof(*rtm)); 1397 rtm->rtm_version = RTM_VERSION; 1398 rtm->rtm_type = type; 1399 rtm->rtm_flags = flags; 1400 rtm->rtm_index = ifdev_get_index(ifdev); 1401 1402 /* TODO: obtaining and reporting the proper expiry time, if any. */ 1403 if (!(flags & RTF_STATIC)) 1404 rtm->rtm_rmx.rmx_expire = (time_t)-1; 1405 1406 rtsock_rta_init(rta); 1407 1408 rtsock_rta_set(rta, RTAX_DST, addr, addr->sin_len); 1409 1410 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len); 1411 } 1412 1413 /* 1414 * Send a routing message about an ARP table entry, with the given type which 1415 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The 1416 * routing socket request information 'rtr', if not NULL, provides additional 1417 * information about the routing socket that was the source of the request (if 1418 * any) and various fields that should be echoed. 1419 */ 1420 void 1421 rtsock_msg_arp(lldata_arp_num_t num, unsigned int type, 1422 const struct rtsock_request * rtr) 1423 { 1424 struct sockaddr_in addr; 1425 struct sockaddr_dlx gateway; 1426 struct rt_msghdr rtm; 1427 struct rtsock_rta rta; 1428 struct pbuf *pbuf; 1429 1430 assert(rtr != NULL); 1431 1432 /* 1433 * We do not maintain the link-local tables ourselves, and thus, we do 1434 * not have a complete view of modifications to them. In order not to 1435 * confuse userland with inconsistent updates (e.g., deletion of 1436 * previously unreported entries), send these routing messages to the 1437 * source of the routing request only. 1438 */ 1439 if (!rtsock_msg_one(rtr->rtr_src, AF_INET, NULL /*pbuf*/)) 1440 return; 1441 1442 rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, type); 1443 1444 if (rtr != NULL) { 1445 rtm.rtm_flags |= RTF_DONE; 1446 rtm.rtm_pid = rtr->rtr_pid; 1447 rtm.rtm_seq = rtr->rtr_seq; 1448 } 1449 1450 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen, 1451 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0) 1452 rtsock_msg_one(rtr->rtr_src, AF_INET, pbuf); 1453 } 1454 1455 /* 1456 * Obtain ARP table entries. 1457 */ 1458 static ssize_t 1459 rtsock_info_lltable_arp(struct rmib_oldp * oldp) 1460 { 1461 struct sockaddr_in addr; 1462 struct sockaddr_dlx gateway; 1463 struct rt_msghdr rtm; 1464 struct rtsock_rta rta; 1465 lldata_arp_num_t num; 1466 ssize_t r, off; 1467 1468 off = 0; 1469 1470 for (num = 0; lldata_arp_enum(&num); num++) { 1471 /* Size (over)estimation shortcut. */ 1472 if (oldp == NULL) { 1473 off += sizeof(struct rt_msghdr) + 1474 RT_ROUNDUP(sizeof(addr)) + 1475 RT_ROUNDUP(sizeof(gateway)); 1476 1477 continue; 1478 } 1479 1480 rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, RTM_GET); 1481 1482 if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm), 1483 &rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp, 1484 off)) < 0) 1485 return r; 1486 off += r; 1487 } 1488 1489 /* TODO: should we add slack here? */ 1490 return off; 1491 } 1492 1493 /* 1494 * Generate routing socket data for an NDP table entry, for either routing 1495 * socket broadcasting or a sysctl(7) request. The NDP table entry number is 1496 * given as 'num'. The type of the message (RTM_) is given as 'type'. The 1497 * resulting routing message header is stored in 'rtm' and an address vector is 1498 * stored in 'rta'. The latter may point to addresses generated in 'addr' and 1499 * 'gateway'. The caller is responsible for combining the results into an 1500 * appropriate routing message. 1501 */ 1502 static void 1503 rtsock_get_ndp(struct rt_msghdr * rtm, struct rtsock_rta * rta, 1504 struct sockaddr_in6 * addr, struct sockaddr_dlx * gateway, 1505 lldata_ndp_num_t num, unsigned int type) 1506 { 1507 struct ifdev *ifdev; 1508 unsigned int flags; 1509 1510 lldata_ndp_get(num, addr, gateway, &ifdev, &flags); 1511 1512 memset(rtm, 0, sizeof(*rtm)); 1513 rtm->rtm_version = RTM_VERSION; 1514 rtm->rtm_type = type; 1515 rtm->rtm_flags = flags; 1516 rtm->rtm_index = ifdev_get_index(ifdev); 1517 1518 rtsock_rta_init(rta); 1519 1520 rtsock_rta_set(rta, RTAX_DST, addr, addr->sin6_len); 1521 1522 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len); 1523 } 1524 1525 /* 1526 * Send a routing message about an NDP table entry, with the given type which 1527 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The 1528 * routing socket request information 'rtr', if not NULL, provides additional 1529 * information about the routing socket that was the source of the request (if 1530 * any) and various fields that should be echoed. 1531 */ 1532 void 1533 rtsock_msg_ndp(lldata_ndp_num_t num, unsigned int type, 1534 const struct rtsock_request * rtr) 1535 { 1536 struct sockaddr_in6 addr; 1537 struct sockaddr_dlx gateway; 1538 struct rt_msghdr rtm; 1539 struct rtsock_rta rta; 1540 struct pbuf *pbuf; 1541 1542 assert(rtr != NULL); 1543 1544 /* 1545 * We do not maintain the link-local tables ourselves, and thus, we do 1546 * not have a complete view of modifications to them. In order not to 1547 * confuse userland with inconsistent updates (e.g., deletion of 1548 * previously unreported entries), send these routing messages to the 1549 * source of the routing request only. 1550 */ 1551 if (!rtsock_msg_one(rtr->rtr_src, AF_INET6, NULL /*pbuf*/)) 1552 return; 1553 1554 rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, type); 1555 1556 if (rtr != NULL) { 1557 rtm.rtm_flags |= RTF_DONE; 1558 rtm.rtm_pid = rtr->rtr_pid; 1559 rtm.rtm_seq = rtr->rtr_seq; 1560 } 1561 1562 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen, 1563 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0) 1564 rtsock_msg_one(rtr->rtr_src, AF_INET6, pbuf); 1565 } 1566 1567 /* 1568 * Obtain NDP table entries. 1569 */ 1570 static ssize_t 1571 rtsock_info_lltable_ndp(struct rmib_oldp * oldp) 1572 { 1573 struct rt_msghdr rtm; 1574 struct rtsock_rta rta; 1575 struct sockaddr_in6 addr; 1576 struct sockaddr_dlx gateway; 1577 lldata_ndp_num_t num; 1578 ssize_t r, off; 1579 1580 off = 0; 1581 1582 for (num = 0; lldata_ndp_enum(&num); num++) { 1583 /* Size (over)estimation shortcut. */ 1584 if (oldp == NULL) { 1585 off += sizeof(struct rt_msghdr) + 1586 RT_ROUNDUP(sizeof(addr)) + 1587 RT_ROUNDUP(sizeof(gateway)); 1588 1589 continue; 1590 } 1591 1592 rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, RTM_GET); 1593 1594 if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm), 1595 &rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp, 1596 off)) < 0) 1597 return r; 1598 off += r; 1599 } 1600 1601 /* TODO: should we add slack here? */ 1602 return off; 1603 } 1604 1605 /* 1606 * Obtain link-layer (ARP, NDP) table entries. 1607 */ 1608 static ssize_t 1609 rtsock_info_lltable(struct rmib_oldp * oldp, int family) 1610 { 1611 1612 switch (family) { 1613 case AF_INET: 1614 return rtsock_info_lltable_arp(oldp); 1615 1616 case AF_INET6: 1617 return rtsock_info_lltable_ndp(oldp); 1618 1619 default: 1620 return 0; 1621 } 1622 } 1623 1624 /* 1625 * Obtain link-layer address information for one specific interface. 1626 */ 1627 static ssize_t 1628 rtsock_info_if_dl(struct ifdev * ifdev, struct ifa_msghdr * ifam, 1629 struct rmib_oldp * oldp, ssize_t off) 1630 { 1631 struct rtsock_rta rta; 1632 struct sockaddr_dlx sdlx; 1633 ifaddr_dl_num_t num; 1634 ssize_t r, len; 1635 1636 len = 0; 1637 1638 for (num = 0; ifaddr_dl_enum(ifdev, &num); num++) { 1639 if (oldp == NULL) { 1640 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sdlx)); 1641 1642 continue; 1643 } 1644 1645 rtsock_rta_init(&rta); 1646 1647 rtsock_rta_add_dl(&rta, ifdev, num, &sdlx); 1648 1649 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam), 1650 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/, 1651 oldp, off + len)) < 0) 1652 return r; 1653 len += r; 1654 } 1655 1656 return len; 1657 } 1658 1659 /* 1660 * Obtain IPv4 address information for one specific interface. 1661 */ 1662 static ssize_t 1663 rtsock_info_if_v4(struct ifdev * ifdev, struct ifa_msghdr * ifam, 1664 struct rmib_oldp * oldp, ssize_t off) 1665 { 1666 struct sockaddr_in sin[4]; 1667 struct rtsock_rta rta; 1668 ifaddr_v4_num_t num; 1669 ssize_t r, len; 1670 1671 len = 0; 1672 1673 /* 1674 * Mostly for future compatibility, we support multiple IPv4 interface 1675 * addresses here. Every interface has an interface address and a 1676 * netmask. In addition, an interface may have either a broadcast or a 1677 * destination address. 1678 */ 1679 for (num = 0; ifaddr_v4_enum(ifdev, &num); num++) { 1680 /* Size (over)estimation shortcut. */ 1681 if (oldp == NULL) { 1682 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin[0])) * 3; 1683 1684 continue; 1685 } 1686 1687 rtsock_rta_init(&rta); 1688 1689 rtsock_rta_add_v4(&rta, ifdev, num, sin); 1690 1691 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam), 1692 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/, 1693 oldp, off + len)) < 0) 1694 return r; 1695 len += r; 1696 } 1697 1698 return len; 1699 } 1700 1701 /* 1702 * Obtain IPv6 address information for one specific interface. 1703 */ 1704 static ssize_t 1705 rtsock_info_if_v6(struct ifdev * ifdev, struct ifa_msghdr * ifam, 1706 struct rmib_oldp * oldp, ssize_t off) 1707 { 1708 struct sockaddr_in6 sin6[3]; 1709 struct rtsock_rta rta; 1710 ifaddr_v6_num_t num; 1711 ssize_t r, len; 1712 1713 len = 0; 1714 1715 /* As with IPv4, except that IPv6 has no broadcast addresses. */ 1716 for (num = 0; ifaddr_v6_enum(ifdev, &num); num++) { 1717 /* Size (over)estimation shortcut. */ 1718 if (oldp == NULL) { 1719 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin6[0])) * 3; 1720 1721 continue; 1722 } 1723 1724 rtsock_rta_init(&rta); 1725 1726 rtsock_rta_add_v6(&rta, ifdev, num, sin6); 1727 1728 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam), 1729 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/, 1730 oldp, off + len)) < 0) 1731 return r; 1732 len += r; 1733 } 1734 1735 return len; 1736 } 1737 1738 /* 1739 * Obtain information for one specific interface. 1740 */ 1741 static ssize_t 1742 rtsock_info_if(struct ifdev * ifdev, struct rmib_oldp * oldp, ssize_t off, 1743 int family) 1744 { 1745 struct rtsock_rta rta; 1746 struct sockaddr_dlx sdlx; 1747 struct if_msghdr ifm; 1748 struct ifa_msghdr ifam; 1749 unsigned int ifflags; 1750 ssize_t r, len, sdlxsize; 1751 1752 len = 0; 1753 1754 ifflags = ifdev_get_ifflags(ifdev); 1755 1756 /* Create an interface information entry. */ 1757 rtsock_rta_init(&rta); 1758 1759 if (oldp != NULL) { 1760 memset(&ifm, 0, sizeof(ifm)); 1761 ifm.ifm_version = RTM_VERSION; 1762 ifm.ifm_type = RTM_IFINFO; 1763 ifm.ifm_flags = ifflags; 1764 ifm.ifm_index = ifdev_get_index(ifdev); 1765 memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev), 1766 sizeof(ifm.ifm_data)); 1767 } 1768 1769 /* 1770 * Generate a datalink socket address structure. TODO: see if it is 1771 * worth obtaining just the length for the (oldp == NULL) case here. 1772 */ 1773 memset(&sdlx, 0, sizeof(sdlx)); 1774 1775 ifaddr_dl_get(ifdev, 0, &sdlx); 1776 1777 sdlxsize = RT_ROUNDUP(sdlx.sdlx_len); 1778 1779 rtsock_rta_set(&rta, RTAX_IFP, &sdlx, sdlxsize); 1780 1781 if ((r = rtsock_rta_finalize(&ifm, sizeof(ifm), &ifm.ifm_msglen, 1782 &ifm.ifm_addrs, &rta, NULL /*pbuf*/, oldp, off + len)) < 0) 1783 return r; 1784 len += r; 1785 1786 /* Generate a header for all addresses once. */ 1787 if (oldp != NULL) { 1788 memset(&ifam, 0, sizeof(ifam)); 1789 ifam.ifam_version = RTM_VERSION; 1790 ifam.ifam_type = RTM_NEWADDR; 1791 ifam.ifam_flags = 0; 1792 ifam.ifam_index = ifdev_get_index(ifdev); 1793 ifam.ifam_metric = ifdev_get_metric(ifdev); 1794 } 1795 1796 /* If requested and applicable, add any datalink addresses. */ 1797 if (family == AF_UNSPEC || family == AF_LINK) { 1798 if ((r = rtsock_info_if_dl(ifdev, &ifam, oldp, off + len)) < 0) 1799 return r; 1800 len += r; 1801 } 1802 1803 /* If requested and applicable, add any IPv4 addresses. */ 1804 if (family == AF_UNSPEC || family == AF_INET) { 1805 if ((r = rtsock_info_if_v4(ifdev, &ifam, oldp, off + len)) < 0) 1806 return r; 1807 len += r; 1808 } 1809 1810 /* If requested and applicable, add any IPv6 addresses. */ 1811 if (family == AF_UNSPEC || family == AF_INET6) { 1812 if ((r = rtsock_info_if_v6(ifdev, &ifam, oldp, off + len)) < 0) 1813 return r; 1814 len += r; 1815 } 1816 1817 return len; 1818 } 1819 1820 /* 1821 * Obtain interface information. 1822 */ 1823 static ssize_t 1824 rtsock_info_iflist(struct rmib_oldp * oldp, int family, uint32_t ifindex) 1825 { 1826 struct ifdev *ifdev; 1827 ssize_t r, off; 1828 1829 /* 1830 * If information about a specific interface index is requested, then 1831 * return information for just that interface. 1832 */ 1833 if (ifindex != 0) { 1834 if ((ifdev = ifdev_get_by_index(ifindex)) != NULL) 1835 return rtsock_info_if(ifdev, oldp, 0, family); 1836 else 1837 return 0; 1838 } 1839 1840 /* Otherwise, iterate through the list of all interfaces. */ 1841 off = 0; 1842 1843 for (ifdev = ifdev_enum(NULL); ifdev != NULL; 1844 ifdev = ifdev_enum(ifdev)) { 1845 1846 /* Avoid generating results that are never copied out. */ 1847 if (oldp != NULL && !rmib_inrange(oldp, off)) 1848 oldp = NULL; 1849 1850 if ((r = rtsock_info_if(ifdev, oldp, off, family)) < 0) 1851 return r; 1852 1853 off += r; 1854 } 1855 1856 /* TODO: should we add slack here? */ 1857 return off; 1858 } 1859 1860 /* 1861 * Obtain routing table, ARP cache, and interface information through 1862 * sysctl(7). Return the (produced, or if oldp is NULL, estimated) byte size 1863 * of the output on success, or a negative error code on failure. 1864 */ 1865 static ssize_t 1866 rtsock_info(struct rmib_call * call, struct rmib_node * node __unused, 1867 struct rmib_oldp * oldp, struct rmib_newp * newp __unused) 1868 { 1869 int family, filter; 1870 1871 if (call->call_namelen != 3) 1872 return EINVAL; 1873 1874 family = call->call_name[0]; 1875 filter = call->call_name[2]; 1876 1877 switch (call->call_name[1]) { 1878 case NET_RT_FLAGS: 1879 /* 1880 * Preliminary support for changes as of NetBSD 8, where by 1881 * default, the use of this subcall implies an ARP/NDP-only 1882 * request. 1883 */ 1884 if (filter == 0) 1885 filter |= RTF_LLDATA; 1886 1887 if (filter & RTF_LLDATA) { 1888 if (family == AF_UNSPEC) 1889 return EINVAL; 1890 1891 /* 1892 * Split off ARP/NDP handling from the normal routing 1893 * table listing, as done since NetBSD 8. We generate 1894 * the ARP/NDP listing from here, and keep those 1895 * entries out of the routing table dump below. Since 1896 * the filter is of a match-any type, and we have just 1897 * matched a flag, no further filtering is needed here. 1898 */ 1899 return rtsock_info_lltable(oldp, family); 1900 } 1901 1902 /* FALLTHROUGH */ 1903 case NET_RT_DUMP: 1904 return rtsock_info_rtable(oldp, family, filter); 1905 1906 case NET_RT_IFLIST: 1907 return rtsock_info_iflist(oldp, family, filter); 1908 1909 default: 1910 return EINVAL; 1911 } 1912 } 1913