1 /* 2 * Copyright (c) 1980, 1986, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * @(#)if.c 8.3 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $ 31 */ 32 33 #include "opt_inet6.h" 34 #include "opt_inet.h" 35 #include "opt_ifpoll.h" 36 37 #include <sys/param.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/systm.h> 41 #include <sys/proc.h> 42 #include <sys/priv.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/socketops.h> 47 #include <sys/kernel.h> 48 #include <sys/ktr.h> 49 #include <sys/mutex.h> 50 #include <sys/lock.h> 51 #include <sys/sockio.h> 52 #include <sys/syslog.h> 53 #include <sys/sysctl.h> 54 #include <sys/domain.h> 55 #include <sys/thread.h> 56 #include <sys/serialize.h> 57 #include <sys/bus.h> 58 #include <sys/jail.h> 59 60 #include <sys/thread2.h> 61 #include <sys/msgport2.h> 62 #include <sys/mutex2.h> 63 64 #include <net/if.h> 65 #include <net/if_arp.h> 66 #include <net/if_dl.h> 67 #include <net/if_types.h> 68 #include <net/if_var.h> 69 #include <net/if_ringmap.h> 70 #include <net/ifq_var.h> 71 #include <net/radix.h> 72 #include <net/route.h> 73 #include <net/if_clone.h> 74 #include <net/netisr2.h> 75 #include <net/netmsg2.h> 76 77 #include <machine/atomic.h> 78 #include <machine/stdarg.h> 79 #include <machine/smp.h> 80 81 #if defined(INET) || defined(INET6) 82 #include <netinet/in.h> 83 #include <netinet/in_var.h> 84 #include <netinet/if_ether.h> 85 #ifdef INET6 86 #include <netinet6/in6_var.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif /* INET6 */ 89 #endif /* INET || INET6 */ 90 91 struct netmsg_ifaddr { 92 struct netmsg_base base; 93 struct ifaddr *ifa; 94 struct ifnet *ifp; 95 int tail; 96 }; 97 98 struct ifsubq_stage_head { 99 TAILQ_HEAD(, ifsubq_stage) stg_head; 100 } __cachealign; 101 102 struct if_ringmap { 103 int rm_cnt; 104 int rm_grid; 105 int rm_cpumap[]; 106 }; 107 108 #define RINGMAP_FLAG_NONE 0x0 109 #define RINGMAP_FLAG_POWEROF2 0x1 110 111 /* 112 * System initialization 113 */ 
114 static void if_attachdomain(void *); 115 static void if_attachdomain1(struct ifnet *); 116 static int ifconf(u_long, caddr_t, struct ucred *); 117 static void ifinit(void *); 118 static void ifnetinit(void *); 119 static void if_slowtimo(void *); 120 static void link_rtrequest(int, struct rtentry *); 121 static int if_rtdel(struct radix_node *, void *); 122 static void if_slowtimo_dispatch(netmsg_t); 123 124 /* Helper functions */ 125 static void ifsq_watchdog_reset(struct ifsubq_watchdog *); 126 static int if_delmulti_serialized(struct ifnet *, struct sockaddr *); 127 static struct ifnet_array *ifnet_array_alloc(int); 128 static void ifnet_array_free(struct ifnet_array *); 129 static struct ifnet_array *ifnet_array_add(struct ifnet *, 130 const struct ifnet_array *); 131 static struct ifnet_array *ifnet_array_del(struct ifnet *, 132 const struct ifnet_array *); 133 static struct ifg_group *if_creategroup(const char *); 134 static int if_destroygroup(struct ifg_group *); 135 static int if_delgroup_locked(struct ifnet *, const char *); 136 static int if_getgroups(struct ifgroupreq *, struct ifnet *); 137 static int if_getgroupmembers(struct ifgroupreq *); 138 139 #ifdef INET6 140 /* 141 * XXX: declare here to avoid to include many inet6 related files.. 142 * should be more generalized? 
143 */ 144 extern void nd6_setmtu(struct ifnet *); 145 #endif 146 147 SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); 148 SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); 149 SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap"); 150 151 static int ifsq_stage_cntmax = 16; 152 TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax); 153 SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW, 154 &ifsq_stage_cntmax, 0, "ifq staging packet count max"); 155 156 static int if_stats_compat = 0; 157 SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW, 158 &if_stats_compat, 0, "Compat the old ifnet stats"); 159 160 static int if_ringmap_dumprdr = 0; 161 SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW, 162 &if_ringmap_dumprdr, 0, "dump redirect table"); 163 164 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL); 165 SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL); 166 167 static if_com_alloc_t *if_com_alloc[256]; 168 static if_com_free_t *if_com_free[256]; 169 170 MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); 171 MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); 172 MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure"); 173 174 int ifqmaxlen = IFQ_MAXLEN; 175 struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet); 176 struct ifgrouphead ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head); 177 static struct lock ifgroup_lock; 178 179 static struct ifnet_array ifnet_array0; 180 static struct ifnet_array *ifnet_array = &ifnet_array0; 181 182 static struct callout if_slowtimo_timer; 183 static struct netmsg_base if_slowtimo_netmsg; 184 185 int if_index = 0; 186 struct ifnet **ifindex2ifnet = NULL; 187 static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet"); 188 189 static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU]; 190 191 #ifdef notyet 192 #define IFQ_KTR_STRING "ifq=%p" 193 #define IFQ_KTR_ARGS struct ifaltq *ifq 194 #ifndef 
KTR_IFQ 195 #define KTR_IFQ KTR_ALL 196 #endif 197 KTR_INFO_MASTER(ifq); 198 KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS); 199 KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS); 200 #define logifq(name, arg) KTR_LOG(ifq_ ## name, arg) 201 202 #define IF_START_KTR_STRING "ifp=%p" 203 #define IF_START_KTR_ARGS struct ifnet *ifp 204 #ifndef KTR_IF_START 205 #define KTR_IF_START KTR_ALL 206 #endif 207 KTR_INFO_MASTER(if_start); 208 KTR_INFO(KTR_IF_START, if_start, run, 0, 209 IF_START_KTR_STRING, IF_START_KTR_ARGS); 210 KTR_INFO(KTR_IF_START, if_start, sched, 1, 211 IF_START_KTR_STRING, IF_START_KTR_ARGS); 212 KTR_INFO(KTR_IF_START, if_start, avoid, 2, 213 IF_START_KTR_STRING, IF_START_KTR_ARGS); 214 KTR_INFO(KTR_IF_START, if_start, contend_sched, 3, 215 IF_START_KTR_STRING, IF_START_KTR_ARGS); 216 KTR_INFO(KTR_IF_START, if_start, chase_sched, 4, 217 IF_START_KTR_STRING, IF_START_KTR_ARGS); 218 #define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg) 219 #endif /* notyet */ 220 221 /* 222 * Network interface utility routines. 223 * 224 * Routines with ifa_ifwith* names take sockaddr *'s as 225 * parameters. 
226 */ 227 /* ARGSUSED */ 228 static void 229 ifinit(void *dummy) 230 { 231 lockinit(&ifgroup_lock, "ifgroup", 0, 0); 232 233 callout_init_mp(&if_slowtimo_timer); 234 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport, 235 MSGF_PRIORITY, if_slowtimo_dispatch); 236 237 /* Start if_slowtimo */ 238 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg); 239 } 240 241 static void 242 ifsq_ifstart_ipifunc(void *arg) 243 { 244 struct ifaltq_subque *ifsq = arg; 245 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid); 246 247 crit_enter(); 248 if (lmsg->ms_flags & MSGF_DONE) 249 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg); 250 crit_exit(); 251 } 252 253 static __inline void 254 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 255 { 256 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 257 TAILQ_REMOVE(&head->stg_head, stage, stg_link); 258 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED); 259 stage->stg_cnt = 0; 260 stage->stg_len = 0; 261 } 262 263 static __inline void 264 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 265 { 266 KKASSERT((stage->stg_flags & 267 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 268 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED; 269 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link); 270 } 271 272 /* 273 * Schedule ifnet.if_start on the subqueue owner CPU 274 */ 275 static void 276 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force) 277 { 278 int cpu; 279 280 if (!force && curthread->td_type == TD_TYPE_NETISR && 281 ifsq_stage_cntmax > 0) { 282 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 283 284 stage->stg_cnt = 0; 285 stage->stg_len = 0; 286 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 287 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage); 288 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED; 289 return; 290 } 291 292 cpu = ifsq_get_cpuid(ifsq); 293 if (cpu != mycpuid) 294 
lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq); 295 else 296 ifsq_ifstart_ipifunc(ifsq); 297 } 298 299 /* 300 * NOTE: 301 * This function will release ifnet.if_start subqueue interlock, 302 * if ifnet.if_start for the subqueue does not need to be scheduled 303 */ 304 static __inline int 305 ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running) 306 { 307 if (!running || ifsq_is_empty(ifsq) 308 #ifdef ALTQ 309 || ifsq->ifsq_altq->altq_tbr != NULL 310 #endif 311 ) { 312 ALTQ_SQ_LOCK(ifsq); 313 /* 314 * ifnet.if_start subqueue interlock is released, if: 315 * 1) Hardware can not take any packets, due to 316 * o interface is marked down 317 * o hardware queue is full (ifsq_is_oactive) 318 * Under the second situation, hardware interrupt 319 * or polling(4) will call/schedule ifnet.if_start 320 * on the subqueue when hardware queue is ready 321 * 2) There is no packet in the subqueue. 322 * Further ifq_dispatch or ifq_handoff will call/ 323 * schedule ifnet.if_start on the subqueue. 324 * 3) TBR is used and it does not allow further 325 * dequeueing. 326 * TBR callout will call ifnet.if_start on the 327 * subqueue. 328 */ 329 if (!running || !ifsq_data_ready(ifsq)) { 330 ifsq_clr_started(ifsq); 331 ALTQ_SQ_UNLOCK(ifsq); 332 return 0; 333 } 334 ALTQ_SQ_UNLOCK(ifsq); 335 } 336 return 1; 337 } 338 339 static void 340 ifsq_ifstart_dispatch(netmsg_t msg) 341 { 342 struct lwkt_msg *lmsg = &msg->base.lmsg; 343 struct ifaltq_subque *ifsq = lmsg->u.ms_resultp; 344 struct ifnet *ifp = ifsq_get_ifp(ifsq); 345 struct globaldata *gd = mycpu; 346 int running = 0, need_sched; 347 348 crit_enter_gd(gd); 349 350 lwkt_replymsg(lmsg, 0); /* reply ASAP */ 351 352 if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) { 353 /* 354 * We need to chase the subqueue owner CPU change. 
355 */ 356 ifsq_ifstart_schedule(ifsq, 1); 357 crit_exit_gd(gd); 358 return; 359 } 360 361 ifsq_serialize_hw(ifsq); 362 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 363 ifp->if_start(ifp, ifsq); 364 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 365 running = 1; 366 } 367 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 368 ifsq_deserialize_hw(ifsq); 369 370 if (need_sched) { 371 /* 372 * More data need to be transmitted, ifnet.if_start is 373 * scheduled on the subqueue owner CPU, and we keep going. 374 * NOTE: ifnet.if_start subqueue interlock is not released. 375 */ 376 ifsq_ifstart_schedule(ifsq, 0); 377 } 378 379 crit_exit_gd(gd); 380 } 381 382 /* Device driver ifnet.if_start helper function */ 383 void 384 ifsq_devstart(struct ifaltq_subque *ifsq) 385 { 386 struct ifnet *ifp = ifsq_get_ifp(ifsq); 387 int running = 0; 388 389 ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq); 390 391 ALTQ_SQ_LOCK(ifsq); 392 if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) { 393 ALTQ_SQ_UNLOCK(ifsq); 394 return; 395 } 396 ifsq_set_started(ifsq); 397 ALTQ_SQ_UNLOCK(ifsq); 398 399 ifp->if_start(ifp, ifsq); 400 401 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 402 running = 1; 403 404 if (ifsq_ifstart_need_schedule(ifsq, running)) { 405 /* 406 * More data need to be transmitted, ifnet.if_start is 407 * scheduled on ifnet's CPU, and we keep going. 408 * NOTE: ifnet.if_start interlock is not released. 
409 */ 410 ifsq_ifstart_schedule(ifsq, 0); 411 } 412 } 413 414 void 415 if_devstart(struct ifnet *ifp) 416 { 417 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); 418 } 419 420 /* Device driver ifnet.if_start schedule helper function */ 421 void 422 ifsq_devstart_sched(struct ifaltq_subque *ifsq) 423 { 424 ifsq_ifstart_schedule(ifsq, 1); 425 } 426 427 void 428 if_devstart_sched(struct ifnet *ifp) 429 { 430 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); 431 } 432 433 static void 434 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 435 { 436 lwkt_serialize_enter(ifp->if_serializer); 437 } 438 439 static void 440 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 441 { 442 lwkt_serialize_exit(ifp->if_serializer); 443 } 444 445 static int 446 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 447 { 448 return lwkt_serialize_try(ifp->if_serializer); 449 } 450 451 #ifdef INVARIANTS 452 static void 453 if_default_serialize_assert(struct ifnet *ifp, 454 enum ifnet_serialize slz __unused, 455 boolean_t serialized) 456 { 457 if (serialized) 458 ASSERT_SERIALIZED(ifp->if_serializer); 459 else 460 ASSERT_NOT_SERIALIZED(ifp->if_serializer); 461 } 462 #endif 463 464 /* 465 * Attach an interface to the list of "active" interfaces. 466 * 467 * The serializer is optional. 
468 */ 469 void 470 if_attach(struct ifnet *ifp, lwkt_serialize_t serializer) 471 { 472 unsigned socksize; 473 int namelen, masklen; 474 struct sockaddr_dl *sdl, *sdl_addr; 475 struct ifaddr *ifa; 476 struct ifaltq *ifq; 477 struct ifnet **old_ifindex2ifnet = NULL; 478 struct ifnet_array *old_ifnet_array; 479 int i, q, qlen; 480 char qlenname[64]; 481 482 static int if_indexlim = 8; 483 484 if (ifp->if_serialize != NULL) { 485 KASSERT(ifp->if_deserialize != NULL && 486 ifp->if_tryserialize != NULL && 487 ifp->if_serialize_assert != NULL, 488 ("serialize functions are partially setup")); 489 490 /* 491 * If the device supplies serialize functions, 492 * then clear if_serializer to catch any invalid 493 * usage of this field. 494 */ 495 KASSERT(serializer == NULL, 496 ("both serialize functions and default serializer " 497 "are supplied")); 498 ifp->if_serializer = NULL; 499 } else { 500 KASSERT(ifp->if_deserialize == NULL && 501 ifp->if_tryserialize == NULL && 502 ifp->if_serialize_assert == NULL, 503 ("serialize functions are partially setup")); 504 ifp->if_serialize = if_default_serialize; 505 ifp->if_deserialize = if_default_deserialize; 506 ifp->if_tryserialize = if_default_tryserialize; 507 #ifdef INVARIANTS 508 ifp->if_serialize_assert = if_default_serialize_assert; 509 #endif 510 511 /* 512 * The serializer can be passed in from the device, 513 * allowing the same serializer to be used for both 514 * the interrupt interlock and the device queue. 515 * If not specified, the netif structure will use an 516 * embedded serializer. 517 */ 518 if (serializer == NULL) { 519 serializer = &ifp->if_default_serializer; 520 lwkt_serialize_init(serializer); 521 } 522 ifp->if_serializer = serializer; 523 } 524 525 /* 526 * Make if_addrhead available on all CPUs, since they 527 * could be accessed by any threads. 
528 */ 529 ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead), 530 M_IFADDR, M_WAITOK | M_ZERO); 531 for (i = 0; i < ncpus; ++i) 532 TAILQ_INIT(&ifp->if_addrheads[i]); 533 534 TAILQ_INIT(&ifp->if_multiaddrs); 535 TAILQ_INIT(&ifp->if_groups); 536 getmicrotime(&ifp->if_lastchange); 537 if_addgroup(ifp, IFG_ALL); 538 539 /* 540 * create a Link Level name for this device 541 */ 542 namelen = strlen(ifp->if_xname); 543 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; 544 socksize = masklen + ifp->if_addrlen; 545 if (socksize < sizeof(*sdl)) 546 socksize = sizeof(*sdl); 547 socksize = RT_ROUNDUP(socksize); 548 ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize); 549 sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1); 550 sdl->sdl_len = socksize; 551 sdl->sdl_family = AF_LINK; 552 bcopy(ifp->if_xname, sdl->sdl_data, namelen); 553 sdl->sdl_nlen = namelen; 554 sdl->sdl_type = ifp->if_type; 555 ifp->if_lladdr = ifa; 556 ifa->ifa_ifp = ifp; 557 ifa->ifa_rtrequest = link_rtrequest; 558 ifa->ifa_addr = (struct sockaddr *)sdl; 559 sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); 560 ifa->ifa_netmask = (struct sockaddr *)sdl; 561 sdl->sdl_len = masklen; 562 while (namelen != 0) 563 sdl->sdl_data[--namelen] = 0xff; 564 ifa_iflink(ifa, ifp, 0 /* Insert head */); 565 566 /* 567 * Make if_data available on all CPUs, since they could 568 * be updated by hardware interrupt routing, which could 569 * be bound to any CPU. 
570 */ 571 ifp->if_data_pcpu = kmalloc_cachealign( 572 ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO); 573 574 if (ifp->if_mapsubq == NULL) 575 ifp->if_mapsubq = ifq_mapsubq_default; 576 577 ifq = &ifp->if_snd; 578 ifq->altq_type = 0; 579 ifq->altq_disc = NULL; 580 ifq->altq_flags &= ALTQF_CANTCHANGE; 581 ifq->altq_tbr = NULL; 582 ifq->altq_ifp = ifp; 583 584 if (ifq->altq_subq_cnt <= 0) 585 ifq->altq_subq_cnt = 1; 586 ifq->altq_subq = kmalloc_cachealign( 587 ifq->altq_subq_cnt * sizeof(struct ifaltq_subque), 588 M_DEVBUF, M_WAITOK | M_ZERO); 589 590 if (ifq->altq_maxlen == 0) { 591 if_printf(ifp, "driver didn't set altq_maxlen\n"); 592 ifq_set_maxlen(ifq, ifqmaxlen); 593 } 594 595 /* Allow user to override driver's setting. */ 596 ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname); 597 qlen = -1; 598 TUNABLE_INT_FETCH(qlenname, &qlen); 599 if (qlen > 0) { 600 if_printf(ifp, "qlenmax -> %d\n", qlen); 601 ifq_set_maxlen(ifq, qlen); 602 } 603 604 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 605 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 606 607 ALTQ_SQ_LOCK_INIT(ifsq); 608 ifsq->ifsq_index = q; 609 610 ifsq->ifsq_altq = ifq; 611 ifsq->ifsq_ifp = ifp; 612 613 ifsq->ifsq_maxlen = ifq->altq_maxlen; 614 ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES; 615 ifsq->ifsq_prepended = NULL; 616 ifsq->ifsq_started = 0; 617 ifsq->ifsq_hw_oactive = 0; 618 ifsq_set_cpuid(ifsq, 0); 619 if (ifp->if_serializer != NULL) 620 ifsq_set_hw_serialize(ifsq, ifp->if_serializer); 621 622 /* XXX: netisr_ncpus */ 623 ifsq->ifsq_stage = 624 kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage), 625 M_DEVBUF, M_WAITOK | M_ZERO); 626 for (i = 0; i < ncpus; ++i) 627 ifsq->ifsq_stage[i].stg_subq = ifsq; 628 629 /* 630 * Allocate one if_start message for each CPU, since 631 * the hardware TX ring could be assigned to any CPU. 
632 * 633 * NOTE: 634 * If the hardware TX ring polling CPU and the hardware 635 * TX ring interrupt CPU are same, one if_start message 636 * should be enough. 637 */ 638 ifsq->ifsq_ifstart_nmsg = 639 kmalloc(ncpus * sizeof(struct netmsg_base), 640 M_LWKTMSG, M_WAITOK); 641 for (i = 0; i < ncpus; ++i) { 642 netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL, 643 &netisr_adone_rport, 0, ifsq_ifstart_dispatch); 644 ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq; 645 } 646 } 647 ifq_set_classic(ifq); 648 649 /* 650 * Increase mbuf cluster/jcluster limits for the mbufs that 651 * could sit on the device queues for quite some time. 652 */ 653 if (ifp->if_nmbclusters > 0) 654 mcl_inclimit(ifp->if_nmbclusters); 655 if (ifp->if_nmbjclusters > 0) 656 mjcl_inclimit(ifp->if_nmbjclusters); 657 658 /* 659 * Install this ifp into ifindex2inet, ifnet queue and ifnet 660 * array after it is setup. 661 * 662 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 663 * by ifnet lock, so that non-netisr threads could get a 664 * consistent view. 665 */ 666 ifnet_lock(); 667 668 /* Don't update if_index until ifindex2ifnet is setup */ 669 ifp->if_index = if_index + 1; 670 sdl_addr->sdl_index = ifp->if_index; 671 672 /* 673 * Install this ifp into ifindex2ifnet 674 */ 675 if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) { 676 unsigned int n; 677 struct ifnet **q; 678 679 /* 680 * Grow ifindex2ifnet 681 */ 682 if_indexlim <<= 1; 683 n = if_indexlim * sizeof(*q); 684 q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO); 685 if (ifindex2ifnet != NULL) { 686 bcopy(ifindex2ifnet, q, n/2); 687 /* Free old ifindex2ifnet after sync all netisrs */ 688 old_ifindex2ifnet = ifindex2ifnet; 689 } 690 ifindex2ifnet = q; 691 } 692 ifindex2ifnet[ifp->if_index] = ifp; 693 /* 694 * Update if_index after this ifp is installed into ifindex2ifnet, 695 * so that netisrs could get a consistent view of ifindex2ifnet. 
696 */ 697 cpu_sfence(); 698 if_index = ifp->if_index; 699 700 /* 701 * Install this ifp into ifnet array. 702 */ 703 /* Free old ifnet array after sync all netisrs */ 704 old_ifnet_array = ifnet_array; 705 ifnet_array = ifnet_array_add(ifp, old_ifnet_array); 706 707 /* 708 * Install this ifp into ifnet queue. 709 */ 710 TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link); 711 712 ifnet_unlock(); 713 714 /* 715 * Sync all netisrs so that the old ifindex2ifnet and ifnet array 716 * are no longer accessed and we can free them safely later on. 717 */ 718 netmsg_service_sync(); 719 if (old_ifindex2ifnet != NULL) 720 kfree(old_ifindex2ifnet, M_IFADDR); 721 ifnet_array_free(old_ifnet_array); 722 723 if (!SLIST_EMPTY(&domains)) 724 if_attachdomain1(ifp); 725 726 /* Announce the interface. */ 727 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 728 devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); 729 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 730 } 731 732 static void 733 if_attachdomain(void *dummy) 734 { 735 struct ifnet *ifp; 736 737 ifnet_lock(); 738 TAILQ_FOREACH(ifp, &ifnetlist, if_list) 739 if_attachdomain1(ifp); 740 ifnet_unlock(); 741 } 742 SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, 743 if_attachdomain, NULL); 744 745 static void 746 if_attachdomain1(struct ifnet *ifp) 747 { 748 struct domain *dp; 749 750 crit_enter(); 751 752 /* address family dependent data region */ 753 bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); 754 SLIST_FOREACH(dp, &domains, dom_next) 755 if (dp->dom_ifattach) 756 ifp->if_afdata[dp->dom_family] = 757 (*dp->dom_ifattach)(ifp); 758 crit_exit(); 759 } 760 761 /* 762 * Purge all addresses whose type is _not_ AF_LINK 763 */ 764 static void 765 if_purgeaddrs_nolink_dispatch(netmsg_t nmsg) 766 { 767 struct ifnet *ifp = nmsg->lmsg.u.ms_resultp; 768 struct ifaddr_container *ifac, *next; 769 770 ASSERT_NETISR0; 771 772 /* 773 * The ifaddr processing in the following loop will block, 774 * however, this function is called in netisr0, 
in which 775 * ifaddr list changes happen, so we don't care about the 776 * blockness of the ifaddr processing here. 777 */ 778 TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid], 779 ifa_link, next) { 780 struct ifaddr *ifa = ifac->ifa; 781 782 /* Ignore marker */ 783 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 784 continue; 785 786 /* Leave link ifaddr as it is */ 787 if (ifa->ifa_addr->sa_family == AF_LINK) 788 continue; 789 #ifdef INET 790 /* XXX: Ugly!! ad hoc just for INET */ 791 if (ifa->ifa_addr->sa_family == AF_INET) { 792 struct ifaliasreq ifr; 793 struct sockaddr_in saved_addr, saved_dst; 794 #ifdef IFADDR_DEBUG_VERBOSE 795 int i; 796 797 kprintf("purge in4 addr %p: ", ifa); 798 for (i = 0; i < ncpus; ++i) { 799 kprintf("%d ", 800 ifa->ifa_containers[i].ifa_refcnt); 801 } 802 kprintf("\n"); 803 #endif 804 805 /* Save information for panic. */ 806 memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr)); 807 if (ifa->ifa_dstaddr != NULL) { 808 memcpy(&saved_dst, ifa->ifa_dstaddr, 809 sizeof(saved_dst)); 810 } else { 811 memset(&saved_dst, 0, sizeof(saved_dst)); 812 } 813 814 bzero(&ifr, sizeof ifr); 815 ifr.ifra_addr = *ifa->ifa_addr; 816 if (ifa->ifa_dstaddr) 817 ifr.ifra_broadaddr = *ifa->ifa_dstaddr; 818 if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp, 819 NULL) == 0) 820 continue; 821 822 /* MUST NOT HAPPEN */ 823 panic("%s: in_control failed %x, dst %x", ifp->if_xname, 824 ntohl(saved_addr.sin_addr.s_addr), 825 ntohl(saved_dst.sin_addr.s_addr)); 826 } 827 #endif /* INET */ 828 #ifdef INET6 829 if (ifa->ifa_addr->sa_family == AF_INET6) { 830 #ifdef IFADDR_DEBUG_VERBOSE 831 int i; 832 833 kprintf("purge in6 addr %p: ", ifa); 834 for (i = 0; i < ncpus; ++i) { 835 kprintf("%d ", 836 ifa->ifa_containers[i].ifa_refcnt); 837 } 838 kprintf("\n"); 839 #endif 840 841 in6_purgeaddr(ifa); 842 /* ifp_addrhead is already updated */ 843 continue; 844 } 845 #endif /* INET6 */ 846 if_printf(ifp, "destroy ifaddr family %d\n", 847 ifa->ifa_addr->sa_family); 848 
ifa_ifunlink(ifa, ifp); 849 ifa_destroy(ifa); 850 } 851 852 netisr_replymsg(&nmsg->base, 0); 853 } 854 855 void 856 if_purgeaddrs_nolink(struct ifnet *ifp) 857 { 858 struct netmsg_base nmsg; 859 860 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 861 if_purgeaddrs_nolink_dispatch); 862 nmsg.lmsg.u.ms_resultp = ifp; 863 netisr_domsg(&nmsg, 0); 864 } 865 866 static void 867 ifq_stage_detach_handler(netmsg_t nmsg) 868 { 869 struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp; 870 int q; 871 872 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 873 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 874 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 875 876 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) 877 ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage); 878 } 879 lwkt_replymsg(&nmsg->lmsg, 0); 880 } 881 882 static void 883 ifq_stage_detach(struct ifaltq *ifq) 884 { 885 struct netmsg_base base; 886 int cpu; 887 888 netmsg_init(&base, NULL, &curthread->td_msgport, 0, 889 ifq_stage_detach_handler); 890 base.lmsg.u.ms_resultp = ifq; 891 892 /* XXX netisr_ncpus */ 893 for (cpu = 0; cpu < ncpus; ++cpu) 894 lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0); 895 } 896 897 struct netmsg_if_rtdel { 898 struct netmsg_base base; 899 struct ifnet *ifp; 900 }; 901 902 static void 903 if_rtdel_dispatch(netmsg_t msg) 904 { 905 struct netmsg_if_rtdel *rmsg = (void *)msg; 906 int i, cpu; 907 908 cpu = mycpuid; 909 ASSERT_NETISR_NCPUS(cpu); 910 911 for (i = 1; i <= AF_MAX; i++) { 912 struct radix_node_head *rnh; 913 914 if ((rnh = rt_tables[cpu][i]) == NULL) 915 continue; 916 rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp); 917 } 918 netisr_forwardmsg(&msg->base, cpu + 1); 919 } 920 921 /* 922 * Detach an interface, removing it from the 923 * list of "active" interfaces. 
924 */ 925 void 926 if_detach(struct ifnet *ifp) 927 { 928 struct ifnet_array *old_ifnet_array; 929 struct ifg_list *ifgl; 930 struct netmsg_if_rtdel msg; 931 struct domain *dp; 932 int q; 933 934 /* Announce that the interface is gone. */ 935 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 936 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 937 devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); 938 939 /* 940 * Remove this ifp from ifindex2inet, ifnet queue and ifnet 941 * array before it is whacked. 942 * 943 * Protect ifindex2ifnet, ifnet queue and ifnet array changes 944 * by ifnet lock, so that non-netisr threads could get a 945 * consistent view. 946 */ 947 ifnet_lock(); 948 949 /* 950 * Remove this ifp from ifindex2ifnet and maybe decrement if_index. 951 */ 952 ifindex2ifnet[ifp->if_index] = NULL; 953 while (if_index > 0 && ifindex2ifnet[if_index] == NULL) 954 if_index--; 955 956 /* 957 * Remove this ifp from ifnet queue. 958 */ 959 TAILQ_REMOVE(&ifnetlist, ifp, if_link); 960 961 /* 962 * Remove this ifp from ifnet array. 963 */ 964 /* Free old ifnet array after sync all netisrs */ 965 old_ifnet_array = ifnet_array; 966 ifnet_array = ifnet_array_del(ifp, old_ifnet_array); 967 968 ifnet_unlock(); 969 970 ifgroup_lockmgr(LK_EXCLUSIVE); 971 while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL) 972 if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group); 973 ifgroup_lockmgr(LK_RELEASE); 974 975 /* 976 * Sync all netisrs so that the old ifnet array is no longer 977 * accessed and we can free it safely later on. 978 */ 979 netmsg_service_sync(); 980 ifnet_array_free(old_ifnet_array); 981 982 /* 983 * Remove routes and flush queues. 
984 */ 985 crit_enter(); 986 #ifdef IFPOLL_ENABLE 987 if (ifp->if_flags & IFF_NPOLLING) 988 ifpoll_deregister(ifp); 989 #endif 990 if_down(ifp); 991 992 /* Decrease the mbuf clusters/jclusters limits increased by us */ 993 if (ifp->if_nmbclusters > 0) 994 mcl_inclimit(-ifp->if_nmbclusters); 995 if (ifp->if_nmbjclusters > 0) 996 mjcl_inclimit(-ifp->if_nmbjclusters); 997 998 #ifdef ALTQ 999 if (ifq_is_enabled(&ifp->if_snd)) 1000 altq_disable(&ifp->if_snd); 1001 if (ifq_is_attached(&ifp->if_snd)) 1002 altq_detach(&ifp->if_snd); 1003 #endif 1004 1005 /* 1006 * Clean up all addresses. 1007 */ 1008 ifp->if_lladdr = NULL; 1009 1010 if_purgeaddrs_nolink(ifp); 1011 if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) { 1012 struct ifaddr *ifa; 1013 1014 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 1015 KASSERT(ifa->ifa_addr->sa_family == AF_LINK, 1016 ("non-link ifaddr is left on if_addrheads")); 1017 1018 ifa_ifunlink(ifa, ifp); 1019 ifa_destroy(ifa); 1020 KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]), 1021 ("there are still ifaddrs left on if_addrheads")); 1022 } 1023 1024 #ifdef INET 1025 /* 1026 * Remove all IPv4 kernel structures related to ifp. 1027 */ 1028 in_ifdetach(ifp); 1029 #endif 1030 1031 #ifdef INET6 1032 /* 1033 * Remove all IPv6 kernel structs related to ifp. This should be done 1034 * before removing routing entries below, since IPv6 interface direct 1035 * routes are expected to be removed by the IPv6-specific kernel API. 1036 * Otherwise, the kernel will detect some inconsistency and bark it. 
1037 */ 1038 in6_ifdetach(ifp); 1039 #endif 1040 1041 /* 1042 * Delete all remaining routes using this interface 1043 */ 1044 netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY, 1045 if_rtdel_dispatch); 1046 msg.ifp = ifp; 1047 netisr_domsg_global(&msg.base); 1048 1049 SLIST_FOREACH(dp, &domains, dom_next) { 1050 if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) 1051 (*dp->dom_ifdetach)(ifp, 1052 ifp->if_afdata[dp->dom_family]); 1053 } 1054 1055 kfree(ifp->if_addrheads, M_IFADDR); 1056 1057 lwkt_synchronize_ipiqs("if_detach"); 1058 ifq_stage_detach(&ifp->if_snd); 1059 1060 for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) { 1061 struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q]; 1062 1063 kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG); 1064 kfree(ifsq->ifsq_stage, M_DEVBUF); 1065 } 1066 kfree(ifp->if_snd.altq_subq, M_DEVBUF); 1067 1068 kfree(ifp->if_data_pcpu, M_DEVBUF); 1069 1070 crit_exit(); 1071 } 1072 1073 int 1074 ifgroup_lockmgr(u_int flags) 1075 { 1076 return lockmgr(&ifgroup_lock, flags); 1077 } 1078 1079 /* 1080 * Create an empty interface group. 1081 */ 1082 static struct ifg_group * 1083 if_creategroup(const char *groupname) 1084 { 1085 struct ifg_group *ifg; 1086 1087 ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK); 1088 strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); 1089 ifg->ifg_refcnt = 0; 1090 ifg->ifg_carp_demoted = 0; 1091 TAILQ_INIT(&ifg->ifg_members); 1092 1093 ifgroup_lockmgr(LK_EXCLUSIVE); 1094 TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next); 1095 ifgroup_lockmgr(LK_RELEASE); 1096 1097 EVENTHANDLER_INVOKE(group_attach_event, ifg); 1098 1099 return (ifg); 1100 } 1101 1102 /* 1103 * Destroy an empty interface group. 
1104 */ 1105 static int 1106 if_destroygroup(struct ifg_group *ifg) 1107 { 1108 KASSERT(ifg->ifg_refcnt == 0, 1109 ("trying to delete a non-empty interface group")); 1110 1111 ifgroup_lockmgr(LK_EXCLUSIVE); 1112 TAILQ_REMOVE(&ifg_head, ifg, ifg_next); 1113 ifgroup_lockmgr(LK_RELEASE); 1114 1115 EVENTHANDLER_INVOKE(group_detach_event, ifg); 1116 kfree(ifg, M_IFNET); 1117 1118 return (0); 1119 } 1120 1121 /* 1122 * Add the interface to a group. 1123 * The target group will be created if it doesn't exist. 1124 */ 1125 int 1126 if_addgroup(struct ifnet *ifp, const char *groupname) 1127 { 1128 struct ifg_list *ifgl; 1129 struct ifg_group *ifg; 1130 struct ifg_member *ifgm; 1131 1132 if (groupname[0] && 1133 groupname[strlen(groupname) - 1] >= '0' && 1134 groupname[strlen(groupname) - 1] <= '9') 1135 return (EINVAL); 1136 1137 ifgroup_lockmgr(LK_SHARED); 1138 1139 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1140 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) { 1141 ifgroup_lockmgr(LK_RELEASE); 1142 return (EEXIST); 1143 } 1144 } 1145 1146 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1147 if (strcmp(ifg->ifg_group, groupname) == 0) 1148 break; 1149 } 1150 1151 ifgroup_lockmgr(LK_RELEASE); 1152 1153 if (ifg == NULL) 1154 ifg = if_creategroup(groupname); 1155 1156 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK); 1157 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK); 1158 ifgl->ifgl_group = ifg; 1159 ifgm->ifgm_ifp = ifp; 1160 ifg->ifg_refcnt++; 1161 1162 ifgroup_lockmgr(LK_EXCLUSIVE); 1163 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1164 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1165 ifgroup_lockmgr(LK_RELEASE); 1166 1167 EVENTHANDLER_INVOKE(group_change_event, groupname); 1168 1169 return (0); 1170 } 1171 1172 /* 1173 * Remove the interface from a group. 1174 * The group will be destroyed if it becomes empty. 1175 * 1176 * The 'ifgroup_lock' must be hold exclusively when calling this. 
1177 */ 1178 static int 1179 if_delgroup_locked(struct ifnet *ifp, const char *groupname) 1180 { 1181 struct ifg_list *ifgl; 1182 struct ifg_member *ifgm; 1183 1184 KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE); 1185 1186 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1187 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) 1188 break; 1189 } 1190 if (ifgl == NULL) 1191 return (ENOENT); 1192 1193 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); 1194 1195 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { 1196 if (ifgm->ifgm_ifp == ifp) 1197 break; 1198 } 1199 1200 if (ifgm != NULL) { 1201 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); 1202 1203 ifgroup_lockmgr(LK_RELEASE); 1204 EVENTHANDLER_INVOKE(group_change_event, groupname); 1205 ifgroup_lockmgr(LK_EXCLUSIVE); 1206 1207 kfree(ifgm, M_IFNET); 1208 ifgl->ifgl_group->ifg_refcnt--; 1209 } 1210 1211 if (ifgl->ifgl_group->ifg_refcnt == 0) { 1212 ifgroup_lockmgr(LK_RELEASE); 1213 if_destroygroup(ifgl->ifgl_group); 1214 ifgroup_lockmgr(LK_EXCLUSIVE); 1215 } 1216 1217 kfree(ifgl, M_IFNET); 1218 1219 return (0); 1220 } 1221 1222 int 1223 if_delgroup(struct ifnet *ifp, const char *groupname) 1224 { 1225 int error; 1226 1227 ifgroup_lockmgr(LK_EXCLUSIVE); 1228 error = if_delgroup_locked(ifp, groupname); 1229 ifgroup_lockmgr(LK_RELEASE); 1230 1231 return (error); 1232 } 1233 1234 /* 1235 * Store all the groups that the interface belongs to in memory 1236 * pointed to by data. 
1237 */ 1238 static int 1239 if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp) 1240 { 1241 struct ifg_list *ifgl; 1242 struct ifg_req *ifgrq, *p; 1243 int len, error; 1244 1245 len = 0; 1246 ifgroup_lockmgr(LK_SHARED); 1247 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1248 len += sizeof(struct ifg_req); 1249 ifgroup_lockmgr(LK_RELEASE); 1250 1251 if (ifgr->ifgr_len == 0) { 1252 /* 1253 * Caller is asking how much memory should be allocated in 1254 * the next request in order to hold all the groups. 1255 */ 1256 ifgr->ifgr_len = len; 1257 return (0); 1258 } else if (ifgr->ifgr_len != len) { 1259 return (EINVAL); 1260 } 1261 1262 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1263 if (ifgrq == NULL) 1264 return (ENOMEM); 1265 1266 ifgroup_lockmgr(LK_SHARED); 1267 p = ifgrq; 1268 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1269 if (len < sizeof(struct ifg_req)) { 1270 ifgroup_lockmgr(LK_RELEASE); 1271 return (EINVAL); 1272 } 1273 1274 strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group, 1275 sizeof(ifgrq->ifgrq_group)); 1276 len -= sizeof(struct ifg_req); 1277 p++; 1278 } 1279 ifgroup_lockmgr(LK_RELEASE); 1280 1281 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1282 kfree(ifgrq, M_TEMP); 1283 if (error) 1284 return (error); 1285 1286 return (0); 1287 } 1288 1289 /* 1290 * Store all the members of a group in memory pointed to by data. 
1291 */ 1292 static int 1293 if_getgroupmembers(struct ifgroupreq *ifgr) 1294 { 1295 struct ifg_group *ifg; 1296 struct ifg_member *ifgm; 1297 struct ifg_req *ifgrq, *p; 1298 int len, error; 1299 1300 ifgroup_lockmgr(LK_SHARED); 1301 1302 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1303 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1304 break; 1305 } 1306 if (ifg == NULL) { 1307 ifgroup_lockmgr(LK_RELEASE); 1308 return (ENOENT); 1309 } 1310 1311 len = 0; 1312 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1313 len += sizeof(struct ifg_req); 1314 1315 ifgroup_lockmgr(LK_RELEASE); 1316 1317 if (ifgr->ifgr_len == 0) { 1318 ifgr->ifgr_len = len; 1319 return (0); 1320 } else if (ifgr->ifgr_len != len) { 1321 return (EINVAL); 1322 } 1323 1324 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1325 if (ifgrq == NULL) 1326 return (ENOMEM); 1327 1328 ifgroup_lockmgr(LK_SHARED); 1329 p = ifgrq; 1330 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1331 if (len < sizeof(struct ifg_req)) { 1332 ifgroup_lockmgr(LK_RELEASE); 1333 return (EINVAL); 1334 } 1335 1336 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname, 1337 sizeof(p->ifgrq_member)); 1338 len -= sizeof(struct ifg_req); 1339 p++; 1340 } 1341 ifgroup_lockmgr(LK_RELEASE); 1342 1343 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1344 kfree(ifgrq, M_TEMP); 1345 if (error) 1346 return (error); 1347 1348 return (0); 1349 } 1350 1351 /* 1352 * Delete Routes for a Network Interface 1353 * 1354 * Called for each routing entry via the rnh->rnh_walktree() call above 1355 * to delete all route entries referencing a detaching network interface. 
1356 * 1357 * Arguments: 1358 * rn pointer to node in the routing table 1359 * arg argument passed to rnh->rnh_walktree() - detaching interface 1360 * 1361 * Returns: 1362 * 0 successful 1363 * errno failed - reason indicated 1364 * 1365 */ 1366 static int 1367 if_rtdel(struct radix_node *rn, void *arg) 1368 { 1369 struct rtentry *rt = (struct rtentry *)rn; 1370 struct ifnet *ifp = arg; 1371 int err; 1372 1373 if (rt->rt_ifp == ifp) { 1374 1375 /* 1376 * Protect (sorta) against walktree recursion problems 1377 * with cloned routes 1378 */ 1379 if (!(rt->rt_flags & RTF_UP)) 1380 return (0); 1381 1382 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1383 rt_mask(rt), rt->rt_flags, 1384 NULL); 1385 if (err) { 1386 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1387 } 1388 } 1389 1390 return (0); 1391 } 1392 1393 static __inline boolean_t 1394 ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) 1395 { 1396 if (old_ifa == NULL) 1397 return TRUE; 1398 1399 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && 1400 (cur_ifa->ifa_ifp->if_flags & IFF_UP)) 1401 return TRUE; 1402 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && 1403 (cur_ifa->ifa_flags & IFA_ROUTE)) 1404 return TRUE; 1405 return FALSE; 1406 } 1407 1408 /* 1409 * Locate an interface based on a complete address. 
1410 */ 1411 struct ifaddr * 1412 ifa_ifwithaddr(struct sockaddr *addr) 1413 { 1414 const struct ifnet_array *arr; 1415 int i; 1416 1417 arr = ifnet_array_get(); 1418 for (i = 0; i < arr->ifnet_count; ++i) { 1419 struct ifnet *ifp = arr->ifnet_arr[i]; 1420 struct ifaddr_container *ifac; 1421 1422 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1423 struct ifaddr *ifa = ifac->ifa; 1424 1425 if (ifa->ifa_addr->sa_family != addr->sa_family) 1426 continue; 1427 if (sa_equal(addr, ifa->ifa_addr)) 1428 return (ifa); 1429 if ((ifp->if_flags & IFF_BROADCAST) && 1430 ifa->ifa_broadaddr && 1431 /* IPv6 doesn't have broadcast */ 1432 ifa->ifa_broadaddr->sa_len != 0 && 1433 sa_equal(ifa->ifa_broadaddr, addr)) 1434 return (ifa); 1435 } 1436 } 1437 return (NULL); 1438 } 1439 1440 /* 1441 * Locate the point to point interface with a given destination address. 1442 */ 1443 struct ifaddr * 1444 ifa_ifwithdstaddr(struct sockaddr *addr) 1445 { 1446 const struct ifnet_array *arr; 1447 int i; 1448 1449 arr = ifnet_array_get(); 1450 for (i = 0; i < arr->ifnet_count; ++i) { 1451 struct ifnet *ifp = arr->ifnet_arr[i]; 1452 struct ifaddr_container *ifac; 1453 1454 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1455 continue; 1456 1457 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1458 struct ifaddr *ifa = ifac->ifa; 1459 1460 if (ifa->ifa_addr->sa_family != addr->sa_family) 1461 continue; 1462 if (ifa->ifa_dstaddr && 1463 sa_equal(addr, ifa->ifa_dstaddr)) 1464 return (ifa); 1465 } 1466 } 1467 return (NULL); 1468 } 1469 1470 /* 1471 * Find an interface on a specific network. If many, choice 1472 * is most specific found. 
1473 */ 1474 struct ifaddr * 1475 ifa_ifwithnet(struct sockaddr *addr) 1476 { 1477 struct ifaddr *ifa_maybe = NULL; 1478 u_int af = addr->sa_family; 1479 char *addr_data = addr->sa_data, *cplim; 1480 const struct ifnet_array *arr; 1481 int i; 1482 1483 /* 1484 * AF_LINK addresses can be looked up directly by their index number, 1485 * so do that if we can. 1486 */ 1487 if (af == AF_LINK) { 1488 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1489 1490 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1491 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1492 } 1493 1494 /* 1495 * Scan though each interface, looking for ones that have 1496 * addresses in this address family. 1497 */ 1498 arr = ifnet_array_get(); 1499 for (i = 0; i < arr->ifnet_count; ++i) { 1500 struct ifnet *ifp = arr->ifnet_arr[i]; 1501 struct ifaddr_container *ifac; 1502 1503 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1504 struct ifaddr *ifa = ifac->ifa; 1505 char *cp, *cp2, *cp3; 1506 1507 if (ifa->ifa_addr->sa_family != af) 1508 next: continue; 1509 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1510 /* 1511 * This is a bit broken as it doesn't 1512 * take into account that the remote end may 1513 * be a single node in the network we are 1514 * looking for. 1515 * The trouble is that we don't know the 1516 * netmask for the remote end. 1517 */ 1518 if (ifa->ifa_dstaddr != NULL && 1519 sa_equal(addr, ifa->ifa_dstaddr)) 1520 return (ifa); 1521 } else { 1522 /* 1523 * if we have a special address handler, 1524 * then use it instead of the generic one. 1525 */ 1526 if (ifa->ifa_claim_addr) { 1527 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1528 return (ifa); 1529 } else { 1530 continue; 1531 } 1532 } 1533 1534 /* 1535 * Scan all the bits in the ifa's address. 1536 * If a bit dissagrees with what we are 1537 * looking for, mask it with the netmask 1538 * to see if it really matters. 
1539 * (A byte at a time) 1540 */ 1541 if (ifa->ifa_netmask == 0) 1542 continue; 1543 cp = addr_data; 1544 cp2 = ifa->ifa_addr->sa_data; 1545 cp3 = ifa->ifa_netmask->sa_data; 1546 cplim = ifa->ifa_netmask->sa_len + 1547 (char *)ifa->ifa_netmask; 1548 while (cp3 < cplim) 1549 if ((*cp++ ^ *cp2++) & *cp3++) 1550 goto next; /* next address! */ 1551 /* 1552 * If the netmask of what we just found 1553 * is more specific than what we had before 1554 * (if we had one) then remember the new one 1555 * before continuing to search for an even 1556 * better one. If the netmasks are equal, 1557 * we prefer the this ifa based on the result 1558 * of ifa_prefer(). 1559 */ 1560 if (ifa_maybe == NULL || 1561 rn_refines((char *)ifa->ifa_netmask, 1562 (char *)ifa_maybe->ifa_netmask) || 1563 (sa_equal(ifa_maybe->ifa_netmask, 1564 ifa->ifa_netmask) && 1565 ifa_prefer(ifa, ifa_maybe))) 1566 ifa_maybe = ifa; 1567 } 1568 } 1569 } 1570 return (ifa_maybe); 1571 } 1572 1573 /* 1574 * Find an interface address specific to an interface best matching 1575 * a given address. 
1576 */ 1577 struct ifaddr * 1578 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) 1579 { 1580 struct ifaddr_container *ifac; 1581 char *cp, *cp2, *cp3; 1582 char *cplim; 1583 struct ifaddr *ifa_maybe = NULL; 1584 u_int af = addr->sa_family; 1585 1586 if (af >= AF_MAX) 1587 return (0); 1588 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1589 struct ifaddr *ifa = ifac->ifa; 1590 1591 if (ifa->ifa_addr->sa_family != af) 1592 continue; 1593 if (ifa_maybe == NULL) 1594 ifa_maybe = ifa; 1595 if (ifa->ifa_netmask == NULL) { 1596 if (sa_equal(addr, ifa->ifa_addr) || 1597 (ifa->ifa_dstaddr != NULL && 1598 sa_equal(addr, ifa->ifa_dstaddr))) 1599 return (ifa); 1600 continue; 1601 } 1602 if (ifp->if_flags & IFF_POINTOPOINT) { 1603 if (sa_equal(addr, ifa->ifa_dstaddr)) 1604 return (ifa); 1605 } else { 1606 cp = addr->sa_data; 1607 cp2 = ifa->ifa_addr->sa_data; 1608 cp3 = ifa->ifa_netmask->sa_data; 1609 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 1610 for (; cp3 < cplim; cp3++) 1611 if ((*cp++ ^ *cp2++) & *cp3) 1612 break; 1613 if (cp3 == cplim) 1614 return (ifa); 1615 } 1616 } 1617 return (ifa_maybe); 1618 } 1619 1620 /* 1621 * Default action when installing a route with a Link Level gateway. 1622 * Lookup an appropriate real ifa to point to. 1623 * This should be moved to /sys/net/link.c eventually. 
1624 */ 1625 static void 1626 link_rtrequest(int cmd, struct rtentry *rt) 1627 { 1628 struct ifaddr *ifa; 1629 struct sockaddr *dst; 1630 struct ifnet *ifp; 1631 1632 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 1633 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL) 1634 return; 1635 ifa = ifaof_ifpforaddr(dst, ifp); 1636 if (ifa != NULL) { 1637 IFAFREE(rt->rt_ifa); 1638 IFAREF(ifa); 1639 rt->rt_ifa = ifa; 1640 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 1641 ifa->ifa_rtrequest(cmd, rt); 1642 } 1643 } 1644 1645 struct netmsg_ifroute { 1646 struct netmsg_base base; 1647 struct ifnet *ifp; 1648 int flag; 1649 int fam; 1650 }; 1651 1652 /* 1653 * Mark an interface down and notify protocols of the transition. 1654 */ 1655 static void 1656 if_unroute_dispatch(netmsg_t nmsg) 1657 { 1658 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg; 1659 struct ifnet *ifp = msg->ifp; 1660 int flag = msg->flag, fam = msg->fam; 1661 struct ifaddr_container *ifac; 1662 1663 ASSERT_NETISR0; 1664 1665 ifp->if_flags &= ~flag; 1666 getmicrotime(&ifp->if_lastchange); 1667 /* 1668 * The ifaddr processing in the following loop will block, 1669 * however, this function is called in netisr0, in which 1670 * ifaddr list changes happen, so we don't care about the 1671 * blockness of the ifaddr processing here. 
1672 */ 1673 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1674 struct ifaddr *ifa = ifac->ifa; 1675 1676 /* Ignore marker */ 1677 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1678 continue; 1679 1680 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 1681 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr); 1682 } 1683 ifq_purge_all(&ifp->if_snd); 1684 rt_ifmsg(ifp); 1685 1686 netisr_replymsg(&nmsg->base, 0); 1687 } 1688 1689 void 1690 if_unroute(struct ifnet *ifp, int flag, int fam) 1691 { 1692 struct netmsg_ifroute msg; 1693 1694 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1695 if_unroute_dispatch); 1696 msg.ifp = ifp; 1697 msg.flag = flag; 1698 msg.fam = fam; 1699 netisr_domsg(&msg.base, 0); 1700 } 1701 1702 /* 1703 * Mark an interface up and notify protocols of the transition. 1704 */ 1705 static void 1706 if_route_dispatch(netmsg_t nmsg) 1707 { 1708 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg; 1709 struct ifnet *ifp = msg->ifp; 1710 int flag = msg->flag, fam = msg->fam; 1711 struct ifaddr_container *ifac; 1712 1713 ASSERT_NETISR0; 1714 1715 ifq_purge_all(&ifp->if_snd); 1716 ifp->if_flags |= flag; 1717 getmicrotime(&ifp->if_lastchange); 1718 /* 1719 * The ifaddr processing in the following loop will block, 1720 * however, this function is called in netisr0, in which 1721 * ifaddr list changes happen, so we don't care about the 1722 * blockness of the ifaddr processing here. 
1723 */ 1724 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1725 struct ifaddr *ifa = ifac->ifa; 1726 1727 /* Ignore marker */ 1728 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1729 continue; 1730 1731 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 1732 kpfctlinput(PRC_IFUP, ifa->ifa_addr); 1733 } 1734 rt_ifmsg(ifp); 1735 #ifdef INET6 1736 in6_if_up(ifp); 1737 #endif 1738 1739 netisr_replymsg(&nmsg->base, 0); 1740 } 1741 1742 void 1743 if_route(struct ifnet *ifp, int flag, int fam) 1744 { 1745 struct netmsg_ifroute msg; 1746 1747 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1748 if_route_dispatch); 1749 msg.ifp = ifp; 1750 msg.flag = flag; 1751 msg.fam = fam; 1752 netisr_domsg(&msg.base, 0); 1753 } 1754 1755 /* 1756 * Mark an interface down and notify protocols of the transition. An 1757 * interface going down is also considered to be a synchronizing event. 1758 * We must ensure that all packet processing related to the interface 1759 * has completed before we return so e.g. the caller can free the ifnet 1760 * structure that the mbufs may be referencing. 1761 * 1762 * NOTE: must be called at splnet or eqivalent. 1763 */ 1764 void 1765 if_down(struct ifnet *ifp) 1766 { 1767 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); 1768 if_unroute(ifp, IFF_UP, AF_UNSPEC); 1769 netmsg_service_sync(); 1770 } 1771 1772 /* 1773 * Mark an interface up and notify protocols of 1774 * the transition. 1775 * NOTE: must be called at splnet or eqivalent. 1776 */ 1777 void 1778 if_up(struct ifnet *ifp) 1779 { 1780 if_route(ifp, IFF_UP, AF_UNSPEC); 1781 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); 1782 } 1783 1784 /* 1785 * Process a link state change. 1786 * NOTE: must be called at splsoftnet or equivalent. 1787 */ 1788 void 1789 if_link_state_change(struct ifnet *ifp) 1790 { 1791 int link_state = ifp->if_link_state; 1792 1793 rt_ifmsg(ifp); 1794 devctl_notify("IFNET", ifp->if_xname, 1795 (link_state == LINK_STATE_UP) ? 
"LINK_UP" : "LINK_DOWN", NULL); 1796 1797 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); 1798 } 1799 1800 /* 1801 * Handle interface watchdog timer routines. Called 1802 * from softclock, we decrement timers (if set) and 1803 * call the appropriate interface routine on expiration. 1804 */ 1805 static void 1806 if_slowtimo_dispatch(netmsg_t nmsg) 1807 { 1808 struct globaldata *gd = mycpu; 1809 const struct ifnet_array *arr; 1810 int i; 1811 1812 ASSERT_NETISR0; 1813 1814 crit_enter_gd(gd); 1815 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1816 crit_exit_gd(gd); 1817 1818 arr = ifnet_array_get(); 1819 for (i = 0; i < arr->ifnet_count; ++i) { 1820 struct ifnet *ifp = arr->ifnet_arr[i]; 1821 1822 crit_enter_gd(gd); 1823 1824 if (if_stats_compat) { 1825 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1826 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1827 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1828 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1829 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1830 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1831 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1832 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1833 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1834 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1835 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1836 IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops); 1837 } 1838 1839 if (ifp->if_timer == 0 || --ifp->if_timer) { 1840 crit_exit_gd(gd); 1841 continue; 1842 } 1843 if (ifp->if_watchdog) { 1844 if (ifnet_tryserialize_all(ifp)) { 1845 (*ifp->if_watchdog)(ifp); 1846 ifnet_deserialize_all(ifp); 1847 } else { 1848 /* try again next timeout */ 1849 ++ifp->if_timer; 1850 } 1851 } 1852 1853 crit_exit_gd(gd); 1854 } 1855 1856 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1857 } 1858 1859 static void 1860 if_slowtimo(void *arg __unused) 1861 { 1862 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1863 1864 KASSERT(mycpuid == 0, 
("not on cpu0")); 1865 crit_enter(); 1866 if (lmsg->ms_flags & MSGF_DONE) 1867 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1868 crit_exit(); 1869 } 1870 1871 /* 1872 * Map interface name to 1873 * interface structure pointer. 1874 */ 1875 struct ifnet * 1876 ifunit(const char *name) 1877 { 1878 struct ifnet *ifp; 1879 1880 /* 1881 * Search all the interfaces for this name/number 1882 */ 1883 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1884 1885 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1886 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1887 break; 1888 } 1889 return (ifp); 1890 } 1891 1892 struct ifnet * 1893 ifunit_netisr(const char *name) 1894 { 1895 const struct ifnet_array *arr; 1896 int i; 1897 1898 /* 1899 * Search all the interfaces for this name/number 1900 */ 1901 1902 arr = ifnet_array_get(); 1903 for (i = 0; i < arr->ifnet_count; ++i) { 1904 struct ifnet *ifp = arr->ifnet_arr[i]; 1905 1906 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1907 return ifp; 1908 } 1909 return NULL; 1910 } 1911 1912 /* 1913 * Interface ioctls. 1914 */ 1915 int 1916 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1917 { 1918 struct ifnet *ifp; 1919 struct ifgroupreq *ifgr; 1920 struct ifreq *ifr; 1921 struct ifstat *ifs; 1922 int error, do_ifup = 0; 1923 short oif_flags; 1924 int new_flags; 1925 size_t namelen, onamelen; 1926 char new_name[IFNAMSIZ]; 1927 struct ifaddr *ifa; 1928 struct sockaddr_dl *sdl; 1929 1930 switch (cmd) { 1931 case SIOCGIFCONF: 1932 return (ifconf(cmd, data, cred)); 1933 default: 1934 break; 1935 } 1936 1937 ifr = (struct ifreq *)data; 1938 1939 switch (cmd) { 1940 case SIOCIFCREATE: 1941 case SIOCIFCREATE2: 1942 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1943 return (error); 1944 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1945 cmd == SIOCIFCREATE2 ? 
ifr->ifr_data : NULL)); 1946 case SIOCIFDESTROY: 1947 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1948 return (error); 1949 return (if_clone_destroy(ifr->ifr_name)); 1950 case SIOCIFGCLONERS: 1951 return (if_clone_list((struct if_clonereq *)data)); 1952 case SIOCGIFGMEMB: 1953 return (if_getgroupmembers((struct ifgroupreq *)data)); 1954 default: 1955 break; 1956 } 1957 1958 /* 1959 * Nominal ioctl through interface, lookup the ifp and obtain a 1960 * lock to serialize the ifconfig ioctl operation. 1961 */ 1962 ifnet_lock(); 1963 1964 ifp = ifunit(ifr->ifr_name); 1965 if (ifp == NULL) { 1966 ifnet_unlock(); 1967 return (ENXIO); 1968 } 1969 error = 0; 1970 1971 switch (cmd) { 1972 case SIOCGIFINDEX: 1973 ifr->ifr_index = ifp->if_index; 1974 break; 1975 1976 case SIOCGIFFLAGS: 1977 ifr->ifr_flags = ifp->if_flags; 1978 ifr->ifr_flagshigh = ifp->if_flags >> 16; 1979 break; 1980 1981 case SIOCGIFCAP: 1982 ifr->ifr_reqcap = ifp->if_capabilities; 1983 ifr->ifr_curcap = ifp->if_capenable; 1984 break; 1985 1986 case SIOCGIFMETRIC: 1987 ifr->ifr_metric = ifp->if_metric; 1988 break; 1989 1990 case SIOCGIFMTU: 1991 ifr->ifr_mtu = ifp->if_mtu; 1992 break; 1993 1994 case SIOCGIFTSOLEN: 1995 ifr->ifr_tsolen = ifp->if_tsolen; 1996 break; 1997 1998 case SIOCGIFDATA: 1999 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 2000 sizeof(ifp->if_data)); 2001 break; 2002 2003 case SIOCGIFPHYS: 2004 ifr->ifr_phys = ifp->if_physical; 2005 break; 2006 2007 case SIOCGIFPOLLCPU: 2008 ifr->ifr_pollcpu = -1; 2009 break; 2010 2011 case SIOCSIFPOLLCPU: 2012 break; 2013 2014 case SIOCSIFFLAGS: 2015 error = priv_check_cred(cred, PRIV_ROOT, 0); 2016 if (error) 2017 break; 2018 new_flags = (ifr->ifr_flags & 0xffff) | 2019 (ifr->ifr_flagshigh << 16); 2020 if (ifp->if_flags & IFF_SMART) { 2021 /* Smart drivers twiddle their own routes */ 2022 } else if (ifp->if_flags & IFF_UP && 2023 (new_flags & IFF_UP) == 0) { 2024 if_down(ifp); 2025 } else if (new_flags & IFF_UP && 2026 (ifp->if_flags 
& IFF_UP) == 0) { 2027 do_ifup = 1; 2028 } 2029 2030 #ifdef IFPOLL_ENABLE 2031 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 2032 if (new_flags & IFF_NPOLLING) 2033 ifpoll_register(ifp); 2034 else 2035 ifpoll_deregister(ifp); 2036 } 2037 #endif 2038 2039 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2040 (new_flags &~ IFF_CANTCHANGE); 2041 if (new_flags & IFF_PPROMISC) { 2042 /* Permanently promiscuous mode requested */ 2043 ifp->if_flags |= IFF_PROMISC; 2044 } else if (ifp->if_pcount == 0) { 2045 ifp->if_flags &= ~IFF_PROMISC; 2046 } 2047 if (ifp->if_ioctl) { 2048 ifnet_serialize_all(ifp); 2049 ifp->if_ioctl(ifp, cmd, data, cred); 2050 ifnet_deserialize_all(ifp); 2051 } 2052 if (do_ifup) 2053 if_up(ifp); 2054 getmicrotime(&ifp->if_lastchange); 2055 break; 2056 2057 case SIOCSIFCAP: 2058 error = priv_check_cred(cred, PRIV_ROOT, 0); 2059 if (error) 2060 break; 2061 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2062 error = EINVAL; 2063 break; 2064 } 2065 ifnet_serialize_all(ifp); 2066 ifp->if_ioctl(ifp, cmd, data, cred); 2067 ifnet_deserialize_all(ifp); 2068 break; 2069 2070 case SIOCSIFNAME: 2071 error = priv_check_cred(cred, PRIV_ROOT, 0); 2072 if (error) 2073 break; 2074 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2075 if (error) 2076 break; 2077 if (new_name[0] == '\0') { 2078 error = EINVAL; 2079 break; 2080 } 2081 if (ifunit(new_name) != NULL) { 2082 error = EEXIST; 2083 break; 2084 } 2085 2086 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2087 2088 /* Announce the departure of the interface. */ 2089 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2090 2091 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2092 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2093 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2094 namelen = strlen(new_name); 2095 onamelen = sdl->sdl_nlen; 2096 /* 2097 * Move the address if needed. This is safe because we 2098 * allocate space for a name of length IFNAMSIZ when we 2099 * create this in if_attach(). 
2100 */ 2101 if (namelen != onamelen) { 2102 bcopy(sdl->sdl_data + onamelen, 2103 sdl->sdl_data + namelen, sdl->sdl_alen); 2104 } 2105 bcopy(new_name, sdl->sdl_data, namelen); 2106 sdl->sdl_nlen = namelen; 2107 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2108 bzero(sdl->sdl_data, onamelen); 2109 while (namelen != 0) 2110 sdl->sdl_data[--namelen] = 0xff; 2111 2112 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2113 2114 /* Announce the return of the interface. */ 2115 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2116 break; 2117 2118 case SIOCSIFMETRIC: 2119 error = priv_check_cred(cred, PRIV_ROOT, 0); 2120 if (error) 2121 break; 2122 ifp->if_metric = ifr->ifr_metric; 2123 getmicrotime(&ifp->if_lastchange); 2124 break; 2125 2126 case SIOCSIFPHYS: 2127 error = priv_check_cred(cred, PRIV_ROOT, 0); 2128 if (error) 2129 break; 2130 if (ifp->if_ioctl == NULL) { 2131 error = EOPNOTSUPP; 2132 break; 2133 } 2134 ifnet_serialize_all(ifp); 2135 error = ifp->if_ioctl(ifp, cmd, data, cred); 2136 ifnet_deserialize_all(ifp); 2137 if (error == 0) 2138 getmicrotime(&ifp->if_lastchange); 2139 break; 2140 2141 case SIOCSIFMTU: 2142 { 2143 u_long oldmtu = ifp->if_mtu; 2144 2145 error = priv_check_cred(cred, PRIV_ROOT, 0); 2146 if (error) 2147 break; 2148 if (ifp->if_ioctl == NULL) { 2149 error = EOPNOTSUPP; 2150 break; 2151 } 2152 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2153 error = EINVAL; 2154 break; 2155 } 2156 ifnet_serialize_all(ifp); 2157 error = ifp->if_ioctl(ifp, cmd, data, cred); 2158 ifnet_deserialize_all(ifp); 2159 if (error == 0) { 2160 getmicrotime(&ifp->if_lastchange); 2161 rt_ifmsg(ifp); 2162 } 2163 /* 2164 * If the link MTU changed, do network layer specific procedure. 
2165 */ 2166 if (ifp->if_mtu != oldmtu) { 2167 #ifdef INET6 2168 nd6_setmtu(ifp); 2169 #endif 2170 } 2171 break; 2172 } 2173 2174 case SIOCSIFTSOLEN: 2175 error = priv_check_cred(cred, PRIV_ROOT, 0); 2176 if (error) 2177 break; 2178 2179 /* XXX need driver supplied upper limit */ 2180 if (ifr->ifr_tsolen <= 0) { 2181 error = EINVAL; 2182 break; 2183 } 2184 ifp->if_tsolen = ifr->ifr_tsolen; 2185 break; 2186 2187 case SIOCADDMULTI: 2188 case SIOCDELMULTI: 2189 error = priv_check_cred(cred, PRIV_ROOT, 0); 2190 if (error) 2191 break; 2192 2193 /* Don't allow group membership on non-multicast interfaces. */ 2194 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2195 error = EOPNOTSUPP; 2196 break; 2197 } 2198 2199 /* Don't let users screw up protocols' entries. */ 2200 if (ifr->ifr_addr.sa_family != AF_LINK) { 2201 error = EINVAL; 2202 break; 2203 } 2204 2205 if (cmd == SIOCADDMULTI) { 2206 struct ifmultiaddr *ifma; 2207 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2208 } else { 2209 error = if_delmulti(ifp, &ifr->ifr_addr); 2210 } 2211 if (error == 0) 2212 getmicrotime(&ifp->if_lastchange); 2213 break; 2214 2215 case SIOCSIFPHYADDR: 2216 case SIOCDIFPHYADDR: 2217 #ifdef INET6 2218 case SIOCSIFPHYADDR_IN6: 2219 #endif 2220 case SIOCSLIFPHYADDR: 2221 case SIOCSIFMEDIA: 2222 case SIOCSIFGENERIC: 2223 error = priv_check_cred(cred, PRIV_ROOT, 0); 2224 if (error) 2225 break; 2226 if (ifp->if_ioctl == NULL) { 2227 error = EOPNOTSUPP; 2228 break; 2229 } 2230 ifnet_serialize_all(ifp); 2231 error = ifp->if_ioctl(ifp, cmd, data, cred); 2232 ifnet_deserialize_all(ifp); 2233 if (error == 0) 2234 getmicrotime(&ifp->if_lastchange); 2235 break; 2236 2237 case SIOCGIFSTATUS: 2238 ifs = (struct ifstat *)data; 2239 ifs->ascii[0] = '\0'; 2240 /* fall through */ 2241 case SIOCGIFPSRCADDR: 2242 case SIOCGIFPDSTADDR: 2243 case SIOCGLIFPHYADDR: 2244 case SIOCGIFMEDIA: 2245 case SIOCGIFGENERIC: 2246 if (ifp->if_ioctl == NULL) { 2247 error = EOPNOTSUPP; 2248 break; 2249 } 2250 
ifnet_serialize_all(ifp); 2251 error = ifp->if_ioctl(ifp, cmd, data, cred); 2252 ifnet_deserialize_all(ifp); 2253 break; 2254 2255 case SIOCSIFLLADDR: 2256 error = priv_check_cred(cred, PRIV_ROOT, 0); 2257 if (error) 2258 break; 2259 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2260 ifr->ifr_addr.sa_len); 2261 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2262 break; 2263 2264 case SIOCAIFGROUP: 2265 ifgr = (struct ifgroupreq *)ifr; 2266 if ((error = priv_check_cred(cred, PRIV_NET_ADDIFGROUP, 0))) 2267 return (error); 2268 if ((error = if_addgroup(ifp, ifgr->ifgr_group))) 2269 return (error); 2270 break; 2271 2272 case SIOCDIFGROUP: 2273 ifgr = (struct ifgroupreq *)ifr; 2274 if ((error = priv_check_cred(cred, PRIV_NET_DELIFGROUP, 0))) 2275 return (error); 2276 if ((error = if_delgroup(ifp, ifgr->ifgr_group))) 2277 return (error); 2278 break; 2279 2280 case SIOCGIFGROUP: 2281 ifgr = (struct ifgroupreq *)ifr; 2282 if ((error = if_getgroups(ifgr, ifp))) 2283 return (error); 2284 break; 2285 2286 default: 2287 oif_flags = ifp->if_flags; 2288 if (so->so_proto == 0) { 2289 error = EOPNOTSUPP; 2290 break; 2291 } 2292 error = so_pru_control_direct(so, cmd, data, ifp); 2293 2294 /* 2295 * If the socket control method returns EOPNOTSUPP, pass the 2296 * request directly to the interface. 2297 * 2298 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, 2299 * because drivers may trust these ioctls to come from an 2300 * already privileged layer and thus do not perform credentials 2301 * checks or input validation. 
2302 */ 2303 if (error == EOPNOTSUPP && 2304 ifp->if_ioctl != NULL && 2305 cmd != SIOCSIFADDR && 2306 cmd != SIOCSIFBRDADDR && 2307 cmd != SIOCSIFDSTADDR && 2308 cmd != SIOCSIFNETMASK) { 2309 ifnet_serialize_all(ifp); 2310 error = ifp->if_ioctl(ifp, cmd, data, cred); 2311 ifnet_deserialize_all(ifp); 2312 } 2313 2314 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2315 #ifdef INET6 2316 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2317 if (ifp->if_flags & IFF_UP) { 2318 crit_enter(); 2319 in6_if_up(ifp); 2320 crit_exit(); 2321 } 2322 #endif 2323 } 2324 break; 2325 } 2326 2327 ifnet_unlock(); 2328 return (error); 2329 } 2330 2331 /* 2332 * Set/clear promiscuous mode on interface ifp based on the truth value 2333 * of pswitch. The calls are reference counted so that only the first 2334 * "on" request actually has an effect, as does the final "off" request. 2335 * Results are undefined if the "off" and "on" requests are not matched. 2336 */ 2337 int 2338 ifpromisc(struct ifnet *ifp, int pswitch) 2339 { 2340 struct ifreq ifr; 2341 int error; 2342 int oldflags; 2343 2344 oldflags = ifp->if_flags; 2345 if (ifp->if_flags & IFF_PPROMISC) { 2346 /* Do nothing if device is in permanently promiscuous mode */ 2347 ifp->if_pcount += pswitch ? 1 : -1; 2348 return (0); 2349 } 2350 if (pswitch) { 2351 /* 2352 * If the device is not configured up, we cannot put it in 2353 * promiscuous mode. 
2354 */ 2355 if ((ifp->if_flags & IFF_UP) == 0) 2356 return (ENETDOWN); 2357 if (ifp->if_pcount++ != 0) 2358 return (0); 2359 ifp->if_flags |= IFF_PROMISC; 2360 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2361 ifp->if_xname); 2362 } else { 2363 if (--ifp->if_pcount > 0) 2364 return (0); 2365 ifp->if_flags &= ~IFF_PROMISC; 2366 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2367 ifp->if_xname); 2368 } 2369 ifr.ifr_flags = ifp->if_flags; 2370 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2371 ifnet_serialize_all(ifp); 2372 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2373 ifnet_deserialize_all(ifp); 2374 if (error == 0) 2375 rt_ifmsg(ifp); 2376 else 2377 ifp->if_flags = oldflags; 2378 return error; 2379 } 2380 2381 /* 2382 * Return interface configuration 2383 * of system. List may be used 2384 * in later ioctl's (above) to get 2385 * other information. 2386 */ 2387 static int 2388 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2389 { 2390 struct ifconf *ifc = (struct ifconf *)data; 2391 struct ifnet *ifp; 2392 struct sockaddr *sa; 2393 struct ifreq ifr, *ifrp; 2394 int space = ifc->ifc_len, error = 0; 2395 2396 ifrp = ifc->ifc_req; 2397 2398 ifnet_lock(); 2399 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2400 struct ifaddr_container *ifac, *ifac_mark; 2401 struct ifaddr_marker mark; 2402 struct ifaddrhead *head; 2403 int addrs; 2404 2405 if (space <= sizeof ifr) 2406 break; 2407 2408 /* 2409 * Zero the stack declared structure first to prevent 2410 * memory disclosure. 2411 */ 2412 bzero(&ifr, sizeof(ifr)); 2413 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2414 >= sizeof(ifr.ifr_name)) { 2415 error = ENAMETOOLONG; 2416 break; 2417 } 2418 2419 /* 2420 * Add a marker, since copyout() could block and during that 2421 * period the list could be changed. 
Inserting the marker to 2422 * the header of the list will not cause trouble for the code 2423 * assuming that the first element of the list is AF_LINK; the 2424 * marker will be moved to the next position w/o blocking. 2425 */ 2426 ifa_marker_init(&mark, ifp); 2427 ifac_mark = &mark.ifac; 2428 head = &ifp->if_addrheads[mycpuid]; 2429 2430 addrs = 0; 2431 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2432 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2433 struct ifaddr *ifa = ifac->ifa; 2434 2435 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2436 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2437 2438 /* Ignore marker */ 2439 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2440 continue; 2441 2442 if (space <= sizeof ifr) 2443 break; 2444 sa = ifa->ifa_addr; 2445 if (cred->cr_prison && 2446 prison_if(cred, sa)) 2447 continue; 2448 addrs++; 2449 /* 2450 * Keep a reference on this ifaddr, so that it will 2451 * not be destroyed when its address is copied to 2452 * the userland, which could block. 
2453 */ 2454 IFAREF(ifa); 2455 if (sa->sa_len <= sizeof(*sa)) { 2456 ifr.ifr_addr = *sa; 2457 error = copyout(&ifr, ifrp, sizeof ifr); 2458 ifrp++; 2459 } else { 2460 if (space < (sizeof ifr) + sa->sa_len - 2461 sizeof(*sa)) { 2462 IFAFREE(ifa); 2463 break; 2464 } 2465 space -= sa->sa_len - sizeof(*sa); 2466 error = copyout(&ifr, ifrp, 2467 sizeof ifr.ifr_name); 2468 if (error == 0) 2469 error = copyout(sa, &ifrp->ifr_addr, 2470 sa->sa_len); 2471 ifrp = (struct ifreq *) 2472 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2473 } 2474 IFAFREE(ifa); 2475 if (error) 2476 break; 2477 space -= sizeof ifr; 2478 } 2479 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2480 if (error) 2481 break; 2482 if (!addrs) { 2483 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2484 error = copyout(&ifr, ifrp, sizeof ifr); 2485 if (error) 2486 break; 2487 space -= sizeof ifr; 2488 ifrp++; 2489 } 2490 } 2491 ifnet_unlock(); 2492 2493 ifc->ifc_len -= space; 2494 return (error); 2495 } 2496 2497 /* 2498 * Just like if_promisc(), but for all-multicast-reception mode. 
2499 */ 2500 int 2501 if_allmulti(struct ifnet *ifp, int onswitch) 2502 { 2503 int error = 0; 2504 struct ifreq ifr; 2505 2506 crit_enter(); 2507 2508 if (onswitch) { 2509 if (ifp->if_amcount++ == 0) { 2510 ifp->if_flags |= IFF_ALLMULTI; 2511 ifr.ifr_flags = ifp->if_flags; 2512 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2513 ifnet_serialize_all(ifp); 2514 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2515 NULL); 2516 ifnet_deserialize_all(ifp); 2517 } 2518 } else { 2519 if (ifp->if_amcount > 1) { 2520 ifp->if_amcount--; 2521 } else { 2522 ifp->if_amcount = 0; 2523 ifp->if_flags &= ~IFF_ALLMULTI; 2524 ifr.ifr_flags = ifp->if_flags; 2525 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2526 ifnet_serialize_all(ifp); 2527 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2528 NULL); 2529 ifnet_deserialize_all(ifp); 2530 } 2531 } 2532 2533 crit_exit(); 2534 2535 if (error == 0) 2536 rt_ifmsg(ifp); 2537 return error; 2538 } 2539 2540 /* 2541 * Add a multicast listenership to the interface in question. 2542 * The link layer provides a routine which converts 2543 */ 2544 int 2545 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2546 struct ifmultiaddr **retifma) 2547 { 2548 struct sockaddr *llsa, *dupsa; 2549 int error; 2550 struct ifmultiaddr *ifma; 2551 2552 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2553 2554 /* 2555 * If the matching multicast address already exists 2556 * then don't add a new one, just add a reference 2557 */ 2558 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2559 if (sa_equal(sa, ifma->ifma_addr)) { 2560 ifma->ifma_refcount++; 2561 if (retifma) 2562 *retifma = ifma; 2563 return 0; 2564 } 2565 } 2566 2567 /* 2568 * Give the link layer a chance to accept/reject it, and also 2569 * find out which AF_LINK address this maps to, if it isn't one 2570 * already. 
2571 */ 2572 if (ifp->if_resolvemulti) { 2573 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2574 if (error) 2575 return error; 2576 } else { 2577 llsa = NULL; 2578 } 2579 2580 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2581 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2582 bcopy(sa, dupsa, sa->sa_len); 2583 2584 ifma->ifma_addr = dupsa; 2585 ifma->ifma_lladdr = llsa; 2586 ifma->ifma_ifp = ifp; 2587 ifma->ifma_refcount = 1; 2588 ifma->ifma_protospec = NULL; 2589 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2590 2591 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2592 if (retifma) 2593 *retifma = ifma; 2594 2595 if (llsa != NULL) { 2596 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2597 if (sa_equal(ifma->ifma_addr, llsa)) 2598 break; 2599 } 2600 if (ifma) { 2601 ifma->ifma_refcount++; 2602 } else { 2603 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2604 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2605 bcopy(llsa, dupsa, llsa->sa_len); 2606 ifma->ifma_addr = dupsa; 2607 ifma->ifma_ifp = ifp; 2608 ifma->ifma_refcount = 1; 2609 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2610 } 2611 } 2612 /* 2613 * We are certain we have added something, so call down to the 2614 * interface to let them know about it. 2615 */ 2616 if (ifp->if_ioctl) 2617 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2618 2619 return 0; 2620 } 2621 2622 int 2623 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2624 struct ifmultiaddr **retifma) 2625 { 2626 int error; 2627 2628 ifnet_serialize_all(ifp); 2629 error = if_addmulti_serialized(ifp, sa, retifma); 2630 ifnet_deserialize_all(ifp); 2631 2632 return error; 2633 } 2634 2635 /* 2636 * Remove a reference to a multicast address on this interface. Yell 2637 * if the request does not match an existing membership. 
2638 */ 2639 static int 2640 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2641 { 2642 struct ifmultiaddr *ifma; 2643 2644 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2645 2646 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2647 if (sa_equal(sa, ifma->ifma_addr)) 2648 break; 2649 if (ifma == NULL) 2650 return ENOENT; 2651 2652 if (ifma->ifma_refcount > 1) { 2653 ifma->ifma_refcount--; 2654 return 0; 2655 } 2656 2657 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2658 sa = ifma->ifma_lladdr; 2659 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2660 /* 2661 * Make sure the interface driver is notified 2662 * in the case of a link layer mcast group being left. 2663 */ 2664 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2665 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2666 kfree(ifma->ifma_addr, M_IFMADDR); 2667 kfree(ifma, M_IFMADDR); 2668 if (sa == NULL) 2669 return 0; 2670 2671 /* 2672 * Now look for the link-layer address which corresponds to 2673 * this network address. It had been squirreled away in 2674 * ifma->ifma_lladdr for this purpose (so we don't have 2675 * to call ifp->if_resolvemulti() again), and we saved that 2676 * value in sa above. If some nasty deleted the 2677 * link-layer address out from underneath us, we can deal because 2678 * the address we stored was is not the same as the one which was 2679 * in the record for the link-layer address. (So we don't complain 2680 * in that case.) 
2681 */ 2682 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2683 if (sa_equal(sa, ifma->ifma_addr)) 2684 break; 2685 if (ifma == NULL) 2686 return 0; 2687 2688 if (ifma->ifma_refcount > 1) { 2689 ifma->ifma_refcount--; 2690 return 0; 2691 } 2692 2693 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2694 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2695 kfree(ifma->ifma_addr, M_IFMADDR); 2696 kfree(sa, M_IFMADDR); 2697 kfree(ifma, M_IFMADDR); 2698 2699 return 0; 2700 } 2701 2702 int 2703 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2704 { 2705 int error; 2706 2707 ifnet_serialize_all(ifp); 2708 error = if_delmulti_serialized(ifp, sa); 2709 ifnet_deserialize_all(ifp); 2710 2711 return error; 2712 } 2713 2714 /* 2715 * Delete all multicast group membership for an interface. 2716 * Should be used to quickly flush all multicast filters. 2717 */ 2718 void 2719 if_delallmulti_serialized(struct ifnet *ifp) 2720 { 2721 struct ifmultiaddr *ifma, mark; 2722 struct sockaddr sa; 2723 2724 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2725 2726 bzero(&sa, sizeof(sa)); 2727 sa.sa_family = AF_UNSPEC; 2728 sa.sa_len = sizeof(sa); 2729 2730 bzero(&mark, sizeof(mark)); 2731 mark.ifma_addr = &sa; 2732 2733 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2734 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2735 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2736 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2737 ifma_link); 2738 2739 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2740 continue; 2741 2742 if_delmulti_serialized(ifp, ifma->ifma_addr); 2743 } 2744 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2745 } 2746 2747 2748 /* 2749 * Set the link layer address on an interface. 2750 * 2751 * At this time we only support certain types of interfaces, 2752 * and we don't allow the length of the address to change. 
2753 */ 2754 int 2755 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2756 { 2757 struct sockaddr_dl *sdl; 2758 struct ifreq ifr; 2759 2760 sdl = IF_LLSOCKADDR(ifp); 2761 if (sdl == NULL) 2762 return (EINVAL); 2763 if (len != sdl->sdl_alen) /* don't allow length to change */ 2764 return (EINVAL); 2765 switch (ifp->if_type) { 2766 case IFT_ETHER: /* these types use struct arpcom */ 2767 case IFT_XETHER: 2768 case IFT_L2VLAN: 2769 case IFT_IEEE8023ADLAG: 2770 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2771 bcopy(lladdr, LLADDR(sdl), len); 2772 break; 2773 default: 2774 return (ENODEV); 2775 } 2776 /* 2777 * If the interface is already up, we need 2778 * to re-init it in order to reprogram its 2779 * address filter. 2780 */ 2781 ifnet_serialize_all(ifp); 2782 if ((ifp->if_flags & IFF_UP) != 0) { 2783 #ifdef INET 2784 struct ifaddr_container *ifac; 2785 #endif 2786 2787 ifp->if_flags &= ~IFF_UP; 2788 ifr.ifr_flags = ifp->if_flags; 2789 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2790 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2791 NULL); 2792 ifp->if_flags |= IFF_UP; 2793 ifr.ifr_flags = ifp->if_flags; 2794 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2795 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2796 NULL); 2797 #ifdef INET 2798 /* 2799 * Also send gratuitous ARPs to notify other nodes about 2800 * the address change. 
2801 */ 2802 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2803 struct ifaddr *ifa = ifac->ifa; 2804 2805 if (ifa->ifa_addr != NULL && 2806 ifa->ifa_addr->sa_family == AF_INET) 2807 arp_gratuitous(ifp, ifa); 2808 } 2809 #endif 2810 } 2811 ifnet_deserialize_all(ifp); 2812 return (0); 2813 } 2814 2815 struct ifmultiaddr * 2816 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2817 { 2818 struct ifmultiaddr *ifma; 2819 2820 /* TODO: need ifnet_serialize_main */ 2821 ifnet_serialize_all(ifp); 2822 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2823 if (sa_equal(ifma->ifma_addr, sa)) 2824 break; 2825 ifnet_deserialize_all(ifp); 2826 2827 return ifma; 2828 } 2829 2830 /* 2831 * This function locates the first real ethernet MAC from a network 2832 * card and loads it into node, returning 0 on success or ENOENT if 2833 * no suitable interfaces were found. It is used by the uuid code to 2834 * generate a unique 6-byte number. 2835 */ 2836 int 2837 if_getanyethermac(uint16_t *node, int minlen) 2838 { 2839 struct ifnet *ifp; 2840 struct sockaddr_dl *sdl; 2841 2842 ifnet_lock(); 2843 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2844 if (ifp->if_type != IFT_ETHER) 2845 continue; 2846 sdl = IF_LLSOCKADDR(ifp); 2847 if (sdl->sdl_alen < minlen) 2848 continue; 2849 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2850 minlen); 2851 ifnet_unlock(); 2852 return(0); 2853 } 2854 ifnet_unlock(); 2855 return (ENOENT); 2856 } 2857 2858 /* 2859 * The name argument must be a pointer to storage which will last as 2860 * long as the interface does. For physical devices, the result of 2861 * device_get_name(dev) is a good choice and for pseudo-devices a 2862 * static string works well. 
2863 */ 2864 void 2865 if_initname(struct ifnet *ifp, const char *name, int unit) 2866 { 2867 ifp->if_dname = name; 2868 ifp->if_dunit = unit; 2869 if (unit != IF_DUNIT_NONE) 2870 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2871 else 2872 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2873 } 2874 2875 int 2876 if_printf(struct ifnet *ifp, const char *fmt, ...) 2877 { 2878 __va_list ap; 2879 int retval; 2880 2881 retval = kprintf("%s: ", ifp->if_xname); 2882 __va_start(ap, fmt); 2883 retval += kvprintf(fmt, ap); 2884 __va_end(ap); 2885 return (retval); 2886 } 2887 2888 struct ifnet * 2889 if_alloc(uint8_t type) 2890 { 2891 struct ifnet *ifp; 2892 size_t size; 2893 2894 /* 2895 * XXX temporary hack until arpcom is setup in if_l2com 2896 */ 2897 if (type == IFT_ETHER) 2898 size = sizeof(struct arpcom); 2899 else 2900 size = sizeof(struct ifnet); 2901 2902 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2903 2904 ifp->if_type = type; 2905 2906 if (if_com_alloc[type] != NULL) { 2907 ifp->if_l2com = if_com_alloc[type](type, ifp); 2908 if (ifp->if_l2com == NULL) { 2909 kfree(ifp, M_IFNET); 2910 return (NULL); 2911 } 2912 } 2913 return (ifp); 2914 } 2915 2916 void 2917 if_free(struct ifnet *ifp) 2918 { 2919 kfree(ifp, M_IFNET); 2920 } 2921 2922 void 2923 ifq_set_classic(struct ifaltq *ifq) 2924 { 2925 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 2926 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 2927 } 2928 2929 void 2930 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 2931 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 2932 { 2933 int q; 2934 2935 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 2936 KASSERT(enqueue != NULL, ("enqueue is not specified")); 2937 KASSERT(dequeue != NULL, ("dequeue is not specified")); 2938 KASSERT(request != NULL, ("request is not specified")); 2939 2940 ifq->altq_mapsubq = mapsubq; 2941 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 2942 struct ifaltq_subque *ifsq = 
&ifq->altq_subq[q]; 2943 2944 ifsq->ifsq_enqueue = enqueue; 2945 ifsq->ifsq_dequeue = dequeue; 2946 ifsq->ifsq_request = request; 2947 } 2948 } 2949 2950 static void 2951 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2952 { 2953 2954 classq_add(&ifsq->ifsq_norm, m); 2955 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2956 } 2957 2958 static void 2959 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2960 { 2961 2962 classq_add(&ifsq->ifsq_prio, m); 2963 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2964 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 2965 } 2966 2967 static struct mbuf * 2968 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 2969 { 2970 struct mbuf *m; 2971 2972 m = classq_get(&ifsq->ifsq_norm); 2973 if (m != NULL) 2974 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2975 return (m); 2976 } 2977 2978 static struct mbuf * 2979 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 2980 { 2981 struct mbuf *m; 2982 2983 m = classq_get(&ifsq->ifsq_prio); 2984 if (m != NULL) { 2985 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2986 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 2987 } 2988 return (m); 2989 } 2990 2991 int 2992 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 2993 struct altq_pktattr *pa __unused) 2994 { 2995 2996 M_ASSERTPKTHDR(m); 2997 again: 2998 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 2999 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3000 struct mbuf *m_drop; 3001 3002 if (m->m_flags & M_PRIO) { 3003 m_drop = NULL; 3004 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3005 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3006 /* Try dropping some from normal queue. */ 3007 m_drop = ifsq_norm_dequeue(ifsq); 3008 } 3009 if (m_drop == NULL) 3010 m_drop = ifsq_prio_dequeue(ifsq); 3011 } else { 3012 m_drop = ifsq_norm_dequeue(ifsq); 3013 } 3014 if (m_drop != NULL) { 3015 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 3016 m_freem(m_drop); 3017 goto again; 3018 } 3019 /* 3020 * No old packets could be dropped! 
3021 * NOTE: Caller increases oqdrops. 3022 */ 3023 m_freem(m); 3024 return (ENOBUFS); 3025 } else { 3026 if (m->m_flags & M_PRIO) 3027 ifsq_prio_enqueue(ifsq, m); 3028 else 3029 ifsq_norm_enqueue(ifsq, m); 3030 return (0); 3031 } 3032 } 3033 3034 struct mbuf * 3035 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 3036 { 3037 struct mbuf *m; 3038 3039 switch (op) { 3040 case ALTDQ_POLL: 3041 m = classq_head(&ifsq->ifsq_prio); 3042 if (m == NULL) 3043 m = classq_head(&ifsq->ifsq_norm); 3044 break; 3045 3046 case ALTDQ_REMOVE: 3047 m = ifsq_prio_dequeue(ifsq); 3048 if (m == NULL) 3049 m = ifsq_norm_dequeue(ifsq); 3050 break; 3051 3052 default: 3053 panic("unsupported ALTQ dequeue op: %d", op); 3054 } 3055 return m; 3056 } 3057 3058 int 3059 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 3060 { 3061 switch (req) { 3062 case ALTRQ_PURGE: 3063 for (;;) { 3064 struct mbuf *m; 3065 3066 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 3067 if (m == NULL) 3068 break; 3069 m_freem(m); 3070 } 3071 break; 3072 3073 default: 3074 panic("unsupported ALTQ request: %d", req); 3075 } 3076 return 0; 3077 } 3078 3079 static void 3080 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 3081 { 3082 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3083 int running = 0, need_sched; 3084 3085 /* 3086 * Try to do direct ifnet.if_start on the subqueue first, if there is 3087 * contention on the subqueue hardware serializer, ifnet.if_start on 3088 * the subqueue will be scheduled on the subqueue owner CPU. 3089 */ 3090 if (!ifsq_tryserialize_hw(ifsq)) { 3091 /* 3092 * Subqueue hardware serializer contention happened, 3093 * ifnet.if_start on the subqueue is scheduled on 3094 * the subqueue owner CPU, and we keep going. 
3095 */ 3096 ifsq_ifstart_schedule(ifsq, 1); 3097 return; 3098 } 3099 3100 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 3101 ifp->if_start(ifp, ifsq); 3102 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 3103 running = 1; 3104 } 3105 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 3106 3107 ifsq_deserialize_hw(ifsq); 3108 3109 if (need_sched) { 3110 /* 3111 * More data need to be transmitted, ifnet.if_start on the 3112 * subqueue is scheduled on the subqueue owner CPU, and we 3113 * keep going. 3114 * NOTE: ifnet.if_start subqueue interlock is not released. 3115 */ 3116 ifsq_ifstart_schedule(ifsq, force_sched); 3117 } 3118 } 3119 3120 /* 3121 * Subqeue packets staging mechanism: 3122 * 3123 * The packets enqueued into the subqueue are staged to a certain amount 3124 * before the ifnet.if_start on the subqueue is called. In this way, the 3125 * driver could avoid writing to hardware registers upon every packet, 3126 * instead, hardware registers could be written when certain amount of 3127 * packets are put onto hardware TX ring. The measurement on several modern 3128 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware 3129 * registers writing aggregation could save ~20% CPU time when 18bytes UDP 3130 * datagrams are transmitted at 1.48Mpps. The performance improvement by 3131 * hardware registers writing aggeregation is also mentioned by Luigi Rizzo's 3132 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 3133 * 3134 * Subqueue packets staging is performed for two entry points into drivers' 3135 * transmission function: 3136 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 3137 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 3138 * 3139 * Subqueue packets staging will be stopped upon any of the following 3140 * conditions: 3141 * - If the count of packets enqueued on the current CPU is great than or 3142 * equal to ifsq_stage_cntmax. 
 *   (XXX this should be per-interface)
 * - If the total length of packets enqueued on the current CPU is greater
 *   than or equal to the hardware's MTU - max_protohdr.  max_protohdr is
 *   cut from the hardware's MTU mainly because a full TCP segment's size
 *   is usually less than hardware's MTU.
 * - ifsq_ifstart_schedule() is not pending on the current CPU and
 *   ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
 *   released.
 * - The if_start_rollup(), which is registered as low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for netisr.
 *
 * NOTE:
 * Currently subqueue packet staging is only performed in netisr threads.
 *
 * ifq_dispatch() enqueues 'm' on the subqueue that ifq_map_subq() picks
 * for the current CPU and then either stages the packet, starts
 * transmission itself, or leaves that to a start that is already
 * scheduled.  Returns the result of the enqueue (0 on success).
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;

	crit_enter_quick(td);

	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);

	/* Sample these now; 'm' is handed to the enqueue method below. */
	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	if (td->td_type == TD_TYPE_NETISR) {
		/* Staging only happens in netisr threads. */
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		/* Keep staging while both thresholds are still unmet. */
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		IFNET_STAT_INC(ifp, oqdrops, 1);
		if (!ifsq_data_ready(ifsq)) {
			ALTQ_SQ_UNLOCK(ifsq);
			crit_exit_quick(td);
			return error;
		}
		/* Enqueue failed but data is pending: do not stage. */
		avoid_start = 0;
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			/* Put the stage on this CPU's list if not there yet. */
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			IFNET_STAT_INC(ifp, obytes, len);
			if (mcast)
				IFNET_STAT_INC(ifp, omcasts, 1);
			crit_exit_quick(td);
			return error;
		}

		/*
		 * Hold the subqueue interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (!error) {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			crit_exit_quick(td);
			return error;
		}

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			/* Not on the list: just restart the accounting. */
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (!start) {
		crit_exit_quick(td);
		return error;
	}

	ifsq_ifstart_try(ifsq, 0);

	crit_exit_quick(td);
	return error;
}

/*
 * Allocate a zeroed ifaddr of 'size' bytes ('size' must be at least
 * sizeof(struct ifaddr)) together with an array of ncpus
 * ifaddr_containers.  Every container points back at the ifaddr and
 * starts with a reference count of 1.
 */
void *
ifa_create(int size)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO);

	/*
	 * Make ifa_container available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifa->ifa_containers =
	    kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container),
	        M_IFADDR, M_INTWAIT | M_ZERO);

	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}

/*
 * Retire one container of an ifaddr.  The container must be valid, have
 * no references left and be on no list.  It is marked dead, and when
 * the last container of the ifaddr has been retired (ifa_ncnt drops
 * from 1) the container array and the ifaddr itself are freed.
 * 'cpu_id' is only used for debug output.
 */
void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
		("ifa is still on %#x lists", ifac->ifa_listmask));

	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
		("invalid # of ifac, %d", ifa->ifa_ncnt));
	/* atomic_fetchadd_int() returns the value before the decrement. */
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}

/*
 * Netmsg handler for ifa_iflink(): link the current CPU's container of
 * the ifaddr onto the interface's address list for this CPU (at the
 * tail or the head, as requested), then pass the message on with
 * netisr_forwardmsg_all() using cpu + 1.
 */
static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
		("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}

/*
 * Link 'ifa' onto the address lists of 'ifp'.  A non-zero 'tail'
 * appends, zero prepends.  The work is done by ifa_iflink_dispatch(),
 * started via netisr_domsg() with 0 as its second argument.
 */
void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	netisr_domsg(&msg.base, 0);
}

/*
 * Netmsg handler for ifa_ifunlink(): unlink the current CPU's container
 * of the ifaddr from the interface's address list for this CPU, then
 * pass the message on with netisr_forwardmsg_all() using cpu + 1.
 */
static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
		("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
	ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD;

	crit_exit();

	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}

/*
 * Unlink 'ifa' from the address lists of 'ifp'.  The work is done by
 * ifa_ifunlink_dispatch(), started via netisr_domsg() with 0 as its
 * second argument.  msg.tail is not set; the unlink handler does not
 * read it.
 */
void
ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_ifunlink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;

	netisr_domsg(&msg.base, 0);
}

/*
 * Netmsg handler for ifa_destroy(): drop one reference with IFAFREE()
 * and pass the message on with netisr_forwardmsg_all().
 */
static void
ifa_destroy_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;

	IFAFREE(msg->ifa);
	netisr_forwardmsg_all(&nmsg->base, mycpuid + 1);
}

/*
 * Release 'ifa' by running ifa_destroy_dispatch(), started via
 * netisr_domsg() with 0 as its second argument.  Only msg.ifa is set;
 * the handler reads nothing else.
 */
void
ifa_destroy(struct ifaddr *ifa)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
		    0, ifa_destroy_dispatch);
	msg.ifa = ifa;

	netisr_domsg(&msg.base, 0);
}

/*
 * netisr rollup function (registered in ifnetinit()): flush every stage
 * queued on the current CPU's staging list.  A stage flagged
 * IFSQ_STAGE_FLAG_SCHED has its if_start scheduled; otherwise the
 * subqueue interlock is taken if it is free and if_start is tried
 * directly.
 */
static void
if_start_rollup(void)
{
	struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid];
	struct ifsubq_stage *stage;

	crit_enter();

	while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) {
		struct ifaltq_subque *ifsq = stage->stg_subq;
		int is_sched = 0;

		/* Sample the flag before removal; it is asserted clear below. */
		if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)
			is_sched = 1;
		ifsq_stage_remove(head, stage);

		if (is_sched) {
			ifsq_ifstart_schedule(ifsq, 1);
		} else {
			int start = 0;

			ALTQ_SQ_LOCK(ifsq);
			if (!ifsq_is_started(ifsq)) {
				/*
				 * Hold the subqueue interlock of
				 * ifnet.if_start
				 */
				ifsq_set_started(ifsq);
				start = 1;
			}
			ALTQ_SQ_UNLOCK(ifsq);

			if (start)
				ifsq_ifstart_try(ifsq, 1);
		}
		KKASSERT((stage->stg_flags &
		    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	}

	crit_exit();
}

/*
 * Initialize the per-CPU staging lists and register if_start_rollup()
 * as a netisr rollup function.
 */
static void
ifnetinit(void *dummy __unused)
{
	int i;

	/* XXX netisr_ncpus */
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifsubq_stage_heads[i].stg_head);
	netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART);
}

/*
 * Register the if_com_alloc/if_com_free hooks for an interface type.
 * Neither slot may already be in use.
 */
void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{

        KASSERT(if_com_alloc[type] == NULL,
            ("if_register_com_alloc: %d already registered", type));
        KASSERT(if_com_free[type] == NULL,
            ("if_register_com_alloc: %d free already registered", type));

        if_com_alloc[type] = a;
        if_com_free[type] = f;
}

/*
 * Remove the if_com_alloc/if_com_free hooks of an interface type.  Both
 * must currently be registered.
 */
void
if_deregister_com_alloc(u_char type)
{

        KASSERT(if_com_alloc[type] != NULL,
            ("if_deregister_com_alloc: %d not registered", type));
        KASSERT(if_com_free[type] != NULL,
            ("if_deregister_com_alloc: %d free not registered", type));
        if_com_alloc[type] = NULL;
        if_com_free[type] = NULL;
}

/*
 * Set the maximum queue length to 'len' plus ifsq_stage_cntmax for
 * every CPU.
 */
void
ifq_set_maxlen(struct ifaltq *ifq, int len)
{
	ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax);
}

/*
 * Subqueue mapping that always selects the default subqueue.
 */
int
ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused)
{
	return ALTQ_SUBQ_INDEX_DEFAULT;
}

/*
 * Subqueue mapping that selects cpuid modulo ifq->altq_subq_mappriv.
 */
int
ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid)
{

	return (cpuid % ifq->altq_subq_mappriv);
}

/*
 * Callout handler of a subqueue watchdog.  Nothing happens while
 * wd_timer is 0 or has not yet counted down to 0.  When it reaches 0
 * the watchdog function is called with all of the interface's
 * serializers held; if they cannot be taken without blocking, wd_timer
 * is set to 1 so that the attempt is repeated on the next run.  The
 * callout is re-armed in every case.
 */
static void
ifsq_watchdog(void *arg)
{
	struct ifsubq_watchdog *wd = arg;
	struct ifnet *ifp;

	if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer))
		goto done;

	ifp = ifsq_get_ifp(wd->wd_subq);
	if (ifnet_tryserialize_all(ifp)) {
		wd->wd_watchdog(wd->wd_subq);
		ifnet_deserialize_all(ifp);
	} else {
		/* try again next timeout */
		wd->wd_timer = 1;
	}
done:
	ifsq_watchdog_reset(wd);
}

/*
 * (Re-)arm the watchdog callout with a timeout of hz on the CPU
 * returned by ifsq_get_cpuid() for the watched subqueue.
 */
static void
ifsq_watchdog_reset(struct ifsubq_watchdog *wd)
{
	callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd,
	    ifsq_get_cpuid(wd->wd_subq));
}

/*
 * Initialize a subqueue watchdog: set up its callout, clear the timer
 * and record the subqueue and the watchdog function.
 */
void
ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq,
    ifsq_watchdog_t watchdog)
{
	callout_init_mp(&wd->wd_callout);
	wd->wd_timer = 0;
	wd->wd_subq = ifsq;
	wd->wd_watchdog = watchdog;
}

/*
 * Start the watchdog: clear the timer and arm the callout.
 */
void
ifsq_watchdog_start(struct ifsubq_watchdog *wd)
{
	wd->wd_timer = 0;
	ifsq_watchdog_reset(wd);
}

/*
 * Stop the watchdog: clear the timer and stop the callout.
 */
void
ifsq_watchdog_stop(struct ifsubq_watchdog *wd)
{
	wd->wd_timer = 0;
	callout_stop(&wd->wd_callout);
}

/*
 * Acquire the ifnet mutex.  Must not be called from a netisr thread.
 */
void
ifnet_lock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_lock(&ifnet_mtx);
}

/*
 * Release the ifnet mutex.  Must not be called from a netisr thread.
 */
void
ifnet_unlock(void)
{
	KASSERT(curthread->td_type != TD_TYPE_NETISR,
	    ("try holding ifnet lock in netisr"));
	mtx_unlock(&ifnet_mtx);
}

/*
 * Allocate an ifnet array with room for 'count' entries and set its
 * ifnet_count.  The entries themselves are left for the caller to fill
 * in (the allocation is not zeroed).
 */
static struct ifnet_array *
ifnet_array_alloc(int count)
{
	struct ifnet_array *arr;

	arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]),
	    M_IFNET, M_WAITOK);
	arr->ifnet_count = count;

	return arr;
}

static void 3595 ifnet_array_free(struct ifnet_array *arr) 3596 { 3597 if (arr == &ifnet_array0) 3598 return; 3599 kfree(arr, M_IFNET); 3600 } 3601 3602 static struct ifnet_array * 3603 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3604 { 3605 struct ifnet_array *arr; 3606 int count, i; 3607 3608 KASSERT(old_arr->ifnet_count >= 0, 3609 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3610 count = old_arr->ifnet_count + 1; 3611 arr = ifnet_array_alloc(count); 3612 3613 /* 3614 * Save the old ifnet array and append this ifp to the end of 3615 * the new ifnet array. 3616 */ 3617 for (i = 0; i < old_arr->ifnet_count; ++i) { 3618 KASSERT(old_arr->ifnet_arr[i] != ifp, 3619 ("%s is already in ifnet array", ifp->if_xname)); 3620 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3621 } 3622 KASSERT(i == count - 1, 3623 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3624 ifp->if_xname, count - 1, i)); 3625 arr->ifnet_arr[i] = ifp; 3626 3627 return arr; 3628 } 3629 3630 static struct ifnet_array * 3631 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3632 { 3633 struct ifnet_array *arr; 3634 int count, i, idx, found = 0; 3635 3636 KASSERT(old_arr->ifnet_count > 0, 3637 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3638 count = old_arr->ifnet_count - 1; 3639 arr = ifnet_array_alloc(count); 3640 3641 /* 3642 * Save the old ifnet array, but skip this ifp. 
3643 */ 3644 idx = 0; 3645 for (i = 0; i < old_arr->ifnet_count; ++i) { 3646 if (old_arr->ifnet_arr[i] == ifp) { 3647 KASSERT(!found, 3648 ("dup %s is in ifnet array", ifp->if_xname)); 3649 found = 1; 3650 continue; 3651 } 3652 KASSERT(idx < count, 3653 ("invalid ifnet array index %d, count %d", idx, count)); 3654 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3655 ++idx; 3656 } 3657 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3658 KASSERT(idx == count, 3659 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3660 ifp->if_xname, count, idx)); 3661 3662 return arr; 3663 } 3664 3665 const struct ifnet_array * 3666 ifnet_array_get(void) 3667 { 3668 const struct ifnet_array *ret; 3669 3670 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3671 ret = ifnet_array; 3672 /* Make sure 'ret' is really used. */ 3673 cpu_ccfence(); 3674 return (ret); 3675 } 3676 3677 int 3678 ifnet_array_isempty(void) 3679 { 3680 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3681 if (ifnet_array->ifnet_count == 0) 3682 return 1; 3683 else 3684 return 0; 3685 } 3686 3687 void 3688 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3689 { 3690 struct ifaddr *ifa; 3691 3692 memset(mark, 0, sizeof(*mark)); 3693 ifa = &mark->ifa; 3694 3695 mark->ifac.ifa = ifa; 3696 3697 ifa->ifa_addr = &mark->addr; 3698 ifa->ifa_dstaddr = &mark->dstaddr; 3699 ifa->ifa_netmask = &mark->netmask; 3700 ifa->ifa_ifp = ifp; 3701 } 3702 3703 static int 3704 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3705 { 3706 3707 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3708 3709 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3710 ring_cnt = ring_cntmax; 3711 if (ring_cnt > netisr_ncpus) 3712 ring_cnt = netisr_ncpus; 3713 return (ring_cnt); 3714 } 3715 3716 static void 3717 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3718 { 3719 int i, offset; 3720 3721 KASSERT(grid > 0, ("invalid if_ringmap grid %d", 
grid)); 3722 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3723 grid, rm->rm_cnt)); 3724 rm->rm_grid = grid; 3725 3726 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3727 for (i = 0; i < rm->rm_cnt; ++i) { 3728 rm->rm_cpumap[i] = offset + i; 3729 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3730 ("invalid cpumap[%d] = %d, offset %d", i, 3731 rm->rm_cpumap[i], offset)); 3732 } 3733 } 3734 3735 static struct if_ringmap * 3736 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3737 uint32_t flags) 3738 { 3739 struct if_ringmap *rm; 3740 int i, grid = 0, prev_grid; 3741 3742 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3743 rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]), 3744 M_DEVBUF, M_WAITOK | M_ZERO); 3745 3746 rm->rm_cnt = ring_cnt; 3747 if (flags & RINGMAP_FLAG_POWEROF2) 3748 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3749 3750 prev_grid = netisr_ncpus; 3751 for (i = 0; i < netisr_ncpus; ++i) { 3752 if (netisr_ncpus % (i + 1) != 0) 3753 continue; 3754 3755 grid = netisr_ncpus / (i + 1); 3756 if (rm->rm_cnt > grid) { 3757 grid = prev_grid; 3758 break; 3759 } 3760 3761 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3762 break; 3763 prev_grid = grid; 3764 } 3765 if_ringmap_set_grid(dev, rm, grid); 3766 3767 return (rm); 3768 } 3769 3770 struct if_ringmap * 3771 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3772 { 3773 3774 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3775 RINGMAP_FLAG_NONE)); 3776 } 3777 3778 struct if_ringmap * 3779 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3780 { 3781 3782 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3783 RINGMAP_FLAG_POWEROF2)); 3784 } 3785 3786 void 3787 if_ringmap_free(struct if_ringmap *rm) 3788 { 3789 3790 kfree(rm, M_DEVBUF); 3791 } 3792 3793 /* 3794 * Align the two ringmaps. 3795 * 3796 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 
3797 * 3798 * Before: 3799 * 3800 * CPU 0 1 2 3 4 5 6 7 3801 * NIC_RX n0 n1 n2 n3 3802 * NIC_TX N0 N1 3803 * 3804 * After: 3805 * 3806 * CPU 0 1 2 3 4 5 6 7 3807 * NIC_RX n0 n1 n2 n3 3808 * NIC_TX N0 N1 3809 */ 3810 void 3811 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3812 { 3813 3814 if (rm0->rm_grid > rm1->rm_grid) 3815 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 3816 else if (rm0->rm_grid < rm1->rm_grid) 3817 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 3818 } 3819 3820 void 3821 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3822 { 3823 int subset_grid, cnt, divisor, mod, offset, i; 3824 struct if_ringmap *subset_rm, *rm; 3825 int old_rm0_grid, old_rm1_grid; 3826 3827 if (rm0->rm_grid == rm1->rm_grid) 3828 return; 3829 3830 /* Save grid for later use */ 3831 old_rm0_grid = rm0->rm_grid; 3832 old_rm1_grid = rm1->rm_grid; 3833 3834 if_ringmap_align(dev, rm0, rm1); 3835 3836 /* 3837 * Re-shuffle rings to get more even distribution. 3838 * 3839 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3840 * 3841 * CPU 0 1 2 3 4 5 6 7 8 9 10 11 3842 * 3843 * NIC_RX a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 3844 * NIC_TX A0 A1 B0 B1 C0 C1 3845 * 3846 * NIC_RX d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3 3847 * NIC_TX D0 D1 E0 E1 F0 F1 3848 */ 3849 3850 if (rm0->rm_cnt >= (2 * old_rm1_grid)) { 3851 cnt = rm0->rm_cnt; 3852 subset_grid = old_rm1_grid; 3853 subset_rm = rm1; 3854 rm = rm0; 3855 } else if (rm1->rm_cnt > (2 * old_rm0_grid)) { 3856 cnt = rm1->rm_cnt; 3857 subset_grid = old_rm0_grid; 3858 subset_rm = rm0; 3859 rm = rm1; 3860 } else { 3861 /* No space to shuffle. 
*/ 3862 return; 3863 } 3864 3865 mod = cnt / subset_grid; 3866 KKASSERT(mod >= 2); 3867 divisor = netisr_ncpus / rm->rm_grid; 3868 offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid; 3869 3870 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3871 subset_rm->rm_cpumap[i] += offset; 3872 KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus, 3873 ("match: invalid cpumap[%d] = %d, offset %d", 3874 i, subset_rm->rm_cpumap[i], offset)); 3875 } 3876 #ifdef INVARIANTS 3877 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3878 int j; 3879 3880 for (j = 0; j < rm->rm_cnt; ++j) { 3881 if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i]) 3882 break; 3883 } 3884 KASSERT(j < rm->rm_cnt, 3885 ("subset cpumap[%d] = %d not found in superset", 3886 i, subset_rm->rm_cpumap[i])); 3887 } 3888 #endif 3889 } 3890 3891 int 3892 if_ringmap_count(const struct if_ringmap *rm) 3893 { 3894 3895 return (rm->rm_cnt); 3896 } 3897 3898 int 3899 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 3900 { 3901 3902 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 3903 return (rm->rm_cpumap[ring]); 3904 } 3905 3906 void 3907 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 3908 { 3909 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 3910 3911 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 3912 ("invalid redirect table entries %d", table_nent)); 3913 3914 grid_idx = 0; 3915 for (i = 0; i < NETISR_CPUMAX; ++i) { 3916 table[i] = grid_idx++ % rm->rm_cnt; 3917 3918 if (grid_idx == rm->rm_grid) 3919 grid_idx = 0; 3920 } 3921 3922 /* 3923 * Make the ring distributed more evenly for the remainder 3924 * of each grid. 3925 * 3926 * e.g. 12 netisrs, rm contains 8 rings. 3927 * 3928 * Redirect table before: 3929 * 3930 * 0 1 2 3 4 5 6 7 0 1 2 3 0 1 2 3 3931 * 4 5 6 7 0 1 2 3 0 1 2 3 4 5 6 7 3932 * 0 1 2 3 0 1 2 3 4 5 6 7 0 1 2 3 3933 * .... 
3934 * 3935 * Redirect table after being patched (pX, patched entries): 3936 * 3937 * 0 1 2 3 4 5 6 7 p0 p1 p2 p3 0 1 2 3 3938 * 4 5 6 7 p4 p5 p6 p7 0 1 2 3 4 5 6 7 3939 * p0 p1 p2 p3 0 1 2 3 4 5 6 7 p4 p5 p6 p7 3940 * .... 3941 */ 3942 patch_cnt = rm->rm_grid % rm->rm_cnt; 3943 if (patch_cnt == 0) 3944 goto done; 3945 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 3946 3947 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 3948 grid_idx = 0; 3949 for (i = 0; i < grid_cnt; ++i) { 3950 int j; 3951 3952 for (j = 0; j < patch_cnt; ++j) { 3953 int fix_idx; 3954 3955 fix_idx = (i * rm->rm_grid) + patch_off + j; 3956 if (fix_idx >= NETISR_CPUMAX) 3957 goto done; 3958 table[fix_idx] = grid_idx++ % rm->rm_cnt; 3959 } 3960 } 3961 done: 3962 /* 3963 * If the device supports larger redirect table, duplicate 3964 * the first NETISR_CPUMAX entries to the rest of the table, 3965 * so that it matches upper layer's expectation: 3966 * (hash & NETISR_CPUMASK) % netisr_ncpus 3967 */ 3968 ncopy = table_nent / NETISR_CPUMAX; 3969 for (i = 1; i < ncopy; ++i) { 3970 memcpy(&table[i * NETISR_CPUMAX], table, 3971 NETISR_CPUMAX * sizeof(table[0])); 3972 } 3973 if (if_ringmap_dumprdr) { 3974 for (i = 0; i < table_nent; ++i) { 3975 if (i != 0 && i % 16 == 0) 3976 kprintf("\n"); 3977 kprintf("%03d ", table[i]); 3978 } 3979 kprintf("\n"); 3980 } 3981 } 3982 3983 int 3984 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 3985 { 3986 struct if_ringmap *rm = arg1; 3987 int i, error = 0; 3988 3989 for (i = 0; i < rm->rm_cnt; ++i) { 3990 int cpu = rm->rm_cpumap[i]; 3991 3992 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 3993 if (error) 3994 break; 3995 } 3996 return (error); 3997 } 3998