1 /* 2 * Copyright (c) 1980, 1986, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/lock.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>
#include <sys/jail.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_ringmap.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr2.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif /* INET6 */
#endif /* INET || INET6 */

/*
 * Netmsg that carries an interface address together with its interface.
 * The users of this structure are not in this part of the file.
 * NOTE(review): 'tail' presumably selects tail vs. head insertion when
 * the address is linked onto the interface -- confirm against the
 * ifa_iflink() implementation.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr	*ifa;
	struct ifnet	*ifp;
	int		tail;
};

/*
 * Head of a list of staged subqueues (struct ifsubq_stage, linked via
 * stg_link).  One head exists per CPU, see ifsubq_stage_heads[]; the
 * structure is tagged __cachealign.
 */
struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage)	stg_head;
} __cachealign;

/*
 * Ring map descriptor; rm_cpumap[] is a flexible array member.
 * NOTE(review): presumably rm_cnt is the number of rings and rm_cpumap[]
 * maps a ring index to a CPU id; the code using it is outside this
 * section -- confirm there.
 */
struct if_ringmap {
	int		rm_cnt;
	int		rm_grid;
	int		rm_cpumap[];
};

/* Ring map allocation flags; their consumers are outside this section. */
#define RINGMAP_FLAG_NONE		0x0
#define RINGMAP_FLAG_POWEROF2		0x1

/*
 * System initialization
 */
/* Boot-time hooks, timers and routing callbacks defined in this file. */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *);
static int	if_rtdel(struct radix_node *, void *);
static void	if_slowtimo_dispatch(netmsg_t);

/* Helper functions */
static void	ifsq_watchdog_reset(struct ifsubq_watchdog *);
static int	if_delmulti_serialized(struct ifnet *, struct sockaddr *);
static struct ifnet_array *ifnet_array_alloc(int);
static void	ifnet_array_free(struct ifnet_array *);
static struct ifnet_array *ifnet_array_add(struct ifnet *,
		    const struct ifnet_array *);
static struct ifnet_array *ifnet_array_del(struct ifnet *,
		    const struct ifnet_array *);
static struct ifg_group *if_creategroup(const char *);
static int	if_destroygroup(struct ifg_group *);
static int	if_delgroup_locked(struct ifnet *, const char *);
static int	if_getgroups(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6 related files.
 * Should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

/* sysctl tree: net.link, net.link.generic and net.link.ringmap */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap");

/*
 * Staging is only used by ifsq_ifstart_schedule() while this value
 * is greater than zero.
 */
static int ifsq_stage_cntmax = 16;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");

static int if_stats_compat = 0;
SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
    &if_stats_compat, 0, "Compat the old ifnet stats");

static int if_ringmap_dumprdr = 0;
SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW,
    &if_ringmap_dumprdr, 0, "dump redirect table");

/* Interface description */
static unsigned int ifdescr_maxlen = 1024;
SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW,
    &ifdescr_maxlen, 0,
    "administrative maximum length for interface description");

/* ifinit() runs at SI_SUB_PROTO_IF, ifnetinit() at SI_SUB_PRE_DRIVERS. */
SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL);

/*
 * NOTE(review): presumably per-interface-type (if_type, 0..255) common
 * structure alloc/free hooks; their users are outside this section.
 */
static  if_com_alloc_t *if_com_alloc[256];
static  if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");
MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnet = TAILQ_HEAD_INITIALIZER(ifnet);
/* All interface groups; list changes are made under ifgroup_lock. */
struct ifgrouphead	ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
/* Initialized in ifinit(); taken through ifgroup_lockmgr(). */
static struct lock	ifgroup_lock;

/*
 * Current ifnet array.  if_attach()/if_detach() install a new array
 * and free the old one after syncing all netisrs.
 */
static struct ifnet_array	ifnet_array0;
static struct ifnet_array	*ifnet_array = &ifnet_array0;

/* Initialized and kicked off in ifinit(). */
static struct callout		if_slowtimo_timer;
static struct netmsg_base	if_slowtimo_netmsg;

/*
 * if_index is the highest interface index in use and ifindex2ifnet
 * is the table indexed by it; both are maintained by if_attach()
 * and if_detach().
 */
int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
/*
 * NOTE(review): presumably the mutex behind ifnet_lock()/ifnet_unlock();
 * those are not defined in this section -- confirm.
 */
static struct mtx	ifnet_mtx = MTX_INITIALIZER("ifnet");

/* Per-CPU staging lists, indexed by mycpuid. */
static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];

/* KTR tracing definitions; compiled out unless "notyet" is defined. */
#ifdef notyet
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
#endif /* notyet */

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
233 */ 234 /* ARGSUSED */ 235 static void 236 ifinit(void *dummy) 237 { 238 lockinit(&ifgroup_lock, "ifgroup", 0, 0); 239 240 callout_init_mp(&if_slowtimo_timer); 241 netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport, 242 MSGF_PRIORITY, if_slowtimo_dispatch); 243 244 /* Start if_slowtimo */ 245 lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg); 246 } 247 248 static void 249 ifsq_ifstart_ipifunc(void *arg) 250 { 251 struct ifaltq_subque *ifsq = arg; 252 struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid); 253 254 crit_enter(); 255 if (lmsg->ms_flags & MSGF_DONE) 256 lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg); 257 crit_exit(); 258 } 259 260 static __inline void 261 ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 262 { 263 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 264 TAILQ_REMOVE(&head->stg_head, stage, stg_link); 265 stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED); 266 stage->stg_cnt = 0; 267 stage->stg_len = 0; 268 } 269 270 static __inline void 271 ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage) 272 { 273 KKASSERT((stage->stg_flags & 274 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 275 stage->stg_flags |= IFSQ_STAGE_FLAG_QUED; 276 TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link); 277 } 278 279 /* 280 * Schedule ifnet.if_start on the subqueue owner CPU 281 */ 282 static void 283 ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force) 284 { 285 int cpu; 286 287 if (!force && curthread->td_type == TD_TYPE_NETISR && 288 ifsq_stage_cntmax > 0) { 289 struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid); 290 291 stage->stg_cnt = 0; 292 stage->stg_len = 0; 293 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 294 ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage); 295 stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED; 296 return; 297 } 298 299 cpu = ifsq_get_cpuid(ifsq); 300 if (cpu != mycpuid) 301 
lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq); 302 else 303 ifsq_ifstart_ipifunc(ifsq); 304 } 305 306 /* 307 * NOTE: 308 * This function will release ifnet.if_start subqueue interlock, 309 * if ifnet.if_start for the subqueue does not need to be scheduled 310 */ 311 static __inline int 312 ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running) 313 { 314 if (!running || ifsq_is_empty(ifsq) 315 #ifdef ALTQ 316 || ifsq->ifsq_altq->altq_tbr != NULL 317 #endif 318 ) { 319 ALTQ_SQ_LOCK(ifsq); 320 /* 321 * ifnet.if_start subqueue interlock is released, if: 322 * 1) Hardware can not take any packets, due to 323 * o interface is marked down 324 * o hardware queue is full (ifsq_is_oactive) 325 * Under the second situation, hardware interrupt 326 * or polling(4) will call/schedule ifnet.if_start 327 * on the subqueue when hardware queue is ready 328 * 2) There is no packet in the subqueue. 329 * Further ifq_dispatch or ifq_handoff will call/ 330 * schedule ifnet.if_start on the subqueue. 331 * 3) TBR is used and it does not allow further 332 * dequeueing. 333 * TBR callout will call ifnet.if_start on the 334 * subqueue. 335 */ 336 if (!running || !ifsq_data_ready(ifsq)) { 337 ifsq_clr_started(ifsq); 338 ALTQ_SQ_UNLOCK(ifsq); 339 return 0; 340 } 341 ALTQ_SQ_UNLOCK(ifsq); 342 } 343 return 1; 344 } 345 346 static void 347 ifsq_ifstart_dispatch(netmsg_t msg) 348 { 349 struct lwkt_msg *lmsg = &msg->base.lmsg; 350 struct ifaltq_subque *ifsq = lmsg->u.ms_resultp; 351 struct ifnet *ifp = ifsq_get_ifp(ifsq); 352 struct globaldata *gd = mycpu; 353 int running = 0, need_sched; 354 355 crit_enter_gd(gd); 356 357 lwkt_replymsg(lmsg, 0); /* reply ASAP */ 358 359 if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) { 360 /* 361 * We need to chase the subqueue owner CPU change. 
362 */ 363 ifsq_ifstart_schedule(ifsq, 1); 364 crit_exit_gd(gd); 365 return; 366 } 367 368 ifsq_serialize_hw(ifsq); 369 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 370 ifp->if_start(ifp, ifsq); 371 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 372 running = 1; 373 } 374 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 375 ifsq_deserialize_hw(ifsq); 376 377 if (need_sched) { 378 /* 379 * More data need to be transmitted, ifnet.if_start is 380 * scheduled on the subqueue owner CPU, and we keep going. 381 * NOTE: ifnet.if_start subqueue interlock is not released. 382 */ 383 ifsq_ifstart_schedule(ifsq, 0); 384 } 385 386 crit_exit_gd(gd); 387 } 388 389 /* Device driver ifnet.if_start helper function */ 390 void 391 ifsq_devstart(struct ifaltq_subque *ifsq) 392 { 393 struct ifnet *ifp = ifsq_get_ifp(ifsq); 394 int running = 0; 395 396 ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq); 397 398 ALTQ_SQ_LOCK(ifsq); 399 if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) { 400 ALTQ_SQ_UNLOCK(ifsq); 401 return; 402 } 403 ifsq_set_started(ifsq); 404 ALTQ_SQ_UNLOCK(ifsq); 405 406 ifp->if_start(ifp, ifsq); 407 408 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 409 running = 1; 410 411 if (ifsq_ifstart_need_schedule(ifsq, running)) { 412 /* 413 * More data need to be transmitted, ifnet.if_start is 414 * scheduled on ifnet's CPU, and we keep going. 415 * NOTE: ifnet.if_start interlock is not released. 
416 */ 417 ifsq_ifstart_schedule(ifsq, 0); 418 } 419 } 420 421 void 422 if_devstart(struct ifnet *ifp) 423 { 424 ifsq_devstart(ifq_get_subq_default(&ifp->if_snd)); 425 } 426 427 /* Device driver ifnet.if_start schedule helper function */ 428 void 429 ifsq_devstart_sched(struct ifaltq_subque *ifsq) 430 { 431 ifsq_ifstart_schedule(ifsq, 1); 432 } 433 434 void 435 if_devstart_sched(struct ifnet *ifp) 436 { 437 ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd)); 438 } 439 440 static void 441 if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 442 { 443 lwkt_serialize_enter(ifp->if_serializer); 444 } 445 446 static void 447 if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 448 { 449 lwkt_serialize_exit(ifp->if_serializer); 450 } 451 452 static int 453 if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused) 454 { 455 return lwkt_serialize_try(ifp->if_serializer); 456 } 457 458 #ifdef INVARIANTS 459 static void 460 if_default_serialize_assert(struct ifnet *ifp, 461 enum ifnet_serialize slz __unused, 462 boolean_t serialized) 463 { 464 if (serialized) 465 ASSERT_SERIALIZED(ifp->if_serializer); 466 else 467 ASSERT_NOT_SERIALIZED(ifp->if_serializer); 468 } 469 #endif 470 471 /* 472 * Attach an interface to the list of "active" interfaces. 473 * 474 * The serializer is optional. 
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q, qlen;
	char qlenname[64];

	/* Current capacity of ifindex2ifnet; doubled whenever it is grown. */
	static int if_indexlim = 8;

	/*
	 * Either the driver supplies the complete set of serialize
	 * methods (and no serializer), or the default methods are
	 * installed and operate on ifp->if_serializer.
	 */
	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
			ifp->if_tryserialize != NULL &&
			ifp->if_serialize_assert != NULL,
			("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
			("both serialize functions and default serializer "
			 "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
			ifp->if_tryserialize == NULL &&
			ifp->if_serialize_assert == NULL,
			("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * Make if_addrhead available on all CPUs, since they
	 * could be accessed by any threads.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
				    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	/* The return value of if_addgroup() is not checked here. */
	if_addgroup(ifp, IFG_ALL);

	/*
	 * create a Link Level name for this device
	 *
	 * A single ifaddr allocation holds two sockaddr_dl of socksize
	 * bytes each right behind the ifaddr: the address, followed by
	 * the netmask.
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	/* sdl_addr is kept so that sdl_index can be filled in later on. */
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Advance to the second sockaddr_dl, which becomes the netmask. */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	/* The netmask covers exactly the interface name bytes. */
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	/*
	 * Make if_data available on all CPUs, since they could
	 * be updated by hardware interrupt routing, which could
	 * be bound to any CPU.
	 */
	ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu),
				    M_DEVBUF,
				    M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	/* Reset the send queue; only ALTQF_CANTCHANGE survives in altq_flags. */
	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	/* At least one subqueue is always allocated. */
	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq =
		kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
			M_DEVBUF,
			M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/* Allow user to override driver's setting. */
	ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname);
	qlen = -1;
	TUNABLE_INT_FETCH(qlenname, &qlen);
	if (qlen > 0) {
		if_printf(ifp, "qlenmax -> %d\n", qlen);
		ifq_set_maxlen(ifq, qlen);
	}

	/* Per-subqueue setup; every subqueue starts out owned by CPU 0. */
	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		/* XXX: netisr_ncpus */
		ifsq->ifsq_stage =
			kmalloc(ncpus * sizeof(struct ifsubq_stage),
				M_DEVBUF,
				M_WAITOK | M_ZERO | M_CACHEALIGN);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		/*
		 * Allocate one if_start message for each CPU, since
		 * the hardware TX ring could be assigned to any CPU.
		 *
		 * NOTE:
		 * If the hardware TX ring polling CPU and the hardware
		 * TX ring interrupt CPU are same, one if_start message
		 * should be enough.
		 */
		ifsq->ifsq_ifstart_nmsg =
			kmalloc(ncpus * sizeof(struct netmsg_base),
				M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
				    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			/* ifsq_ifstart_dispatch() reads the subqueue from here. */
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2ifnet, ifnet queue and ifnet
	 * array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		/* NOTE: this 'q' shadows the outer int 'q' within this scope. */
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			/* The old table is half the size of the new one. */
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	/*
	 * If domains are already registered, attach them now; otherwise
	 * if_attachdomain() does it for every listed interface at
	 * SI_SUB_PROTO_IFATTACHDOMAIN.
	 */
	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}

/*
 * SYSINIT hook: run if_attachdomain1() on every interface that is
 * on the ifnet list, under the ifnet lock.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
	if_attachdomain, NULL);

/*
 * Clear ifp->if_afdata and fill in the slot of each domain that
 * provides a dom_ifattach method with that method's return value.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
				(*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 *
 * Netisr0 handler; the interface is passed in lmsg.u.ms_resultp.
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_NETISR0;

	/*
	 * The ifaddr processing in the following loop will block,
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so we don't care about the
	 * blockness of the ifaddr processing here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
			      ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
			struct sockaddr_in saved_addr, saved_dst;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			/* Save information for panic. */
			memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr));
			if (ifa->ifa_dstaddr != NULL) {
				memcpy(&saved_dst, ifa->ifa_dstaddr,
				    sizeof(saved_dst));
			} else {
				memset(&saved_dst, 0, sizeof(saved_dst));
			}

			/* Delete the address through the SIOCDIFADDR path. */
			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
				       NULL) == 0)
				continue;

			/* MUST NOT HAPPEN */
			panic("%s: in_control failed %x, dst %x", ifp->if_xname,
			    ntohl(saved_addr.sin_addr.s_addr),
			    ntohl(saved_dst.sin_addr.s_addr));
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		/* Any other family: unlink and destroy the ifaddr directly. */
		if_printf(ifp, "destroy ifaddr family %d\n",
		    ifa->ifa_addr->sa_family);
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Purge all non-AF_LINK addresses of the interface by sending
 * if_purgeaddrs_nolink_dispatch() a message through netisr_domsg();
 * the reply port is the current thread's message port.
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	nmsg.lmsg.u.ms_resultp = ifp;
	netisr_domsg(&nmsg, 0);
}

/*
 * Remove the current CPU's stage of every subqueue of the ifaltq
 * (passed in lmsg.u.ms_resultp) from this CPU's staging list, if
 * it is queued there.
 */
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

/*
 * Run ifq_stage_detach_handler() on the netisr port of each CPU,
 * one CPU at a time, reusing a single message.
 */
static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	/* XXX netisr_ncpus */
	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}

/* Message used by if_detach() to delete the routes of an interface. */
struct netmsg_if_rtdel {
	struct netmsg_base	base;
	struct ifnet		*ifp;
};

/*
 * Walk the current CPU's routing table of every address family
 * (1..AF_MAX) with if_rtdel() for the interface in the message,
 * then forward the message to the next CPU.
 */
static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, cpu;

	cpu = mycpuid;
	ASSERT_NETISR_NCPUS(cpu);

	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head	*rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}
	netisr_forwardmsg(&msg->base, cpu + 1);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct ifg_list *ifgl;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2ifnet, ifnet queue and ifnet
	 * array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 * if_index only shrinks past trailing empty slots.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	/* Leave every group; if_delgroup_locked() unlinks the list head. */
	ifgroup_lockmgr(LK_EXCLUSIVE);
	while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group);
	ifgroup_lockmgr(LK_RELEASE);

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

	/* Decrease the mbuf clusters/jclusters limits increased by us */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(-ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(-ifp->if_nmbjclusters);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	/*
	 * After the purge at most the link-level ifaddr may remain on
	 * the list (asserted below); unlink and destroy it here.
	 */
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
			("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
			("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	netisr_domsg_global(&msg.base);

	/* Give each domain with per-interface data a chance to release it. */
	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
				ifp->if_afdata[dp->dom_family]);
	}

	kfree(ifp->if_addrheads, M_IFADDR);

	/*
	 * Wait for IPIs and take the stages off the per-CPU staging
	 * lists before the per-subqueue memory is freed below.
	 */
	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	/* Release what if_attach() allocated for each subqueue. */
	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}

/*
 * Apply a lockmgr operation (e.g. LK_SHARED, LK_EXCLUSIVE, LK_RELEASE)
 * to the interface group lock and return lockmgr()'s result.
 */
int
ifgroup_lockmgr(u_int flags)
{
	return lockmgr(&ifgroup_lock, flags);
}

/*
 * Create an empty interface group.
 *
 * The new group is appended to ifg_head under the exclusive group
 * lock, which this function acquires itself; group_attach_event is
 * invoked after the lock has been released.
 */
static struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg;

	ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK);
	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_attach_event, ifg);

	return (ifg);
}

/*
 * Destroy an empty interface group.
1114 */ 1115 static int 1116 if_destroygroup(struct ifg_group *ifg) 1117 { 1118 KASSERT(ifg->ifg_refcnt == 0, 1119 ("trying to delete a non-empty interface group")); 1120 1121 ifgroup_lockmgr(LK_EXCLUSIVE); 1122 TAILQ_REMOVE(&ifg_head, ifg, ifg_next); 1123 ifgroup_lockmgr(LK_RELEASE); 1124 1125 EVENTHANDLER_INVOKE(group_detach_event, ifg); 1126 kfree(ifg, M_IFNET); 1127 1128 return (0); 1129 } 1130 1131 /* 1132 * Add the interface to a group. 1133 * The target group will be created if it doesn't exist. 1134 */ 1135 int 1136 if_addgroup(struct ifnet *ifp, const char *groupname) 1137 { 1138 struct ifg_list *ifgl; 1139 struct ifg_group *ifg; 1140 struct ifg_member *ifgm; 1141 1142 if (groupname[0] && 1143 groupname[strlen(groupname) - 1] >= '0' && 1144 groupname[strlen(groupname) - 1] <= '9') 1145 return (EINVAL); 1146 1147 ifgroup_lockmgr(LK_SHARED); 1148 1149 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1150 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) { 1151 ifgroup_lockmgr(LK_RELEASE); 1152 return (EEXIST); 1153 } 1154 } 1155 1156 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1157 if (strcmp(ifg->ifg_group, groupname) == 0) 1158 break; 1159 } 1160 1161 ifgroup_lockmgr(LK_RELEASE); 1162 1163 if (ifg == NULL) 1164 ifg = if_creategroup(groupname); 1165 1166 ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK); 1167 ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK); 1168 ifgl->ifgl_group = ifg; 1169 ifgm->ifgm_ifp = ifp; 1170 ifg->ifg_refcnt++; 1171 1172 ifgroup_lockmgr(LK_EXCLUSIVE); 1173 TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); 1174 TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); 1175 ifgroup_lockmgr(LK_RELEASE); 1176 1177 EVENTHANDLER_INVOKE(group_change_event, groupname); 1178 1179 return (0); 1180 } 1181 1182 /* 1183 * Remove the interface from a group. 1184 * The group will be destroyed if it becomes empty. 1185 * 1186 * The 'ifgroup_lock' must be hold exclusively when calling this. 
1187 */ 1188 static int 1189 if_delgroup_locked(struct ifnet *ifp, const char *groupname) 1190 { 1191 struct ifg_list *ifgl; 1192 struct ifg_member *ifgm; 1193 1194 KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE); 1195 1196 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1197 if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) 1198 break; 1199 } 1200 if (ifgl == NULL) 1201 return (ENOENT); 1202 1203 TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); 1204 1205 TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) { 1206 if (ifgm->ifgm_ifp == ifp) 1207 break; 1208 } 1209 1210 if (ifgm != NULL) { 1211 TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); 1212 1213 ifgroup_lockmgr(LK_RELEASE); 1214 EVENTHANDLER_INVOKE(group_change_event, groupname); 1215 ifgroup_lockmgr(LK_EXCLUSIVE); 1216 1217 kfree(ifgm, M_IFNET); 1218 ifgl->ifgl_group->ifg_refcnt--; 1219 } 1220 1221 if (ifgl->ifgl_group->ifg_refcnt == 0) { 1222 ifgroup_lockmgr(LK_RELEASE); 1223 if_destroygroup(ifgl->ifgl_group); 1224 ifgroup_lockmgr(LK_EXCLUSIVE); 1225 } 1226 1227 kfree(ifgl, M_IFNET); 1228 1229 return (0); 1230 } 1231 1232 int 1233 if_delgroup(struct ifnet *ifp, const char *groupname) 1234 { 1235 int error; 1236 1237 ifgroup_lockmgr(LK_EXCLUSIVE); 1238 error = if_delgroup_locked(ifp, groupname); 1239 ifgroup_lockmgr(LK_RELEASE); 1240 1241 return (error); 1242 } 1243 1244 /* 1245 * Store all the groups that the interface belongs to in memory 1246 * pointed to by data. 
1247 */ 1248 static int 1249 if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp) 1250 { 1251 struct ifg_list *ifgl; 1252 struct ifg_req *ifgrq, *p; 1253 int len, error; 1254 1255 len = 0; 1256 ifgroup_lockmgr(LK_SHARED); 1257 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 1258 len += sizeof(struct ifg_req); 1259 ifgroup_lockmgr(LK_RELEASE); 1260 1261 if (ifgr->ifgr_len == 0) { 1262 /* 1263 * Caller is asking how much memory should be allocated in 1264 * the next request in order to hold all the groups. 1265 */ 1266 ifgr->ifgr_len = len; 1267 return (0); 1268 } else if (ifgr->ifgr_len != len) { 1269 return (EINVAL); 1270 } 1271 1272 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1273 if (ifgrq == NULL) 1274 return (ENOMEM); 1275 1276 ifgroup_lockmgr(LK_SHARED); 1277 p = ifgrq; 1278 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 1279 if (len < sizeof(struct ifg_req)) { 1280 ifgroup_lockmgr(LK_RELEASE); 1281 error = EINVAL; 1282 goto failed; 1283 } 1284 1285 strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group, 1286 sizeof(ifgrq->ifgrq_group)); 1287 len -= sizeof(struct ifg_req); 1288 p++; 1289 } 1290 ifgroup_lockmgr(LK_RELEASE); 1291 1292 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1293 failed: 1294 kfree(ifgrq, M_TEMP); 1295 return error; 1296 } 1297 1298 /* 1299 * Store all the members of a group in memory pointed to by data. 
1300 */ 1301 static int 1302 if_getgroupmembers(struct ifgroupreq *ifgr) 1303 { 1304 struct ifg_group *ifg; 1305 struct ifg_member *ifgm; 1306 struct ifg_req *ifgrq, *p; 1307 int len, error; 1308 1309 ifgroup_lockmgr(LK_SHARED); 1310 1311 TAILQ_FOREACH(ifg, &ifg_head, ifg_next) { 1312 if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0) 1313 break; 1314 } 1315 if (ifg == NULL) { 1316 ifgroup_lockmgr(LK_RELEASE); 1317 return (ENOENT); 1318 } 1319 1320 len = 0; 1321 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) 1322 len += sizeof(struct ifg_req); 1323 1324 ifgroup_lockmgr(LK_RELEASE); 1325 1326 if (ifgr->ifgr_len == 0) { 1327 ifgr->ifgr_len = len; 1328 return (0); 1329 } else if (ifgr->ifgr_len != len) { 1330 return (EINVAL); 1331 } 1332 1333 ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO); 1334 if (ifgrq == NULL) 1335 return (ENOMEM); 1336 1337 ifgroup_lockmgr(LK_SHARED); 1338 p = ifgrq; 1339 TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { 1340 if (len < sizeof(struct ifg_req)) { 1341 ifgroup_lockmgr(LK_RELEASE); 1342 error = EINVAL; 1343 goto failed; 1344 } 1345 1346 strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname, 1347 sizeof(p->ifgrq_member)); 1348 len -= sizeof(struct ifg_req); 1349 p++; 1350 } 1351 ifgroup_lockmgr(LK_RELEASE); 1352 1353 error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len); 1354 failed: 1355 kfree(ifgrq, M_TEMP); 1356 return error; 1357 } 1358 1359 /* 1360 * Delete Routes for a Network Interface 1361 * 1362 * Called for each routing entry via the rnh->rnh_walktree() call above 1363 * to delete all route entries referencing a detaching network interface. 
1364 * 1365 * Arguments: 1366 * rn pointer to node in the routing table 1367 * arg argument passed to rnh->rnh_walktree() - detaching interface 1368 * 1369 * Returns: 1370 * 0 successful 1371 * errno failed - reason indicated 1372 * 1373 */ 1374 static int 1375 if_rtdel(struct radix_node *rn, void *arg) 1376 { 1377 struct rtentry *rt = (struct rtentry *)rn; 1378 struct ifnet *ifp = arg; 1379 int err; 1380 1381 if (rt->rt_ifp == ifp) { 1382 1383 /* 1384 * Protect (sorta) against walktree recursion problems 1385 * with cloned routes 1386 */ 1387 if (!(rt->rt_flags & RTF_UP)) 1388 return (0); 1389 1390 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1391 rt_mask(rt), rt->rt_flags, 1392 NULL); 1393 if (err) { 1394 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1395 } 1396 } 1397 1398 return (0); 1399 } 1400 1401 static __inline boolean_t 1402 ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa) 1403 { 1404 if (old_ifa == NULL) 1405 return TRUE; 1406 1407 if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 && 1408 (cur_ifa->ifa_ifp->if_flags & IFF_UP)) 1409 return TRUE; 1410 if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 && 1411 (cur_ifa->ifa_flags & IFA_ROUTE)) 1412 return TRUE; 1413 return FALSE; 1414 } 1415 1416 /* 1417 * Locate an interface based on a complete address. 
1418 */ 1419 struct ifaddr * 1420 ifa_ifwithaddr(struct sockaddr *addr) 1421 { 1422 const struct ifnet_array *arr; 1423 int i; 1424 1425 arr = ifnet_array_get(); 1426 for (i = 0; i < arr->ifnet_count; ++i) { 1427 struct ifnet *ifp = arr->ifnet_arr[i]; 1428 struct ifaddr_container *ifac; 1429 1430 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1431 struct ifaddr *ifa = ifac->ifa; 1432 1433 if (ifa->ifa_addr->sa_family != addr->sa_family) 1434 continue; 1435 if (sa_equal(addr, ifa->ifa_addr)) 1436 return (ifa); 1437 if ((ifp->if_flags & IFF_BROADCAST) && 1438 ifa->ifa_broadaddr && 1439 /* IPv6 doesn't have broadcast */ 1440 ifa->ifa_broadaddr->sa_len != 0 && 1441 sa_equal(ifa->ifa_broadaddr, addr)) 1442 return (ifa); 1443 } 1444 } 1445 return (NULL); 1446 } 1447 1448 /* 1449 * Locate the point to point interface with a given destination address. 1450 */ 1451 struct ifaddr * 1452 ifa_ifwithdstaddr(struct sockaddr *addr) 1453 { 1454 const struct ifnet_array *arr; 1455 int i; 1456 1457 arr = ifnet_array_get(); 1458 for (i = 0; i < arr->ifnet_count; ++i) { 1459 struct ifnet *ifp = arr->ifnet_arr[i]; 1460 struct ifaddr_container *ifac; 1461 1462 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1463 continue; 1464 1465 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1466 struct ifaddr *ifa = ifac->ifa; 1467 1468 if (ifa->ifa_addr->sa_family != addr->sa_family) 1469 continue; 1470 if (ifa->ifa_dstaddr && 1471 sa_equal(addr, ifa->ifa_dstaddr)) 1472 return (ifa); 1473 } 1474 } 1475 return (NULL); 1476 } 1477 1478 /* 1479 * Find an interface on a specific network. If many, choice 1480 * is most specific found. 
1481 */ 1482 struct ifaddr * 1483 ifa_ifwithnet(struct sockaddr *addr) 1484 { 1485 struct ifaddr *ifa_maybe = NULL; 1486 u_int af = addr->sa_family; 1487 char *addr_data = addr->sa_data, *cplim; 1488 const struct ifnet_array *arr; 1489 int i; 1490 1491 /* 1492 * AF_LINK addresses can be looked up directly by their index number, 1493 * so do that if we can. 1494 */ 1495 if (af == AF_LINK) { 1496 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1497 1498 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1499 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1500 } 1501 1502 /* 1503 * Scan though each interface, looking for ones that have 1504 * addresses in this address family. 1505 */ 1506 arr = ifnet_array_get(); 1507 for (i = 0; i < arr->ifnet_count; ++i) { 1508 struct ifnet *ifp = arr->ifnet_arr[i]; 1509 struct ifaddr_container *ifac; 1510 1511 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1512 struct ifaddr *ifa = ifac->ifa; 1513 char *cp, *cp2, *cp3; 1514 1515 if (ifa->ifa_addr->sa_family != af) 1516 next: continue; 1517 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1518 /* 1519 * This is a bit broken as it doesn't 1520 * take into account that the remote end may 1521 * be a single node in the network we are 1522 * looking for. 1523 * The trouble is that we don't know the 1524 * netmask for the remote end. 1525 */ 1526 if (ifa->ifa_dstaddr != NULL && 1527 sa_equal(addr, ifa->ifa_dstaddr)) 1528 return (ifa); 1529 } else { 1530 /* 1531 * if we have a special address handler, 1532 * then use it instead of the generic one. 1533 */ 1534 if (ifa->ifa_claim_addr) { 1535 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1536 return (ifa); 1537 } else { 1538 continue; 1539 } 1540 } 1541 1542 /* 1543 * Scan all the bits in the ifa's address. 1544 * If a bit dissagrees with what we are 1545 * looking for, mask it with the netmask 1546 * to see if it really matters. 
1547 * (A byte at a time) 1548 */ 1549 if (ifa->ifa_netmask == 0) 1550 continue; 1551 cp = addr_data; 1552 cp2 = ifa->ifa_addr->sa_data; 1553 cp3 = ifa->ifa_netmask->sa_data; 1554 cplim = ifa->ifa_netmask->sa_len + 1555 (char *)ifa->ifa_netmask; 1556 while (cp3 < cplim) 1557 if ((*cp++ ^ *cp2++) & *cp3++) 1558 goto next; /* next address! */ 1559 /* 1560 * If the netmask of what we just found 1561 * is more specific than what we had before 1562 * (if we had one) then remember the new one 1563 * before continuing to search for an even 1564 * better one. If the netmasks are equal, 1565 * we prefer the this ifa based on the result 1566 * of ifa_prefer(). 1567 */ 1568 if (ifa_maybe == NULL || 1569 rn_refines((char *)ifa->ifa_netmask, 1570 (char *)ifa_maybe->ifa_netmask) || 1571 (sa_equal(ifa_maybe->ifa_netmask, 1572 ifa->ifa_netmask) && 1573 ifa_prefer(ifa, ifa_maybe))) 1574 ifa_maybe = ifa; 1575 } 1576 } 1577 } 1578 return (ifa_maybe); 1579 } 1580 1581 /* 1582 * Find an interface address specific to an interface best matching 1583 * a given address. 
1584 */ 1585 struct ifaddr * 1586 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) 1587 { 1588 struct ifaddr_container *ifac; 1589 char *cp, *cp2, *cp3; 1590 char *cplim; 1591 struct ifaddr *ifa_maybe = NULL; 1592 u_int af = addr->sa_family; 1593 1594 if (af >= AF_MAX) 1595 return (0); 1596 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1597 struct ifaddr *ifa = ifac->ifa; 1598 1599 if (ifa->ifa_addr->sa_family != af) 1600 continue; 1601 if (ifa_maybe == NULL) 1602 ifa_maybe = ifa; 1603 if (ifa->ifa_netmask == NULL) { 1604 if (sa_equal(addr, ifa->ifa_addr) || 1605 (ifa->ifa_dstaddr != NULL && 1606 sa_equal(addr, ifa->ifa_dstaddr))) 1607 return (ifa); 1608 continue; 1609 } 1610 if (ifp->if_flags & IFF_POINTOPOINT) { 1611 if (sa_equal(addr, ifa->ifa_dstaddr)) 1612 return (ifa); 1613 } else { 1614 cp = addr->sa_data; 1615 cp2 = ifa->ifa_addr->sa_data; 1616 cp3 = ifa->ifa_netmask->sa_data; 1617 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 1618 for (; cp3 < cplim; cp3++) 1619 if ((*cp++ ^ *cp2++) & *cp3) 1620 break; 1621 if (cp3 == cplim) 1622 return (ifa); 1623 } 1624 } 1625 return (ifa_maybe); 1626 } 1627 1628 /* 1629 * Default action when installing a route with a Link Level gateway. 1630 * Lookup an appropriate real ifa to point to. 1631 * This should be moved to /sys/net/link.c eventually. 
1632 */ 1633 static void 1634 link_rtrequest(int cmd, struct rtentry *rt) 1635 { 1636 struct ifaddr *ifa; 1637 struct sockaddr *dst; 1638 struct ifnet *ifp; 1639 1640 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 1641 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL) 1642 return; 1643 ifa = ifaof_ifpforaddr(dst, ifp); 1644 if (ifa != NULL) { 1645 IFAFREE(rt->rt_ifa); 1646 IFAREF(ifa); 1647 rt->rt_ifa = ifa; 1648 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 1649 ifa->ifa_rtrequest(cmd, rt); 1650 } 1651 } 1652 1653 struct netmsg_if { 1654 struct netmsg_base base; 1655 struct ifnet *ifp; 1656 }; 1657 1658 /* 1659 * Mark an interface down and notify protocols of the transition. 1660 */ 1661 static void 1662 if_down_dispatch(netmsg_t nmsg) 1663 { 1664 struct netmsg_if *msg = (struct netmsg_if *)nmsg; 1665 struct ifnet *ifp = msg->ifp; 1666 struct ifaddr_container *ifac; 1667 struct domain *dp; 1668 1669 ASSERT_NETISR0; 1670 1671 ifp->if_flags &= ~IFF_UP; 1672 getmicrotime(&ifp->if_lastchange); 1673 rt_ifmsg(ifp); 1674 1675 /* 1676 * The ifaddr processing in the following loop will block, 1677 * however, this function is called in netisr0, in which 1678 * ifaddr list changes happen, so we don't care about the 1679 * blockness of the ifaddr processing here. 1680 */ 1681 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1682 struct ifaddr *ifa = ifac->ifa; 1683 1684 /* Ignore marker */ 1685 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1686 continue; 1687 1688 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr); 1689 } 1690 1691 SLIST_FOREACH(dp, &domains, dom_next) 1692 if (dp->dom_if_down != NULL) 1693 dp->dom_if_down(ifp); 1694 1695 ifq_purge_all(&ifp->if_snd); 1696 netisr_replymsg(&nmsg->base, 0); 1697 } 1698 1699 /* 1700 * Mark an interface up and notify protocols of the transition. 
1701 */ 1702 static void 1703 if_up_dispatch(netmsg_t nmsg) 1704 { 1705 struct netmsg_if *msg = (struct netmsg_if *)nmsg; 1706 struct ifnet *ifp = msg->ifp; 1707 struct ifaddr_container *ifac; 1708 struct domain *dp; 1709 1710 ASSERT_NETISR0; 1711 1712 ifq_purge_all(&ifp->if_snd); 1713 ifp->if_flags |= IFF_UP; 1714 getmicrotime(&ifp->if_lastchange); 1715 rt_ifmsg(ifp); 1716 1717 /* 1718 * The ifaddr processing in the following loop will block, 1719 * however, this function is called in netisr0, in which 1720 * ifaddr list changes happen, so we don't care about the 1721 * blockness of the ifaddr processing here. 1722 */ 1723 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1724 struct ifaddr *ifa = ifac->ifa; 1725 1726 /* Ignore marker */ 1727 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1728 continue; 1729 1730 kpfctlinput(PRC_IFUP, ifa->ifa_addr); 1731 } 1732 1733 SLIST_FOREACH(dp, &domains, dom_next) 1734 if (dp->dom_if_up != NULL) 1735 dp->dom_if_up(ifp); 1736 1737 netisr_replymsg(&nmsg->base, 0); 1738 } 1739 1740 /* 1741 * Mark an interface down and notify protocols of the transition. An 1742 * interface going down is also considered to be a synchronizing event. 1743 * We must ensure that all packet processing related to the interface 1744 * has completed before we return so e.g. the caller can free the ifnet 1745 * structure that the mbufs may be referencing. 1746 * 1747 * NOTE: must be called at splnet or eqivalent. 1748 */ 1749 void 1750 if_down(struct ifnet *ifp) 1751 { 1752 struct netmsg_if msg; 1753 1754 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); 1755 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1756 if_down_dispatch); 1757 msg.ifp = ifp; 1758 netisr_domsg(&msg.base, 0); 1759 netmsg_service_sync(); 1760 } 1761 1762 /* 1763 * Mark an interface up and notify protocols of 1764 * the transition. 1765 * NOTE: must be called at splnet or eqivalent. 
1766 */ 1767 void 1768 if_up(struct ifnet *ifp) 1769 { 1770 struct netmsg_if msg; 1771 1772 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1773 if_up_dispatch); 1774 msg.ifp = ifp; 1775 netisr_domsg(&msg.base, 0); 1776 EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); 1777 } 1778 1779 /* 1780 * Process a link state change. 1781 * NOTE: must be called at splsoftnet or equivalent. 1782 */ 1783 void 1784 if_link_state_change(struct ifnet *ifp) 1785 { 1786 int link_state = ifp->if_link_state; 1787 1788 rt_ifmsg(ifp); 1789 devctl_notify("IFNET", ifp->if_xname, 1790 (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); 1791 1792 EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); 1793 } 1794 1795 /* 1796 * Handle interface watchdog timer routines. Called 1797 * from softclock, we decrement timers (if set) and 1798 * call the appropriate interface routine on expiration. 1799 */ 1800 static void 1801 if_slowtimo_dispatch(netmsg_t nmsg) 1802 { 1803 struct globaldata *gd = mycpu; 1804 const struct ifnet_array *arr; 1805 int i; 1806 1807 ASSERT_NETISR0; 1808 1809 crit_enter_gd(gd); 1810 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1811 crit_exit_gd(gd); 1812 1813 arr = ifnet_array_get(); 1814 for (i = 0; i < arr->ifnet_count; ++i) { 1815 struct ifnet *ifp = arr->ifnet_arr[i]; 1816 1817 crit_enter_gd(gd); 1818 1819 if (if_stats_compat) { 1820 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1821 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1822 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1823 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1824 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1825 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1826 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1827 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1828 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1829 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1830 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1831 IFNET_STAT_GET(ifp, oqdrops, 
ifp->if_oqdrops); 1832 } 1833 1834 if (ifp->if_timer == 0 || --ifp->if_timer) { 1835 crit_exit_gd(gd); 1836 continue; 1837 } 1838 if (ifp->if_watchdog) { 1839 if (ifnet_tryserialize_all(ifp)) { 1840 (*ifp->if_watchdog)(ifp); 1841 ifnet_deserialize_all(ifp); 1842 } else { 1843 /* try again next timeout */ 1844 ++ifp->if_timer; 1845 } 1846 } 1847 1848 crit_exit_gd(gd); 1849 } 1850 1851 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1852 } 1853 1854 static void 1855 if_slowtimo(void *arg __unused) 1856 { 1857 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1858 1859 KASSERT(mycpuid == 0, ("not on cpu0")); 1860 crit_enter(); 1861 if (lmsg->ms_flags & MSGF_DONE) 1862 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1863 crit_exit(); 1864 } 1865 1866 /* 1867 * Map interface name to 1868 * interface structure pointer. 1869 */ 1870 struct ifnet * 1871 ifunit(const char *name) 1872 { 1873 struct ifnet *ifp; 1874 1875 /* 1876 * Search all the interfaces for this name/number 1877 */ 1878 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1879 1880 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1881 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1882 break; 1883 } 1884 return (ifp); 1885 } 1886 1887 struct ifnet * 1888 ifunit_netisr(const char *name) 1889 { 1890 const struct ifnet_array *arr; 1891 int i; 1892 1893 /* 1894 * Search all the interfaces for this name/number 1895 */ 1896 1897 arr = ifnet_array_get(); 1898 for (i = 0; i < arr->ifnet_count; ++i) { 1899 struct ifnet *ifp = arr->ifnet_arr[i]; 1900 1901 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1902 return ifp; 1903 } 1904 return NULL; 1905 } 1906 1907 /* 1908 * Interface ioctls. 
1909 */ 1910 int 1911 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1912 { 1913 struct ifnet *ifp; 1914 struct ifgroupreq *ifgr; 1915 struct ifreq *ifr; 1916 struct ifstat *ifs; 1917 int error, do_ifup = 0; 1918 short oif_flags; 1919 int new_flags; 1920 size_t namelen, onamelen; 1921 size_t descrlen; 1922 char *descrbuf, *odescrbuf; 1923 char new_name[IFNAMSIZ]; 1924 struct ifaddr *ifa; 1925 struct sockaddr_dl *sdl; 1926 1927 switch (cmd) { 1928 case SIOCGIFCONF: 1929 return (ifconf(cmd, data, cred)); 1930 default: 1931 break; 1932 } 1933 1934 ifr = (struct ifreq *)data; 1935 1936 switch (cmd) { 1937 case SIOCIFCREATE: 1938 case SIOCIFCREATE2: 1939 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1940 return (error); 1941 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1942 (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL)); 1943 case SIOCIFDESTROY: 1944 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1945 return (error); 1946 return (if_clone_destroy(ifr->ifr_name)); 1947 case SIOCIFGCLONERS: 1948 return (if_clone_list((struct if_clonereq *)data)); 1949 case SIOCGIFGMEMB: 1950 return (if_getgroupmembers((struct ifgroupreq *)data)); 1951 default: 1952 break; 1953 } 1954 1955 /* 1956 * Nominal ioctl through interface, lookup the ifp and obtain a 1957 * lock to serialize the ifconfig ioctl operation. 
1958 */ 1959 ifnet_lock(); 1960 1961 ifp = ifunit(ifr->ifr_name); 1962 if (ifp == NULL) { 1963 ifnet_unlock(); 1964 return (ENXIO); 1965 } 1966 error = 0; 1967 1968 switch (cmd) { 1969 case SIOCGIFINDEX: 1970 ifr->ifr_index = ifp->if_index; 1971 break; 1972 1973 case SIOCGIFFLAGS: 1974 ifr->ifr_flags = ifp->if_flags; 1975 ifr->ifr_flagshigh = ifp->if_flags >> 16; 1976 break; 1977 1978 case SIOCGIFCAP: 1979 ifr->ifr_reqcap = ifp->if_capabilities; 1980 ifr->ifr_curcap = ifp->if_capenable; 1981 break; 1982 1983 case SIOCGIFMETRIC: 1984 ifr->ifr_metric = ifp->if_metric; 1985 break; 1986 1987 case SIOCGIFMTU: 1988 ifr->ifr_mtu = ifp->if_mtu; 1989 break; 1990 1991 case SIOCGIFTSOLEN: 1992 ifr->ifr_tsolen = ifp->if_tsolen; 1993 break; 1994 1995 case SIOCGIFDATA: 1996 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 1997 sizeof(ifp->if_data)); 1998 break; 1999 2000 case SIOCGIFPHYS: 2001 ifr->ifr_phys = ifp->if_physical; 2002 break; 2003 2004 case SIOCGIFPOLLCPU: 2005 ifr->ifr_pollcpu = -1; 2006 break; 2007 2008 case SIOCSIFPOLLCPU: 2009 break; 2010 2011 case SIOCGIFDESCR: 2012 error = 0; 2013 ifnet_lock(); 2014 if (ifp->if_description == NULL) { 2015 ifr->ifr_buffer.length = 0; 2016 error = ENOMSG; 2017 } else { 2018 /* space for terminating nul */ 2019 descrlen = strlen(ifp->if_description) + 1; 2020 if (ifr->ifr_buffer.length < descrlen) 2021 error = ENAMETOOLONG; 2022 else 2023 error = copyout(ifp->if_description, 2024 ifr->ifr_buffer.buffer, descrlen); 2025 ifr->ifr_buffer.length = descrlen; 2026 } 2027 ifnet_unlock(); 2028 break; 2029 2030 case SIOCSIFDESCR: 2031 error = priv_check_cred(cred, PRIV_ROOT, 0); 2032 if (error) 2033 break; 2034 2035 /* 2036 * Copy only (length-1) bytes to make sure that 2037 * if_description is always nul terminated. The 2038 * length parameter is supposed to count the 2039 * terminating nul in. 
2040 */ 2041 if (ifr->ifr_buffer.length > ifdescr_maxlen) 2042 return (ENAMETOOLONG); 2043 else if (ifr->ifr_buffer.length == 0) 2044 descrbuf = NULL; 2045 else { 2046 descrbuf = kmalloc(ifr->ifr_buffer.length, M_IFDESCR, 2047 M_WAITOK | M_ZERO); 2048 error = copyin(ifr->ifr_buffer.buffer, descrbuf, 2049 ifr->ifr_buffer.length - 1); 2050 if (error) { 2051 kfree(descrbuf, M_IFDESCR); 2052 break; 2053 } 2054 } 2055 2056 ifnet_lock(); 2057 odescrbuf = ifp->if_description; 2058 ifp->if_description = descrbuf; 2059 ifnet_unlock(); 2060 2061 if (odescrbuf) 2062 kfree(odescrbuf, M_IFDESCR); 2063 2064 case SIOCSIFFLAGS: 2065 error = priv_check_cred(cred, PRIV_ROOT, 0); 2066 if (error) 2067 break; 2068 new_flags = (ifr->ifr_flags & 0xffff) | 2069 (ifr->ifr_flagshigh << 16); 2070 if (ifp->if_flags & IFF_SMART) { 2071 /* Smart drivers twiddle their own routes */ 2072 } else if (ifp->if_flags & IFF_UP && 2073 (new_flags & IFF_UP) == 0) { 2074 if_down(ifp); 2075 } else if (new_flags & IFF_UP && 2076 (ifp->if_flags & IFF_UP) == 0) { 2077 do_ifup = 1; 2078 } 2079 2080 #ifdef IFPOLL_ENABLE 2081 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 2082 if (new_flags & IFF_NPOLLING) 2083 ifpoll_register(ifp); 2084 else 2085 ifpoll_deregister(ifp); 2086 } 2087 #endif 2088 2089 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 2090 (new_flags &~ IFF_CANTCHANGE); 2091 if (new_flags & IFF_PPROMISC) { 2092 /* Permanently promiscuous mode requested */ 2093 ifp->if_flags |= IFF_PROMISC; 2094 } else if (ifp->if_pcount == 0) { 2095 ifp->if_flags &= ~IFF_PROMISC; 2096 } 2097 if (ifp->if_ioctl) { 2098 ifnet_serialize_all(ifp); 2099 ifp->if_ioctl(ifp, cmd, data, cred); 2100 ifnet_deserialize_all(ifp); 2101 } 2102 if (do_ifup) 2103 if_up(ifp); 2104 getmicrotime(&ifp->if_lastchange); 2105 break; 2106 2107 case SIOCSIFCAP: 2108 error = priv_check_cred(cred, PRIV_ROOT, 0); 2109 if (error) 2110 break; 2111 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2112 error = EINVAL; 2113 break; 2114 } 2115 
ifnet_serialize_all(ifp); 2116 ifp->if_ioctl(ifp, cmd, data, cred); 2117 ifnet_deserialize_all(ifp); 2118 break; 2119 2120 case SIOCSIFNAME: 2121 error = priv_check_cred(cred, PRIV_ROOT, 0); 2122 if (error) 2123 break; 2124 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2125 if (error) 2126 break; 2127 if (new_name[0] == '\0') { 2128 error = EINVAL; 2129 break; 2130 } 2131 if (ifunit(new_name) != NULL) { 2132 error = EEXIST; 2133 break; 2134 } 2135 2136 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2137 2138 /* Announce the departure of the interface. */ 2139 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2140 2141 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2142 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2143 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2144 namelen = strlen(new_name); 2145 onamelen = sdl->sdl_nlen; 2146 /* 2147 * Move the address if needed. This is safe because we 2148 * allocate space for a name of length IFNAMSIZ when we 2149 * create this in if_attach(). 2150 */ 2151 if (namelen != onamelen) { 2152 bcopy(sdl->sdl_data + onamelen, 2153 sdl->sdl_data + namelen, sdl->sdl_alen); 2154 } 2155 bcopy(new_name, sdl->sdl_data, namelen); 2156 sdl->sdl_nlen = namelen; 2157 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2158 bzero(sdl->sdl_data, onamelen); 2159 while (namelen != 0) 2160 sdl->sdl_data[--namelen] = 0xff; 2161 2162 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2163 2164 /* Announce the return of the interface. 
*/ 2165 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2166 break; 2167 2168 case SIOCSIFMETRIC: 2169 error = priv_check_cred(cred, PRIV_ROOT, 0); 2170 if (error) 2171 break; 2172 ifp->if_metric = ifr->ifr_metric; 2173 getmicrotime(&ifp->if_lastchange); 2174 break; 2175 2176 case SIOCSIFPHYS: 2177 error = priv_check_cred(cred, PRIV_ROOT, 0); 2178 if (error) 2179 break; 2180 if (ifp->if_ioctl == NULL) { 2181 error = EOPNOTSUPP; 2182 break; 2183 } 2184 ifnet_serialize_all(ifp); 2185 error = ifp->if_ioctl(ifp, cmd, data, cred); 2186 ifnet_deserialize_all(ifp); 2187 if (error == 0) 2188 getmicrotime(&ifp->if_lastchange); 2189 break; 2190 2191 case SIOCSIFMTU: 2192 { 2193 u_long oldmtu = ifp->if_mtu; 2194 2195 error = priv_check_cred(cred, PRIV_ROOT, 0); 2196 if (error) 2197 break; 2198 if (ifp->if_ioctl == NULL) { 2199 error = EOPNOTSUPP; 2200 break; 2201 } 2202 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2203 error = EINVAL; 2204 break; 2205 } 2206 ifnet_serialize_all(ifp); 2207 error = ifp->if_ioctl(ifp, cmd, data, cred); 2208 ifnet_deserialize_all(ifp); 2209 if (error == 0) { 2210 getmicrotime(&ifp->if_lastchange); 2211 rt_ifmsg(ifp); 2212 } 2213 /* 2214 * If the link MTU changed, do network layer specific procedure. 2215 */ 2216 if (ifp->if_mtu != oldmtu) { 2217 #ifdef INET6 2218 nd6_setmtu(ifp); 2219 #endif 2220 } 2221 break; 2222 } 2223 2224 case SIOCSIFTSOLEN: 2225 error = priv_check_cred(cred, PRIV_ROOT, 0); 2226 if (error) 2227 break; 2228 2229 /* XXX need driver supplied upper limit */ 2230 if (ifr->ifr_tsolen <= 0) { 2231 error = EINVAL; 2232 break; 2233 } 2234 ifp->if_tsolen = ifr->ifr_tsolen; 2235 break; 2236 2237 case SIOCADDMULTI: 2238 case SIOCDELMULTI: 2239 error = priv_check_cred(cred, PRIV_ROOT, 0); 2240 if (error) 2241 break; 2242 2243 /* Don't allow group membership on non-multicast interfaces. 
*/ 2244 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2245 error = EOPNOTSUPP; 2246 break; 2247 } 2248 2249 /* Don't let users screw up protocols' entries. */ 2250 if (ifr->ifr_addr.sa_family != AF_LINK) { 2251 error = EINVAL; 2252 break; 2253 } 2254 2255 if (cmd == SIOCADDMULTI) { 2256 struct ifmultiaddr *ifma; 2257 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2258 } else { 2259 error = if_delmulti(ifp, &ifr->ifr_addr); 2260 } 2261 if (error == 0) 2262 getmicrotime(&ifp->if_lastchange); 2263 break; 2264 2265 case SIOCSIFPHYADDR: 2266 case SIOCDIFPHYADDR: 2267 #ifdef INET6 2268 case SIOCSIFPHYADDR_IN6: 2269 #endif 2270 case SIOCSLIFPHYADDR: 2271 case SIOCSIFMEDIA: 2272 case SIOCSIFGENERIC: 2273 error = priv_check_cred(cred, PRIV_ROOT, 0); 2274 if (error) 2275 break; 2276 if (ifp->if_ioctl == NULL) { 2277 error = EOPNOTSUPP; 2278 break; 2279 } 2280 ifnet_serialize_all(ifp); 2281 error = ifp->if_ioctl(ifp, cmd, data, cred); 2282 ifnet_deserialize_all(ifp); 2283 if (error == 0) 2284 getmicrotime(&ifp->if_lastchange); 2285 break; 2286 2287 case SIOCGIFSTATUS: 2288 ifs = (struct ifstat *)data; 2289 ifs->ascii[0] = '\0'; 2290 /* fall through */ 2291 case SIOCGIFPSRCADDR: 2292 case SIOCGIFPDSTADDR: 2293 case SIOCGLIFPHYADDR: 2294 case SIOCGIFMEDIA: 2295 case SIOCGIFXMEDIA: 2296 case SIOCGIFGENERIC: 2297 if (ifp->if_ioctl == NULL) { 2298 error = EOPNOTSUPP; 2299 break; 2300 } 2301 ifnet_serialize_all(ifp); 2302 error = ifp->if_ioctl(ifp, cmd, data, cred); 2303 ifnet_deserialize_all(ifp); 2304 break; 2305 2306 case SIOCSIFLLADDR: 2307 error = priv_check_cred(cred, PRIV_ROOT, 0); 2308 if (error) 2309 break; 2310 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2311 ifr->ifr_addr.sa_len); 2312 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2313 break; 2314 2315 case SIOCAIFGROUP: 2316 ifgr = (struct ifgroupreq *)ifr; 2317 if ((error = priv_check_cred(cred, PRIV_NET_ADDIFGROUP, 0))) 2318 return (error); 2319 if ((error = if_addgroup(ifp, ifgr->ifgr_group))) 2320 return (error); 
2321 break; 2322 2323 case SIOCDIFGROUP: 2324 ifgr = (struct ifgroupreq *)ifr; 2325 if ((error = priv_check_cred(cred, PRIV_NET_DELIFGROUP, 0))) 2326 return (error); 2327 if ((error = if_delgroup(ifp, ifgr->ifgr_group))) 2328 return (error); 2329 break; 2330 2331 case SIOCGIFGROUP: 2332 ifgr = (struct ifgroupreq *)ifr; 2333 if ((error = if_getgroups(ifgr, ifp))) 2334 return (error); 2335 break; 2336 2337 default: 2338 oif_flags = ifp->if_flags; 2339 if (so->so_proto == 0) { 2340 error = EOPNOTSUPP; 2341 break; 2342 } 2343 error = so_pru_control_direct(so, cmd, data, ifp); 2344 2345 /* 2346 * If the socket control method returns EOPNOTSUPP, pass the 2347 * request directly to the interface. 2348 * 2349 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls, 2350 * because drivers may trust these ioctls to come from an 2351 * already privileged layer and thus do not perform credentials 2352 * checks or input validation. 2353 */ 2354 if (error == EOPNOTSUPP && 2355 ifp->if_ioctl != NULL && 2356 cmd != SIOCSIFADDR && 2357 cmd != SIOCSIFBRDADDR && 2358 cmd != SIOCSIFDSTADDR && 2359 cmd != SIOCSIFNETMASK) { 2360 ifnet_serialize_all(ifp); 2361 error = ifp->if_ioctl(ifp, cmd, data, cred); 2362 ifnet_deserialize_all(ifp); 2363 } 2364 2365 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2366 #ifdef INET6 2367 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2368 if (ifp->if_flags & IFF_UP) { 2369 crit_enter(); 2370 in6_if_up(ifp); 2371 crit_exit(); 2372 } 2373 #endif 2374 } 2375 break; 2376 } 2377 2378 ifnet_unlock(); 2379 return (error); 2380 } 2381 2382 /* 2383 * Set/clear promiscuous mode on interface ifp based on the truth value 2384 * of pswitch. The calls are reference counted so that only the first 2385 * "on" request actually has an effect, as does the final "off" request. 2386 * Results are undefined if the "off" and "on" requests are not matched. 
2387 */ 2388 int 2389 ifpromisc(struct ifnet *ifp, int pswitch) 2390 { 2391 struct ifreq ifr; 2392 int error; 2393 int oldflags; 2394 2395 oldflags = ifp->if_flags; 2396 if (ifp->if_flags & IFF_PPROMISC) { 2397 /* Do nothing if device is in permanently promiscuous mode */ 2398 ifp->if_pcount += pswitch ? 1 : -1; 2399 return (0); 2400 } 2401 if (pswitch) { 2402 /* 2403 * If the device is not configured up, we cannot put it in 2404 * promiscuous mode. 2405 */ 2406 if ((ifp->if_flags & IFF_UP) == 0) 2407 return (ENETDOWN); 2408 if (ifp->if_pcount++ != 0) 2409 return (0); 2410 ifp->if_flags |= IFF_PROMISC; 2411 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2412 ifp->if_xname); 2413 } else { 2414 if (--ifp->if_pcount > 0) 2415 return (0); 2416 ifp->if_flags &= ~IFF_PROMISC; 2417 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2418 ifp->if_xname); 2419 } 2420 ifr.ifr_flags = ifp->if_flags; 2421 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2422 ifnet_serialize_all(ifp); 2423 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2424 ifnet_deserialize_all(ifp); 2425 if (error == 0) 2426 rt_ifmsg(ifp); 2427 else 2428 ifp->if_flags = oldflags; 2429 return error; 2430 } 2431 2432 /* 2433 * Return interface configuration 2434 * of system. List may be used 2435 * in later ioctl's (above) to get 2436 * other information. 2437 */ 2438 static int 2439 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2440 { 2441 struct ifconf *ifc = (struct ifconf *)data; 2442 struct ifnet *ifp; 2443 struct sockaddr *sa; 2444 struct ifreq ifr, *ifrp; 2445 int space = ifc->ifc_len, error = 0; 2446 2447 ifrp = ifc->ifc_req; 2448 2449 ifnet_lock(); 2450 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2451 struct ifaddr_container *ifac, *ifac_mark; 2452 struct ifaddr_marker mark; 2453 struct ifaddrhead *head; 2454 int addrs; 2455 2456 if (space <= sizeof ifr) 2457 break; 2458 2459 /* 2460 * Zero the stack declared structure first to prevent 2461 * memory disclosure. 
2462 */ 2463 bzero(&ifr, sizeof(ifr)); 2464 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2465 >= sizeof(ifr.ifr_name)) { 2466 error = ENAMETOOLONG; 2467 break; 2468 } 2469 2470 /* 2471 * Add a marker, since copyout() could block and during that 2472 * period the list could be changed. Inserting the marker to 2473 * the header of the list will not cause trouble for the code 2474 * assuming that the first element of the list is AF_LINK; the 2475 * marker will be moved to the next position w/o blocking. 2476 */ 2477 ifa_marker_init(&mark, ifp); 2478 ifac_mark = &mark.ifac; 2479 head = &ifp->if_addrheads[mycpuid]; 2480 2481 addrs = 0; 2482 TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link); 2483 while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) { 2484 struct ifaddr *ifa = ifac->ifa; 2485 2486 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2487 TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link); 2488 2489 /* Ignore marker */ 2490 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 2491 continue; 2492 2493 if (space <= sizeof ifr) 2494 break; 2495 sa = ifa->ifa_addr; 2496 if (cred->cr_prison && prison_if(cred, sa)) 2497 continue; 2498 addrs++; 2499 /* 2500 * Keep a reference on this ifaddr, so that it will 2501 * not be destroyed when its address is copied to 2502 * the userland, which could block. 
2503 */ 2504 IFAREF(ifa); 2505 if (sa->sa_len <= sizeof(*sa)) { 2506 ifr.ifr_addr = *sa; 2507 error = copyout(&ifr, ifrp, sizeof ifr); 2508 ifrp++; 2509 } else { 2510 if (space < (sizeof ifr) + sa->sa_len - 2511 sizeof(*sa)) { 2512 IFAFREE(ifa); 2513 break; 2514 } 2515 space -= sa->sa_len - sizeof(*sa); 2516 error = copyout(&ifr, ifrp, 2517 sizeof ifr.ifr_name); 2518 if (error == 0) 2519 error = copyout(sa, &ifrp->ifr_addr, 2520 sa->sa_len); 2521 ifrp = (struct ifreq *) 2522 (sa->sa_len + (caddr_t)&ifrp->ifr_addr); 2523 } 2524 IFAFREE(ifa); 2525 if (error) 2526 break; 2527 space -= sizeof ifr; 2528 } 2529 TAILQ_REMOVE(head, ifac_mark, ifa_link); 2530 if (error) 2531 break; 2532 if (!addrs) { 2533 bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr); 2534 error = copyout(&ifr, ifrp, sizeof ifr); 2535 if (error) 2536 break; 2537 space -= sizeof ifr; 2538 ifrp++; 2539 } 2540 } 2541 ifnet_unlock(); 2542 2543 ifc->ifc_len -= space; 2544 return (error); 2545 } 2546 2547 /* 2548 * Just like if_promisc(), but for all-multicast-reception mode. 
2549 */ 2550 int 2551 if_allmulti(struct ifnet *ifp, int onswitch) 2552 { 2553 int error = 0; 2554 struct ifreq ifr; 2555 2556 crit_enter(); 2557 2558 if (onswitch) { 2559 if (ifp->if_amcount++ == 0) { 2560 ifp->if_flags |= IFF_ALLMULTI; 2561 ifr.ifr_flags = ifp->if_flags; 2562 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2563 ifnet_serialize_all(ifp); 2564 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2565 NULL); 2566 ifnet_deserialize_all(ifp); 2567 } 2568 } else { 2569 if (ifp->if_amcount > 1) { 2570 ifp->if_amcount--; 2571 } else { 2572 ifp->if_amcount = 0; 2573 ifp->if_flags &= ~IFF_ALLMULTI; 2574 ifr.ifr_flags = ifp->if_flags; 2575 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2576 ifnet_serialize_all(ifp); 2577 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2578 NULL); 2579 ifnet_deserialize_all(ifp); 2580 } 2581 } 2582 2583 crit_exit(); 2584 2585 if (error == 0) 2586 rt_ifmsg(ifp); 2587 return error; 2588 } 2589 2590 /* 2591 * Add a multicast listenership to the interface in question. 2592 * The link layer provides a routine which converts 2593 */ 2594 int 2595 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2596 struct ifmultiaddr **retifma) 2597 { 2598 struct sockaddr *llsa, *dupsa; 2599 int error; 2600 struct ifmultiaddr *ifma; 2601 2602 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2603 2604 /* 2605 * If the matching multicast address already exists 2606 * then don't add a new one, just add a reference 2607 */ 2608 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2609 if (sa_equal(sa, ifma->ifma_addr)) { 2610 ifma->ifma_refcount++; 2611 if (retifma) 2612 *retifma = ifma; 2613 return 0; 2614 } 2615 } 2616 2617 /* 2618 * Give the link layer a chance to accept/reject it, and also 2619 * find out which AF_LINK address this maps to, if it isn't one 2620 * already. 
2621 */ 2622 if (ifp->if_resolvemulti) { 2623 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2624 if (error) 2625 return error; 2626 } else { 2627 llsa = NULL; 2628 } 2629 2630 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2631 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT); 2632 bcopy(sa, dupsa, sa->sa_len); 2633 2634 ifma->ifma_addr = dupsa; 2635 ifma->ifma_lladdr = llsa; 2636 ifma->ifma_ifp = ifp; 2637 ifma->ifma_refcount = 1; 2638 ifma->ifma_protospec = NULL; 2639 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2640 2641 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2642 if (retifma) 2643 *retifma = ifma; 2644 2645 if (llsa != NULL) { 2646 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2647 if (sa_equal(ifma->ifma_addr, llsa)) 2648 break; 2649 } 2650 if (ifma) { 2651 ifma->ifma_refcount++; 2652 } else { 2653 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT); 2654 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT); 2655 bcopy(llsa, dupsa, llsa->sa_len); 2656 ifma->ifma_addr = dupsa; 2657 ifma->ifma_ifp = ifp; 2658 ifma->ifma_refcount = 1; 2659 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2660 } 2661 } 2662 /* 2663 * We are certain we have added something, so call down to the 2664 * interface to let them know about it. 2665 */ 2666 if (ifp->if_ioctl) 2667 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2668 2669 return 0; 2670 } 2671 2672 int 2673 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2674 struct ifmultiaddr **retifma) 2675 { 2676 int error; 2677 2678 ifnet_serialize_all(ifp); 2679 error = if_addmulti_serialized(ifp, sa, retifma); 2680 ifnet_deserialize_all(ifp); 2681 2682 return error; 2683 } 2684 2685 /* 2686 * Remove a reference to a multicast address on this interface. Yell 2687 * if the request does not match an existing membership. 
2688 */ 2689 static int 2690 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2691 { 2692 struct ifmultiaddr *ifma; 2693 2694 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2695 2696 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2697 if (sa_equal(sa, ifma->ifma_addr)) 2698 break; 2699 if (ifma == NULL) 2700 return ENOENT; 2701 2702 if (ifma->ifma_refcount > 1) { 2703 ifma->ifma_refcount--; 2704 return 0; 2705 } 2706 2707 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2708 sa = ifma->ifma_lladdr; 2709 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2710 /* 2711 * Make sure the interface driver is notified 2712 * in the case of a link layer mcast group being left. 2713 */ 2714 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2715 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2716 kfree(ifma->ifma_addr, M_IFMADDR); 2717 kfree(ifma, M_IFMADDR); 2718 if (sa == NULL) 2719 return 0; 2720 2721 /* 2722 * Now look for the link-layer address which corresponds to 2723 * this network address. It had been squirreled away in 2724 * ifma->ifma_lladdr for this purpose (so we don't have 2725 * to call ifp->if_resolvemulti() again), and we saved that 2726 * value in sa above. If some nasty deleted the 2727 * link-layer address out from underneath us, we can deal because 2728 * the address we stored was is not the same as the one which was 2729 * in the record for the link-layer address. (So we don't complain 2730 * in that case.) 
2731 */ 2732 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2733 if (sa_equal(sa, ifma->ifma_addr)) 2734 break; 2735 if (ifma == NULL) 2736 return 0; 2737 2738 if (ifma->ifma_refcount > 1) { 2739 ifma->ifma_refcount--; 2740 return 0; 2741 } 2742 2743 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2744 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2745 kfree(ifma->ifma_addr, M_IFMADDR); 2746 kfree(sa, M_IFMADDR); 2747 kfree(ifma, M_IFMADDR); 2748 2749 return 0; 2750 } 2751 2752 int 2753 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2754 { 2755 int error; 2756 2757 ifnet_serialize_all(ifp); 2758 error = if_delmulti_serialized(ifp, sa); 2759 ifnet_deserialize_all(ifp); 2760 2761 return error; 2762 } 2763 2764 /* 2765 * Delete all multicast group membership for an interface. 2766 * Should be used to quickly flush all multicast filters. 2767 */ 2768 void 2769 if_delallmulti_serialized(struct ifnet *ifp) 2770 { 2771 struct ifmultiaddr *ifma, mark; 2772 struct sockaddr sa; 2773 2774 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2775 2776 bzero(&sa, sizeof(sa)); 2777 sa.sa_family = AF_UNSPEC; 2778 sa.sa_len = sizeof(sa); 2779 2780 bzero(&mark, sizeof(mark)); 2781 mark.ifma_addr = &sa; 2782 2783 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2784 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2785 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2786 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2787 ifma_link); 2788 2789 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2790 continue; 2791 2792 if_delmulti_serialized(ifp, ifma->ifma_addr); 2793 } 2794 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2795 } 2796 2797 2798 /* 2799 * Set the link layer address on an interface. 2800 * 2801 * At this time we only support certain types of interfaces, 2802 * and we don't allow the length of the address to change. 
2803 */ 2804 int 2805 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2806 { 2807 struct sockaddr_dl *sdl; 2808 struct ifreq ifr; 2809 2810 sdl = IF_LLSOCKADDR(ifp); 2811 if (sdl == NULL) 2812 return (EINVAL); 2813 if (len != sdl->sdl_alen) /* don't allow length to change */ 2814 return (EINVAL); 2815 switch (ifp->if_type) { 2816 case IFT_ETHER: /* these types use struct arpcom */ 2817 case IFT_XETHER: 2818 case IFT_L2VLAN: 2819 case IFT_IEEE8023ADLAG: 2820 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2821 bcopy(lladdr, LLADDR(sdl), len); 2822 break; 2823 default: 2824 return (ENODEV); 2825 } 2826 /* 2827 * If the interface is already up, we need 2828 * to re-init it in order to reprogram its 2829 * address filter. 2830 */ 2831 ifnet_serialize_all(ifp); 2832 if ((ifp->if_flags & IFF_UP) != 0) { 2833 #ifdef INET 2834 struct ifaddr_container *ifac; 2835 #endif 2836 2837 ifp->if_flags &= ~IFF_UP; 2838 ifr.ifr_flags = ifp->if_flags; 2839 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2840 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2841 NULL); 2842 ifp->if_flags |= IFF_UP; 2843 ifr.ifr_flags = ifp->if_flags; 2844 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2845 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2846 NULL); 2847 #ifdef INET 2848 /* 2849 * Also send gratuitous ARPs to notify other nodes about 2850 * the address change. 2851 */ 2852 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2853 struct ifaddr *ifa = ifac->ifa; 2854 2855 if (ifa->ifa_addr != NULL && 2856 ifa->ifa_addr->sa_family == AF_INET) 2857 arp_gratuitous(ifp, ifa); 2858 } 2859 #endif 2860 } 2861 ifnet_deserialize_all(ifp); 2862 return (0); 2863 } 2864 2865 2866 /* 2867 * Locate an interface based on a complete address. 
2868 */ 2869 struct ifnet * 2870 if_bylla(const void *lla, unsigned char lla_len) 2871 { 2872 const struct ifnet_array *arr; 2873 struct ifnet *ifp; 2874 struct sockaddr_dl *sdl; 2875 int i; 2876 2877 arr = ifnet_array_get(); 2878 for (i = 0; i < arr->ifnet_count; ++i) { 2879 ifp = arr->ifnet_arr[i]; 2880 if (ifp->if_addrlen != lla_len) 2881 continue; 2882 2883 sdl = IF_LLSOCKADDR(ifp); 2884 if (memcmp(lla, LLADDR(sdl), lla_len) == 0) 2885 return (ifp); 2886 } 2887 return (NULL); 2888 } 2889 2890 struct ifmultiaddr * 2891 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2892 { 2893 struct ifmultiaddr *ifma; 2894 2895 /* TODO: need ifnet_serialize_main */ 2896 ifnet_serialize_all(ifp); 2897 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2898 if (sa_equal(ifma->ifma_addr, sa)) 2899 break; 2900 ifnet_deserialize_all(ifp); 2901 2902 return ifma; 2903 } 2904 2905 /* 2906 * This function locates the first real ethernet MAC from a network 2907 * card and loads it into node, returning 0 on success or ENOENT if 2908 * no suitable interfaces were found. It is used by the uuid code to 2909 * generate a unique 6-byte number. 2910 */ 2911 int 2912 if_getanyethermac(uint16_t *node, int minlen) 2913 { 2914 struct ifnet *ifp; 2915 struct sockaddr_dl *sdl; 2916 2917 ifnet_lock(); 2918 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2919 if (ifp->if_type != IFT_ETHER) 2920 continue; 2921 sdl = IF_LLSOCKADDR(ifp); 2922 if (sdl->sdl_alen < minlen) 2923 continue; 2924 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2925 minlen); 2926 ifnet_unlock(); 2927 return(0); 2928 } 2929 ifnet_unlock(); 2930 return (ENOENT); 2931 } 2932 2933 /* 2934 * The name argument must be a pointer to storage which will last as 2935 * long as the interface does. For physical devices, the result of 2936 * device_get_name(dev) is a good choice and for pseudo-devices a 2937 * static string works well. 
2938 */ 2939 void 2940 if_initname(struct ifnet *ifp, const char *name, int unit) 2941 { 2942 ifp->if_dname = name; 2943 ifp->if_dunit = unit; 2944 if (unit != IF_DUNIT_NONE) 2945 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2946 else 2947 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2948 } 2949 2950 int 2951 if_printf(struct ifnet *ifp, const char *fmt, ...) 2952 { 2953 __va_list ap; 2954 int retval; 2955 2956 retval = kprintf("%s: ", ifp->if_xname); 2957 __va_start(ap, fmt); 2958 retval += kvprintf(fmt, ap); 2959 __va_end(ap); 2960 return (retval); 2961 } 2962 2963 struct ifnet * 2964 if_alloc(uint8_t type) 2965 { 2966 struct ifnet *ifp; 2967 size_t size; 2968 2969 /* 2970 * XXX temporary hack until arpcom is setup in if_l2com 2971 */ 2972 if (type == IFT_ETHER) 2973 size = sizeof(struct arpcom); 2974 else 2975 size = sizeof(struct ifnet); 2976 2977 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2978 2979 ifp->if_type = type; 2980 2981 if (if_com_alloc[type] != NULL) { 2982 ifp->if_l2com = if_com_alloc[type](type, ifp); 2983 if (ifp->if_l2com == NULL) { 2984 kfree(ifp, M_IFNET); 2985 return (NULL); 2986 } 2987 } 2988 return (ifp); 2989 } 2990 2991 void 2992 if_free(struct ifnet *ifp) 2993 { 2994 if (ifp->if_description != NULL) 2995 kfree(ifp->if_description, M_IFDESCR); 2996 kfree(ifp, M_IFNET); 2997 } 2998 2999 void 3000 ifq_set_classic(struct ifaltq *ifq) 3001 { 3002 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 3003 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 3004 } 3005 3006 void 3007 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 3008 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 3009 { 3010 int q; 3011 3012 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 3013 KASSERT(enqueue != NULL, ("enqueue is not specified")); 3014 KASSERT(dequeue != NULL, ("dequeue is not specified")); 3015 KASSERT(request != NULL, ("request is not specified")); 3016 3017 ifq->altq_mapsubq = mapsubq; 3018 
for (q = 0; q < ifq->altq_subq_cnt; ++q) { 3019 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 3020 3021 ifsq->ifsq_enqueue = enqueue; 3022 ifsq->ifsq_dequeue = dequeue; 3023 ifsq->ifsq_request = request; 3024 } 3025 } 3026 3027 static void 3028 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3029 { 3030 3031 classq_add(&ifsq->ifsq_norm, m); 3032 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3033 } 3034 3035 static void 3036 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 3037 { 3038 3039 classq_add(&ifsq->ifsq_prio, m); 3040 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 3041 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 3042 } 3043 3044 static struct mbuf * 3045 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 3046 { 3047 struct mbuf *m; 3048 3049 m = classq_get(&ifsq->ifsq_norm); 3050 if (m != NULL) 3051 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3052 return (m); 3053 } 3054 3055 static struct mbuf * 3056 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 3057 { 3058 struct mbuf *m; 3059 3060 m = classq_get(&ifsq->ifsq_prio); 3061 if (m != NULL) { 3062 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 3063 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 3064 } 3065 return (m); 3066 } 3067 3068 int 3069 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 3070 struct altq_pktattr *pa __unused) 3071 { 3072 3073 M_ASSERTPKTHDR(m); 3074 again: 3075 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 3076 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3077 struct mbuf *m_drop; 3078 3079 if (m->m_flags & M_PRIO) { 3080 m_drop = NULL; 3081 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3082 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3083 /* Try dropping some from normal queue. 
*/ 3084 m_drop = ifsq_norm_dequeue(ifsq); 3085 } 3086 if (m_drop == NULL) 3087 m_drop = ifsq_prio_dequeue(ifsq); 3088 } else { 3089 m_drop = ifsq_norm_dequeue(ifsq); 3090 } 3091 if (m_drop != NULL) { 3092 IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1); 3093 m_freem(m_drop); 3094 goto again; 3095 } 3096 /* 3097 * No old packets could be dropped! 3098 * NOTE: Caller increases oqdrops. 3099 */ 3100 m_freem(m); 3101 return (ENOBUFS); 3102 } else { 3103 if (m->m_flags & M_PRIO) 3104 ifsq_prio_enqueue(ifsq, m); 3105 else 3106 ifsq_norm_enqueue(ifsq, m); 3107 return (0); 3108 } 3109 } 3110 3111 struct mbuf * 3112 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 3113 { 3114 struct mbuf *m; 3115 3116 switch (op) { 3117 case ALTDQ_POLL: 3118 m = classq_head(&ifsq->ifsq_prio); 3119 if (m == NULL) 3120 m = classq_head(&ifsq->ifsq_norm); 3121 break; 3122 3123 case ALTDQ_REMOVE: 3124 m = ifsq_prio_dequeue(ifsq); 3125 if (m == NULL) 3126 m = ifsq_norm_dequeue(ifsq); 3127 break; 3128 3129 default: 3130 panic("unsupported ALTQ dequeue op: %d", op); 3131 } 3132 return m; 3133 } 3134 3135 int 3136 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 3137 { 3138 switch (req) { 3139 case ALTRQ_PURGE: 3140 for (;;) { 3141 struct mbuf *m; 3142 3143 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 3144 if (m == NULL) 3145 break; 3146 m_freem(m); 3147 } 3148 break; 3149 3150 default: 3151 panic("unsupported ALTQ request: %d", req); 3152 } 3153 return 0; 3154 } 3155 3156 static void 3157 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 3158 { 3159 struct ifnet *ifp = ifsq_get_ifp(ifsq); 3160 int running = 0, need_sched; 3161 3162 /* 3163 * Try to do direct ifnet.if_start on the subqueue first, if there is 3164 * contention on the subqueue hardware serializer, ifnet.if_start on 3165 * the subqueue will be scheduled on the subqueue owner CPU. 
3166 */ 3167 if (!ifsq_tryserialize_hw(ifsq)) { 3168 /* 3169 * Subqueue hardware serializer contention happened, 3170 * ifnet.if_start on the subqueue is scheduled on 3171 * the subqueue owner CPU, and we keep going. 3172 */ 3173 ifsq_ifstart_schedule(ifsq, 1); 3174 return; 3175 } 3176 3177 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 3178 ifp->if_start(ifp, ifsq); 3179 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 3180 running = 1; 3181 } 3182 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 3183 3184 ifsq_deserialize_hw(ifsq); 3185 3186 if (need_sched) { 3187 /* 3188 * More data need to be transmitted, ifnet.if_start on the 3189 * subqueue is scheduled on the subqueue owner CPU, and we 3190 * keep going. 3191 * NOTE: ifnet.if_start subqueue interlock is not released. 3192 */ 3193 ifsq_ifstart_schedule(ifsq, force_sched); 3194 } 3195 } 3196 3197 /* 3198 * Subqeue packets staging mechanism: 3199 * 3200 * The packets enqueued into the subqueue are staged to a certain amount 3201 * before the ifnet.if_start on the subqueue is called. In this way, the 3202 * driver could avoid writing to hardware registers upon every packet, 3203 * instead, hardware registers could be written when certain amount of 3204 * packets are put onto hardware TX ring. The measurement on several modern 3205 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware 3206 * registers writing aggregation could save ~20% CPU time when 18bytes UDP 3207 * datagrams are transmitted at 1.48Mpps. The performance improvement by 3208 * hardware registers writing aggeregation is also mentioned by Luigi Rizzo's 3209 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 3210 * 3211 * Subqueue packets staging is performed for two entry points into drivers' 3212 * transmission function: 3213 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 3214 * - ifnet.if_start scheduling on the subqueue, i.e. 
ifsq_ifstart_schedule() 3215 * 3216 * Subqueue packets staging will be stopped upon any of the following 3217 * conditions: 3218 * - If the count of packets enqueued on the current CPU is great than or 3219 * equal to ifsq_stage_cntmax. (XXX this should be per-interface) 3220 * - If the total length of packets enqueued on the current CPU is great 3221 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is 3222 * cut from the hardware's MTU mainly bacause a full TCP segment's size 3223 * is usually less than hardware's MTU. 3224 * - ifsq_ifstart_schedule() is not pending on the current CPU and 3225 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 3226 * released. 3227 * - The if_start_rollup(), which is registered as low priority netisr 3228 * rollup function, is called; probably because no more work is pending 3229 * for netisr. 3230 * 3231 * NOTE: 3232 * Currently subqueue packet staging is only performed in netisr threads. 3233 */ 3234 int 3235 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 3236 { 3237 struct ifaltq *ifq = &ifp->if_snd; 3238 struct ifaltq_subque *ifsq; 3239 int error, start = 0, len, mcast = 0, avoid_start = 0; 3240 struct ifsubq_stage_head *head = NULL; 3241 struct ifsubq_stage *stage = NULL; 3242 struct globaldata *gd = mycpu; 3243 struct thread *td = gd->gd_curthread; 3244 3245 crit_enter_quick(td); 3246 3247 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 3248 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 3249 3250 len = m->m_pkthdr.len; 3251 if (m->m_flags & M_MCAST) 3252 mcast = 1; 3253 3254 if (td->td_type == TD_TYPE_NETISR) { 3255 head = &ifsubq_stage_heads[mycpuid]; 3256 stage = ifsq_get_stage(ifsq, mycpuid); 3257 3258 stage->stg_cnt++; 3259 stage->stg_len += len; 3260 if (stage->stg_cnt < ifsq_stage_cntmax && 3261 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3262 avoid_start = 1; 3263 } 3264 3265 ALTQ_SQ_LOCK(ifsq); 3266 error = ifsq_enqueue_locked(ifsq, m, pa); 3267 if (error) { 3268 
IFNET_STAT_INC(ifp, oqdrops, 1); 3269 if (!ifsq_data_ready(ifsq)) { 3270 ALTQ_SQ_UNLOCK(ifsq); 3271 crit_exit_quick(td); 3272 return error; 3273 } 3274 avoid_start = 0; 3275 } 3276 if (!ifsq_is_started(ifsq)) { 3277 if (avoid_start) { 3278 ALTQ_SQ_UNLOCK(ifsq); 3279 3280 KKASSERT(!error); 3281 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3282 ifsq_stage_insert(head, stage); 3283 3284 IFNET_STAT_INC(ifp, obytes, len); 3285 if (mcast) 3286 IFNET_STAT_INC(ifp, omcasts, 1); 3287 crit_exit_quick(td); 3288 return error; 3289 } 3290 3291 /* 3292 * Hold the subqueue interlock of ifnet.if_start 3293 */ 3294 ifsq_set_started(ifsq); 3295 start = 1; 3296 } 3297 ALTQ_SQ_UNLOCK(ifsq); 3298 3299 if (!error) { 3300 IFNET_STAT_INC(ifp, obytes, len); 3301 if (mcast) 3302 IFNET_STAT_INC(ifp, omcasts, 1); 3303 } 3304 3305 if (stage != NULL) { 3306 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3307 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3308 if (!avoid_start) { 3309 ifsq_stage_remove(head, stage); 3310 ifsq_ifstart_schedule(ifsq, 1); 3311 } 3312 crit_exit_quick(td); 3313 return error; 3314 } 3315 3316 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3317 ifsq_stage_remove(head, stage); 3318 } else { 3319 stage->stg_cnt = 0; 3320 stage->stg_len = 0; 3321 } 3322 } 3323 3324 if (!start) { 3325 crit_exit_quick(td); 3326 return error; 3327 } 3328 3329 ifsq_ifstart_try(ifsq, 0); 3330 3331 crit_exit_quick(td); 3332 return error; 3333 } 3334 3335 void * 3336 ifa_create(int size) 3337 { 3338 struct ifaddr *ifa; 3339 int i; 3340 3341 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3342 3343 ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO); 3344 3345 /* 3346 * Make ifa_container availabel on all CPUs, since they 3347 * could be accessed by any threads. 
3348 */ 3349 ifa->ifa_containers = 3350 kmalloc(ncpus * sizeof(struct ifaddr_container), 3351 M_IFADDR, 3352 M_INTWAIT | M_ZERO | M_CACHEALIGN); 3353 3354 ifa->ifa_ncnt = ncpus; 3355 for (i = 0; i < ncpus; ++i) { 3356 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3357 3358 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3359 ifac->ifa = ifa; 3360 ifac->ifa_refcnt = 1; 3361 } 3362 #ifdef IFADDR_DEBUG 3363 kprintf("alloc ifa %p %d\n", ifa, size); 3364 #endif 3365 return ifa; 3366 } 3367 3368 void 3369 ifac_free(struct ifaddr_container *ifac, int cpu_id) 3370 { 3371 struct ifaddr *ifa = ifac->ifa; 3372 3373 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3374 KKASSERT(ifac->ifa_refcnt == 0); 3375 KASSERT(ifac->ifa_listmask == 0, 3376 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3377 3378 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3379 3380 #ifdef IFADDR_DEBUG_VERBOSE 3381 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3382 #endif 3383 3384 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3385 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3386 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3387 #ifdef IFADDR_DEBUG 3388 kprintf("free ifa %p\n", ifa); 3389 #endif 3390 kfree(ifa->ifa_containers, M_IFADDR); 3391 kfree(ifa, M_IFADDR); 3392 } 3393 } 3394 3395 static void 3396 ifa_iflink_dispatch(netmsg_t nmsg) 3397 { 3398 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3399 struct ifaddr *ifa = msg->ifa; 3400 struct ifnet *ifp = msg->ifp; 3401 int cpu = mycpuid; 3402 struct ifaddr_container *ifac; 3403 3404 crit_enter(); 3405 3406 ifac = &ifa->ifa_containers[cpu]; 3407 ASSERT_IFAC_VALID(ifac); 3408 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3409 ("ifaddr is on if_addrheads")); 3410 3411 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3412 if (msg->tail) 3413 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3414 else 3415 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3416 3417 crit_exit(); 3418 3419 
netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3420 } 3421 3422 void 3423 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3424 { 3425 struct netmsg_ifaddr msg; 3426 3427 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3428 0, ifa_iflink_dispatch); 3429 msg.ifa = ifa; 3430 msg.ifp = ifp; 3431 msg.tail = tail; 3432 3433 netisr_domsg(&msg.base, 0); 3434 } 3435 3436 static void 3437 ifa_ifunlink_dispatch(netmsg_t nmsg) 3438 { 3439 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3440 struct ifaddr *ifa = msg->ifa; 3441 struct ifnet *ifp = msg->ifp; 3442 int cpu = mycpuid; 3443 struct ifaddr_container *ifac; 3444 3445 crit_enter(); 3446 3447 ifac = &ifa->ifa_containers[cpu]; 3448 ASSERT_IFAC_VALID(ifac); 3449 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3450 ("ifaddr is not on if_addrhead")); 3451 3452 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3453 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3454 3455 crit_exit(); 3456 3457 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3458 } 3459 3460 void 3461 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3462 { 3463 struct netmsg_ifaddr msg; 3464 3465 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3466 0, ifa_ifunlink_dispatch); 3467 msg.ifa = ifa; 3468 msg.ifp = ifp; 3469 3470 netisr_domsg(&msg.base, 0); 3471 } 3472 3473 static void 3474 ifa_destroy_dispatch(netmsg_t nmsg) 3475 { 3476 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3477 3478 IFAFREE(msg->ifa); 3479 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 3480 } 3481 3482 void 3483 ifa_destroy(struct ifaddr *ifa) 3484 { 3485 struct netmsg_ifaddr msg; 3486 3487 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3488 0, ifa_destroy_dispatch); 3489 msg.ifa = ifa; 3490 3491 netisr_domsg(&msg.base, 0); 3492 } 3493 3494 static void 3495 if_start_rollup(void) 3496 { 3497 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3498 struct ifsubq_stage *stage; 3499 3500 crit_enter(); 3501 3502 while ((stage = 
TAILQ_FIRST(&head->stg_head)) != NULL) { 3503 struct ifaltq_subque *ifsq = stage->stg_subq; 3504 int is_sched = 0; 3505 3506 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3507 is_sched = 1; 3508 ifsq_stage_remove(head, stage); 3509 3510 if (is_sched) { 3511 ifsq_ifstart_schedule(ifsq, 1); 3512 } else { 3513 int start = 0; 3514 3515 ALTQ_SQ_LOCK(ifsq); 3516 if (!ifsq_is_started(ifsq)) { 3517 /* 3518 * Hold the subqueue interlock of 3519 * ifnet.if_start 3520 */ 3521 ifsq_set_started(ifsq); 3522 start = 1; 3523 } 3524 ALTQ_SQ_UNLOCK(ifsq); 3525 3526 if (start) 3527 ifsq_ifstart_try(ifsq, 1); 3528 } 3529 KKASSERT((stage->stg_flags & 3530 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3531 } 3532 3533 crit_exit(); 3534 } 3535 3536 static void 3537 ifnetinit(void *dummy __unused) 3538 { 3539 int i; 3540 3541 /* XXX netisr_ncpus */ 3542 for (i = 0; i < ncpus; ++i) 3543 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3544 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3545 } 3546 3547 void 3548 if_register_com_alloc(u_char type, 3549 if_com_alloc_t *a, if_com_free_t *f) 3550 { 3551 3552 KASSERT(if_com_alloc[type] == NULL, 3553 ("if_register_com_alloc: %d already registered", type)); 3554 KASSERT(if_com_free[type] == NULL, 3555 ("if_register_com_alloc: %d free already registered", type)); 3556 3557 if_com_alloc[type] = a; 3558 if_com_free[type] = f; 3559 } 3560 3561 void 3562 if_deregister_com_alloc(u_char type) 3563 { 3564 3565 KASSERT(if_com_alloc[type] != NULL, 3566 ("if_deregister_com_alloc: %d not registered", type)); 3567 KASSERT(if_com_free[type] != NULL, 3568 ("if_deregister_com_alloc: %d free not registered", type)); 3569 if_com_alloc[type] = NULL; 3570 if_com_free[type] = NULL; 3571 } 3572 3573 void 3574 ifq_set_maxlen(struct ifaltq *ifq, int len) 3575 { 3576 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3577 } 3578 3579 int 3580 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3581 { 3582 return 
ALTQ_SUBQ_INDEX_DEFAULT; 3583 } 3584 3585 int 3586 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3587 { 3588 3589 return (cpuid % ifq->altq_subq_mappriv); 3590 } 3591 3592 /* 3593 * Watchdog timeout. Process callback as appropriate. If we cannot 3594 * serialize the ifnet just try again on the next timeout. 3595 * 3596 * NOTE: The ifnet can adjust wd_timer while holding the serializer. We 3597 * can only safely adjust it under the same circumstances. 3598 */ 3599 static void 3600 ifsq_watchdog(void *arg) 3601 { 3602 struct ifsubq_watchdog *wd = arg; 3603 struct ifnet *ifp; 3604 int count; 3605 3606 /* 3607 * Fast track. Try to avoid acquiring the serializer when not 3608 * near the terminal count, unless asked to. If the atomic op 3609 * to decrement the count fails just retry on the next callout. 3610 */ 3611 count = wd->wd_timer; 3612 cpu_ccfence(); 3613 if (count == 0) 3614 goto done; 3615 if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) { 3616 (void)atomic_cmpset_int(&wd->wd_timer, count, count - 1); 3617 goto done; 3618 } 3619 3620 /* 3621 * Obtain the serializer and then re-test all wd_timer conditions 3622 * as it may have changed. NICs do not mess with wd_timer without 3623 * holding the serializer. 3624 * 3625 * If we are unable to obtain the serializer just retry the same 3626 * count on the next callout. 
3627 * 3628 * - call watchdog in terminal count (0) 3629 * - call watchdog on last tick (1) if requested 3630 * - call watchdog on all ticks if requested 3631 */ 3632 ifp = ifsq_get_ifp(wd->wd_subq); 3633 if (ifnet_tryserialize_all(ifp) == 0) 3634 goto done; 3635 if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) { 3636 --count; 3637 if (count == 0 || 3638 (wd->wd_flags & IF_WDOG_ALLTICKS) || 3639 ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) { 3640 wd->wd_watchdog(wd->wd_subq); 3641 } 3642 } 3643 ifnet_deserialize_all(ifp); 3644 done: 3645 ifsq_watchdog_reset(wd); 3646 } 3647 3648 static void 3649 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3650 { 3651 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3652 ifsq_get_cpuid(wd->wd_subq)); 3653 } 3654 3655 void 3656 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3657 ifsq_watchdog_t watchdog, int flags) 3658 { 3659 callout_init_mp(&wd->wd_callout); 3660 wd->wd_timer = 0; 3661 wd->wd_flags = flags; 3662 wd->wd_subq = ifsq; 3663 wd->wd_watchdog = watchdog; 3664 } 3665 3666 void 3667 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3668 { 3669 atomic_swap_int(&wd->wd_timer, 0); 3670 ifsq_watchdog_reset(wd); 3671 } 3672 3673 void 3674 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3675 { 3676 atomic_swap_int(&wd->wd_timer, 0); 3677 callout_stop(&wd->wd_callout); 3678 } 3679 3680 void 3681 ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count) 3682 { 3683 atomic_swap_int(&wd->wd_timer, count); 3684 } 3685 3686 void 3687 ifnet_lock(void) 3688 { 3689 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3690 ("try holding ifnet lock in netisr")); 3691 mtx_lock(&ifnet_mtx); 3692 } 3693 3694 void 3695 ifnet_unlock(void) 3696 { 3697 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3698 ("try holding ifnet lock in netisr")); 3699 mtx_unlock(&ifnet_mtx); 3700 } 3701 3702 static struct ifnet_array * 3703 ifnet_array_alloc(int count) 3704 { 3705 struct ifnet_array *arr; 3706 3707 arr 
= kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3708 M_IFNET, M_WAITOK); 3709 arr->ifnet_count = count; 3710 3711 return arr; 3712 } 3713 3714 static void 3715 ifnet_array_free(struct ifnet_array *arr) 3716 { 3717 if (arr == &ifnet_array0) 3718 return; 3719 kfree(arr, M_IFNET); 3720 } 3721 3722 static struct ifnet_array * 3723 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3724 { 3725 struct ifnet_array *arr; 3726 int count, i; 3727 3728 KASSERT(old_arr->ifnet_count >= 0, 3729 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3730 count = old_arr->ifnet_count + 1; 3731 arr = ifnet_array_alloc(count); 3732 3733 /* 3734 * Save the old ifnet array and append this ifp to the end of 3735 * the new ifnet array. 3736 */ 3737 for (i = 0; i < old_arr->ifnet_count; ++i) { 3738 KASSERT(old_arr->ifnet_arr[i] != ifp, 3739 ("%s is already in ifnet array", ifp->if_xname)); 3740 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3741 } 3742 KASSERT(i == count - 1, 3743 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3744 ifp->if_xname, count - 1, i)); 3745 arr->ifnet_arr[i] = ifp; 3746 3747 return arr; 3748 } 3749 3750 static struct ifnet_array * 3751 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3752 { 3753 struct ifnet_array *arr; 3754 int count, i, idx, found = 0; 3755 3756 KASSERT(old_arr->ifnet_count > 0, 3757 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3758 count = old_arr->ifnet_count - 1; 3759 arr = ifnet_array_alloc(count); 3760 3761 /* 3762 * Save the old ifnet array, but skip this ifp. 
3763 */ 3764 idx = 0; 3765 for (i = 0; i < old_arr->ifnet_count; ++i) { 3766 if (old_arr->ifnet_arr[i] == ifp) { 3767 KASSERT(!found, 3768 ("dup %s is in ifnet array", ifp->if_xname)); 3769 found = 1; 3770 continue; 3771 } 3772 KASSERT(idx < count, 3773 ("invalid ifnet array index %d, count %d", idx, count)); 3774 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3775 ++idx; 3776 } 3777 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3778 KASSERT(idx == count, 3779 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3780 ifp->if_xname, count, idx)); 3781 3782 return arr; 3783 } 3784 3785 const struct ifnet_array * 3786 ifnet_array_get(void) 3787 { 3788 const struct ifnet_array *ret; 3789 3790 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3791 ret = ifnet_array; 3792 /* Make sure 'ret' is really used. */ 3793 cpu_ccfence(); 3794 return (ret); 3795 } 3796 3797 int 3798 ifnet_array_isempty(void) 3799 { 3800 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3801 if (ifnet_array->ifnet_count == 0) 3802 return 1; 3803 else 3804 return 0; 3805 } 3806 3807 void 3808 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3809 { 3810 struct ifaddr *ifa; 3811 3812 memset(mark, 0, sizeof(*mark)); 3813 ifa = &mark->ifa; 3814 3815 mark->ifac.ifa = ifa; 3816 3817 ifa->ifa_addr = &mark->addr; 3818 ifa->ifa_dstaddr = &mark->dstaddr; 3819 ifa->ifa_netmask = &mark->netmask; 3820 ifa->ifa_ifp = ifp; 3821 } 3822 3823 static int 3824 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3825 { 3826 3827 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3828 3829 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3830 ring_cnt = ring_cntmax; 3831 if (ring_cnt > netisr_ncpus) 3832 ring_cnt = netisr_ncpus; 3833 return (ring_cnt); 3834 } 3835 3836 static void 3837 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3838 { 3839 int i, offset; 3840 3841 KASSERT(grid > 0, ("invalid if_ringmap grid %d", 
grid)); 3842 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3843 grid, rm->rm_cnt)); 3844 rm->rm_grid = grid; 3845 3846 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3847 for (i = 0; i < rm->rm_cnt; ++i) { 3848 rm->rm_cpumap[i] = offset + i; 3849 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3850 ("invalid cpumap[%d] = %d, offset %d", i, 3851 rm->rm_cpumap[i], offset)); 3852 } 3853 } 3854 3855 static struct if_ringmap * 3856 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3857 uint32_t flags) 3858 { 3859 struct if_ringmap *rm; 3860 int i, grid = 0, prev_grid; 3861 3862 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3863 rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]), 3864 M_DEVBUF, M_WAITOK | M_ZERO); 3865 3866 rm->rm_cnt = ring_cnt; 3867 if (flags & RINGMAP_FLAG_POWEROF2) 3868 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3869 3870 prev_grid = netisr_ncpus; 3871 for (i = 0; i < netisr_ncpus; ++i) { 3872 if (netisr_ncpus % (i + 1) != 0) 3873 continue; 3874 3875 grid = netisr_ncpus / (i + 1); 3876 if (rm->rm_cnt > grid) { 3877 grid = prev_grid; 3878 break; 3879 } 3880 3881 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3882 break; 3883 prev_grid = grid; 3884 } 3885 if_ringmap_set_grid(dev, rm, grid); 3886 3887 return (rm); 3888 } 3889 3890 struct if_ringmap * 3891 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3892 { 3893 3894 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3895 RINGMAP_FLAG_NONE)); 3896 } 3897 3898 struct if_ringmap * 3899 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3900 { 3901 3902 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3903 RINGMAP_FLAG_POWEROF2)); 3904 } 3905 3906 void 3907 if_ringmap_free(struct if_ringmap *rm) 3908 { 3909 3910 kfree(rm, M_DEVBUF); 3911 } 3912 3913 /* 3914 * Align the two ringmaps. 3915 * 3916 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 
3917 * 3918 * Before: 3919 * 3920 * CPU 0 1 2 3 4 5 6 7 3921 * NIC_RX n0 n1 n2 n3 3922 * NIC_TX N0 N1 3923 * 3924 * After: 3925 * 3926 * CPU 0 1 2 3 4 5 6 7 3927 * NIC_RX n0 n1 n2 n3 3928 * NIC_TX N0 N1 3929 */ 3930 void 3931 if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3932 { 3933 3934 if (rm0->rm_grid > rm1->rm_grid) 3935 if_ringmap_set_grid(dev, rm1, rm0->rm_grid); 3936 else if (rm0->rm_grid < rm1->rm_grid) 3937 if_ringmap_set_grid(dev, rm0, rm1->rm_grid); 3938 } 3939 3940 void 3941 if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1) 3942 { 3943 int subset_grid, cnt, divisor, mod, offset, i; 3944 struct if_ringmap *subset_rm, *rm; 3945 int old_rm0_grid, old_rm1_grid; 3946 3947 if (rm0->rm_grid == rm1->rm_grid) 3948 return; 3949 3950 /* Save grid for later use */ 3951 old_rm0_grid = rm0->rm_grid; 3952 old_rm1_grid = rm1->rm_grid; 3953 3954 if_ringmap_align(dev, rm0, rm1); 3955 3956 /* 3957 * Re-shuffle rings to get more even distribution. 3958 * 3959 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 3960 * 3961 * CPU 0 1 2 3 4 5 6 7 8 9 10 11 3962 * 3963 * NIC_RX a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3 3964 * NIC_TX A0 A1 B0 B1 C0 C1 3965 * 3966 * NIC_RX d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3 3967 * NIC_TX D0 D1 E0 E1 F0 F1 3968 */ 3969 3970 if (rm0->rm_cnt >= (2 * old_rm1_grid)) { 3971 cnt = rm0->rm_cnt; 3972 subset_grid = old_rm1_grid; 3973 subset_rm = rm1; 3974 rm = rm0; 3975 } else if (rm1->rm_cnt > (2 * old_rm0_grid)) { 3976 cnt = rm1->rm_cnt; 3977 subset_grid = old_rm0_grid; 3978 subset_rm = rm0; 3979 rm = rm1; 3980 } else { 3981 /* No space to shuffle. 
*/ 3982 return; 3983 } 3984 3985 mod = cnt / subset_grid; 3986 KKASSERT(mod >= 2); 3987 divisor = netisr_ncpus / rm->rm_grid; 3988 offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid; 3989 3990 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3991 subset_rm->rm_cpumap[i] += offset; 3992 KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus, 3993 ("match: invalid cpumap[%d] = %d, offset %d", 3994 i, subset_rm->rm_cpumap[i], offset)); 3995 } 3996 #ifdef INVARIANTS 3997 for (i = 0; i < subset_rm->rm_cnt; ++i) { 3998 int j; 3999 4000 for (j = 0; j < rm->rm_cnt; ++j) { 4001 if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i]) 4002 break; 4003 } 4004 KASSERT(j < rm->rm_cnt, 4005 ("subset cpumap[%d] = %d not found in superset", 4006 i, subset_rm->rm_cpumap[i])); 4007 } 4008 #endif 4009 } 4010 4011 int 4012 if_ringmap_count(const struct if_ringmap *rm) 4013 { 4014 4015 return (rm->rm_cnt); 4016 } 4017 4018 int 4019 if_ringmap_cpumap(const struct if_ringmap *rm, int ring) 4020 { 4021 4022 KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring)); 4023 return (rm->rm_cpumap[ring]); 4024 } 4025 4026 void 4027 if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent) 4028 { 4029 int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy; 4030 4031 KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0, 4032 ("invalid redirect table entries %d", table_nent)); 4033 4034 grid_idx = 0; 4035 for (i = 0; i < NETISR_CPUMAX; ++i) { 4036 table[i] = grid_idx++ % rm->rm_cnt; 4037 4038 if (grid_idx == rm->rm_grid) 4039 grid_idx = 0; 4040 } 4041 4042 /* 4043 * Make the ring distributed more evenly for the remainder 4044 * of each grid. 4045 * 4046 * e.g. 12 netisrs, rm contains 8 rings. 4047 * 4048 * Redirect table before: 4049 * 4050 * 0 1 2 3 4 5 6 7 0 1 2 3 0 1 2 3 4051 * 4 5 6 7 0 1 2 3 0 1 2 3 4 5 6 7 4052 * 0 1 2 3 0 1 2 3 4 5 6 7 0 1 2 3 4053 * .... 
4054 * 4055 * Redirect table after being patched (pX, patched entries): 4056 * 4057 * 0 1 2 3 4 5 6 7 p0 p1 p2 p3 0 1 2 3 4058 * 4 5 6 7 p4 p5 p6 p7 0 1 2 3 4 5 6 7 4059 * p0 p1 p2 p3 0 1 2 3 4 5 6 7 p4 p5 p6 p7 4060 * .... 4061 */ 4062 patch_cnt = rm->rm_grid % rm->rm_cnt; 4063 if (patch_cnt == 0) 4064 goto done; 4065 patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt); 4066 4067 grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid; 4068 grid_idx = 0; 4069 for (i = 0; i < grid_cnt; ++i) { 4070 int j; 4071 4072 for (j = 0; j < patch_cnt; ++j) { 4073 int fix_idx; 4074 4075 fix_idx = (i * rm->rm_grid) + patch_off + j; 4076 if (fix_idx >= NETISR_CPUMAX) 4077 goto done; 4078 table[fix_idx] = grid_idx++ % rm->rm_cnt; 4079 } 4080 } 4081 done: 4082 /* 4083 * If the device supports larger redirect table, duplicate 4084 * the first NETISR_CPUMAX entries to the rest of the table, 4085 * so that it matches upper layer's expectation: 4086 * (hash & NETISR_CPUMASK) % netisr_ncpus 4087 */ 4088 ncopy = table_nent / NETISR_CPUMAX; 4089 for (i = 1; i < ncopy; ++i) { 4090 memcpy(&table[i * NETISR_CPUMAX], table, 4091 NETISR_CPUMAX * sizeof(table[0])); 4092 } 4093 if (if_ringmap_dumprdr) { 4094 for (i = 0; i < table_nent; ++i) { 4095 if (i != 0 && i % 16 == 0) 4096 kprintf("\n"); 4097 kprintf("%03d ", table[i]); 4098 } 4099 kprintf("\n"); 4100 } 4101 } 4102 4103 int 4104 if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS) 4105 { 4106 struct if_ringmap *rm = arg1; 4107 int i, error = 0; 4108 4109 for (i = 0; i < rm->rm_cnt; ++i) { 4110 int cpu = rm->rm_cpumap[i]; 4111 4112 error = SYSCTL_OUT(req, &cpu, sizeof(cpu)); 4113 if (error) 4114 break; 4115 } 4116 return (error); 4117 } 4118