/*
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/lock.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>
#include <sys/jail.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_ringmap.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr2.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif	/* INET6 */
#endif	/* INET || INET6 */

struct netmsg_ifaddr {
	struct netmsg_base	base;
	struct ifaddr		*ifa;
	struct ifnet		*ifp;
	int			tail;
};

struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage)	stg_head;
} __cachealign;

struct if_ringmap {
	int		rm_cnt;
	int		rm_grid;
	int		rm_cpumap[];
};

#define RINGMAP_FLAG_NONE		0x0
#define RINGMAP_FLAG_POWEROF2		0x1
/*
 * System initialization
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t, struct ucred *);
static void	ifinit(void *);
static void	ifnetinit(void *);
static void	if_slowtimo(void *);
static void	link_rtrequest(int, struct rtentry *);
static int	if_rtdel(struct radix_node *, void *);
static void	if_slowtimo_dispatch(netmsg_t);

/* Helper functions */
static void	ifsq_watchdog_reset(struct ifsubq_watchdog *);
static int	if_delmulti_serialized(struct ifnet *, struct sockaddr *);
static struct ifnet_array *ifnet_array_alloc(int);
static void	ifnet_array_free(struct ifnet_array *);
static struct ifnet_array *ifnet_array_add(struct ifnet *,
		    const struct ifnet_array *);
static struct ifnet_array *ifnet_array_del(struct ifnet *,
		    const struct ifnet_array *);
static struct ifg_group *if_creategroup(const char *);
static int	if_destroygroup(struct ifg_group *);
static int	if_delgroup_locked(struct ifnet *, const char *);
static int	if_getgroups(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6-related files;
 * should this be generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
SYSCTL_NODE(_net_link, OID_AUTO, ringmap, CTLFLAG_RW, 0, "link ringmap");

static int ifsq_stage_cntmax = 16;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");
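/*
 * Illustrative note (added commentary, not from the original source):
 * the staging threshold can be tuned at boot time or at runtime;
 * setting it to 0 disables staging entirely, so every enqueue
 * schedules ifnet.if_start immediately.
 *
 *	# /boot/loader.conf
 *	net.link.stage_cntmax="32"
 *
 *	# at runtime
 *	sysctl net.link.stage_cntmax=0
 */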
static int if_stats_compat = 0;
SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
    &if_stats_compat, 0, "Maintain compatibility with old ifnet stats");

static int if_ringmap_dumprdr = 0;
SYSCTL_INT(_net_link_ringmap, OID_AUTO, dump_rdr, CTLFLAG_RW,
    &if_ringmap_dumprdr, 0, "dump redirect table");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY, ifnetinit, NULL);

static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

int			ifqmaxlen = IFQ_MAXLEN;
struct ifnethead	ifnetlist = TAILQ_HEAD_INITIALIZER(ifnetlist);
struct ifgrouphead	ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);
static struct lock	ifgroup_lock;

static struct ifnet_array	ifnet_array0;
static struct ifnet_array	*ifnet_array = &ifnet_array0;

static struct callout		if_slowtimo_timer;
static struct netmsg_base	if_slowtimo_netmsg;

int			if_index = 0;
struct ifnet		**ifindex2ifnet = NULL;
static struct mtx	ifnet_mtx = MTX_INITIALIZER("ifnet");

static struct ifsubq_stage_head	ifsubq_stage_heads[MAXCPU];

#ifdef notyet
#define IFQ_KTR_STRING		"ifq=%p"
#define IFQ_KTR_ARGS		struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ			KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg)	KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING	"ifp=%p"
#define IF_START_KTR_ARGS	struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START		KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
	 IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg)	KTR_LOG(if_start_ ## name, arg)
#endif	/* notyet */

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */
/* ARGSUSED */
static void
ifinit(void *dummy)
{
	lockinit(&ifgroup_lock, "ifgroup", 0, 0);

	callout_init_mp(&if_slowtimo_timer);
	netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, if_slowtimo_dispatch);

	/* Start if_slowtimo */
	lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
}

static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
	crit_exit();
}

static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}

static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}

/*
 * Schedule ifnet.if_start on the subqueue owner CPU
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}
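/*
 * Descriptive note (added commentary, hedged): when called from a
 * netisr thread with staging enabled, the subqueue is parked on this
 * CPU's ifsubq_stage_head instead of being dispatched right away; the
 * staged subqueues are expected to be flushed in batch later (e.g. by
 * a netisr rollup), which amortizes the IPI/message cost.  Forced
 * scheduling (force != 0) always takes the IPI or message path to the
 * subqueue owner CPU.
 */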
/*
 * NOTE:
 * This function will release the ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled.
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * The ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o  interface is marked down
		 *    o  hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when the hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    The TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}

static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0);	/* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, so ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: the ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}

/* Device driver ifnet.if_start helper function */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data needs to be transmitted, so ifnet.if_start is
		 * scheduled on the ifnet's CPU, and we keep going.
		 * NOTE: the ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

void
if_devstart(struct ifnet *ifp)
{
	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
}

/* Device driver ifnet.if_start schedule helper function */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	ifsq_ifstart_schedule(ifsq, 1);
}

void
if_devstart_sched(struct ifnet *ifp)
{
	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
}
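/*
 * Example (sketch, hypothetical driver; the "foo_*" names are
 * illustrative and not part of this file): a typical driver calls
 * if_devstart() from its TX completion handler once ring space has
 * been reclaimed, clearing OACTIVE first so the restart is not
 * short-circuited:
 *
 *	static void
 *	foo_txeof(struct foo_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->arpcom.ac_if;
 *
 *		// ... reclaim completed TX descriptors ...
 *		ifq_clr_oactive(&ifp->if_snd);
 *		if (!ifq_is_empty(&ifp->if_snd))
 *			if_devstart(ifp);
 *	}
 */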
static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
static void
if_default_serialize_assert(struct ifnet *ifp,
    enum ifnet_serialize slz __unused, boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif

/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q, qlen;
	char qlenname[64];

	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
		    ifp->if_tryserialize != NULL &&
		    ifp->if_serialize_assert != NULL,
		    ("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
		    ("both serialize functions and default serializer "
		     "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
		    ifp->if_tryserialize == NULL &&
		    ifp->if_serialize_assert == NULL,
		    ("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * Make if_addrheads available on all CPUs, since they
	 * could be accessed by any thread.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
	    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);
	if_addgroup(ifp, IFG_ALL);

	/*
	 * Create a Link Level name for this device.
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	ifa = ifa_create(sizeof(struct ifaddr) + 2 * socksize);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	/*
	 * Make if_data available on all CPUs, since it could
	 * be updated by hardware interrupt routines, which could
	 * be bound to any CPU.
	 */
	ifp->if_data_pcpu = kmalloc(ncpus * sizeof(struct ifdata_pcpu),
	    M_DEVBUF, M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq =
	    kmalloc(ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
	    M_DEVBUF, M_WAITOK | M_ZERO | M_CACHEALIGN);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/* Allow user to override driver's setting. */
	ksnprintf(qlenname, sizeof(qlenname), "net.%s.qlenmax", ifp->if_xname);
	qlen = -1;
	TUNABLE_INT_FETCH(qlenname, &qlen);
	if (qlen > 0) {
		if_printf(ifp, "qlenmax -> %d\n", qlen);
		ifq_set_maxlen(ifq, qlen);
	}

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		/* XXX: netisr_ncpus */
		ifsq->ifsq_stage =
		    kmalloc(ncpus * sizeof(struct ifsubq_stage),
		    M_DEVBUF, M_WAITOK | M_ZERO | M_CACHEALIGN);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		/*
		 * Allocate one if_start message for each CPU, since
		 * the hardware TX ring could be assigned to any CPU.
		 *
		 * NOTE:
		 * If the hardware TX ring polling CPU and the hardware
		 * TX ring interrupt CPU are the same, one if_start
		 * message should be enough.
		 */
		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Increase mbuf cluster/jcluster limits for the mbufs that
	 * could sit on the device queues for quite some time.
	 */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(ifp->if_nmbjclusters);

	/*
	 * Install this ifp into ifindex2ifnet, the ifnet queue and the
	 * ifnet array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by the ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			bcopy(ifindex2ifnet, q, n / 2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}
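/*
 * Example (sketch, hypothetical driver attach; the "foo_*" names are
 * illustrative): drivers normally fill in the ifnet and then let
 * their link-layer attach routine call if_attach().  For Ethernet
 * drivers, ether_ifattach() does this; passing a NULL serializer
 * selects the embedded default serializer set up above.
 *
 *	ifp->if_softc = sc;
 *	if_initname(ifp, "foo", unit);
 *	ifp->if_init = foo_init;
 *	ifp->if_ioctl = foo_ioctl;
 *	ifp->if_start = foo_start;
 *	ifq_set_maxlen(&ifp->if_snd, FOO_TX_NDESC - 1);
 *	ether_ifattach(ifp, sc->foo_macaddr, NULL);
 */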
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    if_attachdomain, NULL);

static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct ifnet *ifp = nmsg->lmsg.u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	ASSERT_NETISR0;

	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking is acceptable
	 * here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
	    ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
			struct sockaddr_in saved_addr, saved_dst;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			/* Save information for panic. */
			memcpy(&saved_addr, ifa->ifa_addr, sizeof(saved_addr));
			if (ifa->ifa_dstaddr != NULL) {
				memcpy(&saved_dst, ifa->ifa_dstaddr,
				    sizeof(saved_dst));
			} else {
				memset(&saved_dst, 0, sizeof(saved_dst));
			}

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;

			/* MUST NOT HAPPEN */
			panic("%s: in_control failed %x, dst %x",
			    ifp->if_xname,
			    ntohl(saved_addr.sin_addr.s_addr),
			    ntohl(saved_dst.sin_addr.s_addr));
		}
#endif	/* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i) {
				kprintf("%d ",
				    ifa->ifa_containers[i].ifa_refcnt);
			}
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif	/* INET6 */
		if_printf(ifp, "destroy ifaddr family %d\n",
		    ifa->ifa_addr->sa_family);
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	netisr_replymsg(&nmsg->base, 0);
}

void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	nmsg.lmsg.u.ms_resultp = ifp;
	netisr_domsg(&nmsg, 0);
}
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	/* XXX netisr_ncpus */
	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}

struct netmsg_if_rtdel {
	struct netmsg_base	base;
	struct ifnet		*ifp;
};

static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, cpu;

	cpu = mycpuid;
	ASSERT_NETISR_NCPUS(cpu);

	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}
	netisr_forwardmsg(&msg->base, cpu + 1);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct ifg_list *ifgl;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2ifnet, the ifnet queue and the
	 * ifnet array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by the ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	ifgroup_lockmgr(LK_EXCLUSIVE);
	while ((ifgl = TAILQ_FIRST(&ifp->if_groups)) != NULL)
		if_delgroup_locked(ifp, ifgl->ifgl_group->ifg_group);
	ifgroup_lockmgr(LK_RELEASE);

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

	/* Decrease the mbuf cluster/jcluster limits increased by us */
	if (ifp->if_nmbclusters > 0)
		mcl_inclimit(-ifp->if_nmbclusters);
	if (ifp->if_nmbjclusters > 0)
		mjcl_inclimit(-ifp->if_nmbjclusters);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
		    ("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
		    ("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be
	 * done before removing routing entries below, since IPv6 interface
	 * direct routes are expected to be removed by the IPv6-specific
	 * kernel API.  Otherwise, the kernel will detect some inconsistency
	 * and bark at it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	netisr_domsg_global(&msg.base);

	SLIST_FOREACH(dp, &domains, dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
	}

	kfree(ifp->if_addrheads, M_IFADDR);

	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}
int
ifgroup_lockmgr(u_int flags)
{
	return lockmgr(&ifgroup_lock, flags);
}

/*
 * Create an empty interface group.
 */
static struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg;

	ifg = kmalloc(sizeof(*ifg), M_IFNET, M_WAITOK);
	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_attach_event, ifg);

	return (ifg);
}

/*
 * Destroy an empty interface group.
 */
static int
if_destroygroup(struct ifg_group *ifg)
{
	KASSERT(ifg->ifg_refcnt == 0,
	    ("trying to delete a non-empty interface group"));

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_REMOVE(&ifg_head, ifg, ifg_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_detach_event, ifg);
	kfree(ifg, M_IFNET);

	return (0);
}
/*
 * Add the interface to a group.
 * The target group will be created if it doesn't exist.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;

	if (groupname[0] &&
	    groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	ifgroup_lockmgr(LK_SHARED);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0) {
			ifgroup_lockmgr(LK_RELEASE);
			return (EEXIST);
		}
	}

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, groupname) == 0)
			break;
	}

	ifgroup_lockmgr(LK_RELEASE);

	if (ifg == NULL)
		ifg = if_creategroup(groupname);

	ifgl = kmalloc(sizeof(*ifgl), M_IFNET, M_WAITOK);
	ifgm = kmalloc(sizeof(*ifgm), M_IFNET, M_WAITOK);
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;
	ifg->ifg_refcnt++;

	ifgroup_lockmgr(LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
	ifgroup_lockmgr(LK_RELEASE);

	EVENTHANDLER_INVOKE(group_change_event, groupname);

	return (0);
}
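/*
 * Note (added commentary): a group name may not end in a digit; a
 * trailing digit would make the name look like an interface unit.
 * For example, "egress" is accepted, while "vlan0" fails with EINVAL.
 */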
/*
 * Remove the interface from a group.
 * The group will be destroyed if it becomes empty.
 *
 * The 'ifgroup_lock' must be held exclusively when calling this.
 */
static int
if_delgroup_locked(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	KKASSERT(lockstatus(&ifgroup_lock, curthread) == LK_EXCLUSIVE);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
			break;
	}
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
		if (ifgm->ifgm_ifp == ifp)
			break;
	}

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);

		ifgroup_lockmgr(LK_RELEASE);
		EVENTHANDLER_INVOKE(group_change_event, groupname);
		ifgroup_lockmgr(LK_EXCLUSIVE);

		kfree(ifgm, M_IFNET);
		ifgl->ifgl_group->ifg_refcnt--;
	}

	if (ifgl->ifgl_group->ifg_refcnt == 0) {
		ifgroup_lockmgr(LK_RELEASE);
		if_destroygroup(ifgl->ifgl_group);
		ifgroup_lockmgr(LK_EXCLUSIVE);
	}

	kfree(ifgl, M_IFNET);

	return (0);
}

int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	int error;

	ifgroup_lockmgr(LK_EXCLUSIVE);
	error = if_delgroup_locked(ifp, groupname);
	ifgroup_lockmgr(LK_RELEASE);

	return (error);
}

/*
 * Store all the groups that the interface belongs to in memory
 * pointed to by data.
 */
static int
if_getgroups(struct ifgroupreq *ifgr, struct ifnet *ifp)
{
	struct ifg_list *ifgl;
	struct ifg_req *ifgrq, *p;
	int len, error;

	len = 0;
	ifgroup_lockmgr(LK_SHARED);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		len += sizeof(struct ifg_req);
	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		/*
		 * Caller is asking how much memory should be allocated in
		 * the next request in order to hold all the groups.
		 */
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(p->ifgrq_group));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}
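/*
 * Example (sketch, userland): if_getgroups() implements a two-call
 * protocol; the first call with ifgr_len == 0 reports the required
 * buffer size, and the second call copies the records out.  Error
 * handling is omitted for brevity.
 *
 *	struct ifgroupreq ifgr;
 *
 *	memset(&ifgr, 0, sizeof(ifgr));
 *	strlcpy(ifgr.ifgr_name, "em0", sizeof(ifgr.ifgr_name));
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		// sizing call
 *	ifgr.ifgr_groups = malloc(ifgr.ifgr_len);
 *	ioctl(s, SIOCGIFGROUP, &ifgr);		// fetch the groups
 */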
/*
 * Store all the members of a group in memory pointed to by data.
 */
static int
if_getgroupmembers(struct ifgroupreq *ifgr)
{
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req *ifgrq, *p;
	int len, error;

	ifgroup_lockmgr(LK_SHARED);

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	}
	if (ifg == NULL) {
		ifgroup_lockmgr(LK_RELEASE);
		return (ENOENT);
	}

	len = 0;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
		len += sizeof(struct ifg_req);

	ifgroup_lockmgr(LK_RELEASE);

	if (ifgr->ifgr_len == 0) {
		ifgr->ifgr_len = len;
		return (0);
	} else if (ifgr->ifgr_len != len) {
		return (EINVAL);
	}

	ifgrq = kmalloc(len, M_TEMP, M_INTWAIT | M_NULLOK | M_ZERO);
	if (ifgrq == NULL)
		return (ENOMEM);

	ifgroup_lockmgr(LK_SHARED);
	p = ifgrq;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(struct ifg_req)) {
			ifgroup_lockmgr(LK_RELEASE);
			error = EINVAL;
			goto failed;
		}

		strlcpy(p->ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(p->ifgrq_member));
		len -= sizeof(struct ifg_req);
		p++;
	}
	ifgroup_lockmgr(LK_RELEASE);

	error = copyout(ifgrq, ifgr->ifgr_groups, ifgr->ifgr_len);
failed:
	kfree(ifgrq, M_TEMP);
	return error;
}

/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via the rnh->rnh_walktree() call above
 * to delete all route entries referencing a detaching network interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	successful
 *	errno	failed - reason indicated
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	if (rt->rt_ifp == ifp) {
		/*
		 * Protect (sorta) against walktree recursion problems
		 * with cloned routes
		 */
		if (!(rt->rt_flags & RTF_UP))
			return (0);

		err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
		    rt_mask(rt), rt->rt_flags, NULL);
		if (err)
			log(LOG_WARNING, "if_rtdel: error %d\n", err);
	}

	return (0);
}

static __inline boolean_t
ifa_prefer(const struct ifaddr *cur_ifa, const struct ifaddr *old_ifa)
{
	if (old_ifa == NULL)
		return TRUE;

	if ((old_ifa->ifa_ifp->if_flags & IFF_UP) == 0 &&
	    (cur_ifa->ifa_ifp->if_flags & IFF_UP))
		return TRUE;
	if ((old_ifa->ifa_flags & IFA_ROUTE) == 0 &&
	    (cur_ifa->ifa_flags & IFA_ROUTE))
		return TRUE;
	return FALSE;
}
/*
 * Locate an interface based on a complete address.
 */
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (sa_equal(addr, ifa->ifa_addr))
				return (ifa);
			if ((ifp->if_flags & IFF_BROADCAST) &&
			    ifa->ifa_broadaddr &&
			    /* IPv6 doesn't have broadcast */
			    ifa->ifa_broadaddr->sa_len != 0 &&
			    sa_equal(ifa->ifa_broadaddr, addr))
				return (ifa);
		}
	}
	return (NULL);
}

/*
 * Locate the point-to-point interface with a given destination address.
 */
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	const struct ifnet_array *arr;
	int i;

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;

			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		}
	}
	return (NULL);
}
/*
 * Find an interface on a specific network.  If many, choose
 * the most specific one found.
 */
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	char *addr_data = addr->sa_data, *cplim;
	const struct ifnet_array *arr;
	int i;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
		struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;

		if (sdl->sdl_index && sdl->sdl_index <= if_index)
			return (ifindex2ifnet[sdl->sdl_index]->if_lladdr);
	}

	/*
	 * Scan through each interface, looking for ones that have
	 * addresses in this address family.
	 */
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaddr_container *ifac;

		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
			struct ifaddr *ifa = ifac->ifa;
			char *cp, *cp2, *cp3;

			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr))
					return (ifa);
			} else {
				/*
				 * If we have a special address handler,
				 * then use it instead of the generic one.
				 */
				if (ifa->ifa_claim_addr) {
					if ((*ifa->ifa_claim_addr)(ifa, addr)) {
						return (ifa);
					} else {
						continue;
					}
				}

				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len +
				    (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one) then remember the new one
				 * before continuing to search for an even
				 * better one.  If the netmasks are equal,
				 * we prefer this ifa based on the result
				 * of ifa_prefer().
				 */
				if (ifa_maybe == NULL ||
				    rn_refines((char *)ifa->ifa_netmask,
				        (char *)ifa_maybe->ifa_netmask) ||
				    (sa_equal(ifa_maybe->ifa_netmask,
				        ifa->ifa_netmask) &&
				     ifa_prefer(ifa, ifa_maybe)))
					ifa_maybe = ifa;
			}
		}
	}
	return (ifa_maybe);
}
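/*
 * Illustrative sketch (added, not part of the original source): the
 * byte-wise masked comparison above answers "is addr on the same
 * network as ifa_addr?" without any knowledge of the address family;
 * bits outside the netmask are simply ignored:
 *
 *	static boolean_t
 *	masked_match(const char *a, const char *b, const char *mask,
 *	    int masklen)
 *	{
 *		int i;
 *
 *		for (i = 0; i < masklen; ++i) {
 *			if ((a[i] ^ b[i]) & mask[i])
 *				return (FALSE);
 *		}
 *		return (TRUE);
 *	}
 */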
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr_container *ifac;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family != af)
			continue;
		if (ifa_maybe == NULL)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == NULL) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr != NULL &&
			     sa_equal(addr, ifa->ifa_dstaddr)))
				return (ifa);
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			if (sa_equal(addr, ifa->ifa_dstaddr))
				return (ifa);
		} else {
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len +
			    (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				return (ifa);
		}
	}
	return (ifa_maybe);
}

/*
 * Default action when installing a route with a Link Level gateway.
 * Lookup an appropriate real ifa to point to.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt)
{
	struct ifaddr *ifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL ||
	    (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL)
		return;
	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa != NULL) {
		IFAFREE(rt->rt_ifa);
		IFAREF(ifa);
		rt->rt_ifa = ifa;
		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
			ifa->ifa_rtrequest(cmd, rt);
	}
}

struct netmsg_if {
	struct netmsg_base	base;
	struct ifnet		*ifp;
};

/*
 * Mark an interface down and notify protocols of the transition.
 */
static void
if_down_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifp->if_flags &= ~IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking is acceptable
	 * here.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFDOWN, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_down != NULL)
			dp->dom_if_down(ifp);

	ifq_purge_all(&ifp->if_snd);
	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface up and notify protocols of the transition.
 */
static void
if_up_dispatch(netmsg_t nmsg)
{
	struct netmsg_if *msg = (struct netmsg_if *)nmsg;
	struct ifnet *ifp = msg->ifp;
	struct ifaddr_container *ifac;
	struct domain *dp;

	ASSERT_NETISR0;

	ifq_purge_all(&ifp->if_snd);
	ifp->if_flags |= IFF_UP;
	getmicrotime(&ifp->if_lastchange);
	rt_ifmsg(ifp);

	/*
	 * The ifaddr processing in the following loop will block;
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so the blocking is acceptable
	 * here.
	 */
	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		kpfctlinput(PRC_IFUP, ifa->ifa_addr);
	}

	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_if_up != NULL)
			dp->dom_if_up(ifp);

	netisr_replymsg(&nmsg->base, 0);
}

/*
 * Mark an interface down and notify protocols of the transition.  An
 * interface going down is also considered to be a synchronizing event.
 * We must ensure that all packet processing related to the interface
 * has completed before we return so e.g. the caller can free the ifnet
 * structure that the mbufs may be referencing.
 *
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{
	struct netmsg_if msg;

	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_down_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	netmsg_service_sync();
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{
	struct netmsg_if msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0,
	    if_up_dispatch);
	msg.ifp = ifp;
	netisr_domsg(&msg.base, 0);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}
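/*
 * Example (sketch, userland): if_down()/if_up() are typically driven
 * by an administrator toggling IFF_UP via SIOCSIFFLAGS, which is
 * handled in ifioctl() below.  Error handling is omitted.
 *
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strlcpy(ifr.ifr_name, "em0", sizeof(ifr.ifr_name));
 *	ioctl(s, SIOCGIFFLAGS, &ifr);
 *	ifr.ifr_flags &= ~IFF_UP;		// "ifconfig em0 down"
 *	ioctl(s, SIOCSIFFLAGS, &ifr);
 */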
/*
 * Process a link state change.
 * NOTE: must be called at splsoftnet or equivalent.
 */
void
if_link_state_change(struct ifnet *ifp)
{
	int link_state = ifp->if_link_state;

	rt_ifmsg(ifp);
	devctl_notify("IFNET", ifp->if_xname,
	    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL);

	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
}

/*
 * Handle interface watchdog timer routines.  Called
 * from softclock, we decrement timers (if set) and
 * call the appropriate interface routine on expiration.
 */
static void
if_slowtimo_dispatch(netmsg_t nmsg)
{
	struct globaldata *gd = mycpu;
	const struct ifnet_array *arr;
	int i;

	ASSERT_NETISR0;

	crit_enter_gd(gd);
	lwkt_replymsg(&nmsg->lmsg, 0);	/* reply ASAP */
	crit_exit_gd(gd);

	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];

		crit_enter_gd(gd);

		if (if_stats_compat) {
			IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets);
			IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors);
			IFNET_STAT_GET(ifp, opackets, ifp->if_opackets);
			IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors);
			IFNET_STAT_GET(ifp, collisions, ifp->if_collisions);
			IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes);
			IFNET_STAT_GET(ifp, obytes, ifp->if_obytes);
			IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts);
			IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts);
			IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops);
			IFNET_STAT_GET(ifp, noproto, ifp->if_noproto);
			IFNET_STAT_GET(ifp, oqdrops, ifp->if_oqdrops);
		}

		if (ifp->if_timer == 0 || --ifp->if_timer) {
			crit_exit_gd(gd);
			continue;
		}
		if (ifp->if_watchdog) {
			if (ifnet_tryserialize_all(ifp)) {
				(*ifp->if_watchdog)(ifp);
				ifnet_deserialize_all(ifp);
			} else {
				/* try again next timeout */
				++ifp->if_timer;
			}
		}

		crit_exit_gd(gd);
	}

	callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL);
}

static void
if_slowtimo(void *arg __unused)
{
	struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg;

	KASSERT(mycpuid == 0, ("not on cpu0"));
	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
	crit_exit();
}

/*
 * Map interface name to
 * interface structure pointer.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	/*
	 * Search all the interfaces for this name/number
	 */
	KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked"));

	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			break;
	}
	return (ifp);
}

struct ifnet *
ifunit_netisr(const char *name)
{
	const struct ifnet_array *arr;
	int i;

	/*
	 * Search all the interfaces for this name/number
	 */
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];

		if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0)
			return ifp;
	}
	return NULL;
}
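/*
 * Example (sketch): ifunit() asserts that the ifnet lock is held,
 * while ifunit_netisr() relies on the per-netisr consistency of the
 * ifnet array and needs no lock:
 *
 *	ifnet_lock();
 *	ifp = ifunit("em0");
 *	if (ifp != NULL) {
 *		// ... use ifp while still holding the lock ...
 *	}
 *	ifnet_unlock();
 */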
/*
 * Interface ioctls.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifnet *ifp;
	struct ifgroupreq *ifgr;
	struct ifreq *ifr;
	struct ifstat *ifs;
	int error, do_ifup = 0;
	short oif_flags;
	int new_flags;
	size_t namelen, onamelen;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	switch (cmd) {
	case SIOCGIFCONF:
		return (ifconf(cmd, data, cred));
	default:
		break;
	}

	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
		    (cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL), NULL));
	case SIOCIFDESTROY:
		if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
			return (error);
		return (if_clone_destroy(ifr->ifr_name));
	case SIOCIFGCLONERS:
		return (if_clone_list((struct if_clonereq *)data));
	case SIOCGIFGMEMB:
		return (if_getgroupmembers((struct ifgroupreq *)data));
	default:
		break;
	}

	/*
	 * Nominal ioctl through an interface; look up the ifp and obtain
	 * a lock to serialize the ifconfig ioctl operation.
	 */
	ifnet_lock();

	ifp = ifunit(ifr->ifr_name);
	if (ifp == NULL) {
		ifnet_unlock();
		return (ENXIO);
	}
	error = 0;

	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		ifr->ifr_flags = ifp->if_flags;
		ifr->ifr_flagshigh = ifp->if_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFTSOLEN:
		ifr->ifr_tsolen = ifp->if_tsolen;
		break;

	case SIOCGIFDATA:
		error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data,
		    sizeof(ifp->if_data));
		break;

	case SIOCGIFPHYS:
		ifr->ifr_phys = ifp->if_physical;
		break;

	case SIOCGIFPOLLCPU:
		ifr->ifr_pollcpu = -1;
		break;

	case SIOCSIFPOLLCPU:
		break;

	case SIOCSIFFLAGS:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		if (ifp->if_flags & IFF_SMART) {
			/* Smart drivers twiddle their own routes */
		} else if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			do_ifup = 1;
		}

#ifdef IFPOLL_ENABLE
		if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) {
			if (new_flags & IFF_NPOLLING)
				ifpoll_register(ifp);
			else
				ifpoll_deregister(ifp);
		}
#endif

		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
		    (new_flags & ~IFF_CANTCHANGE);
		if (new_flags & IFF_PPROMISC) {
			/* Permanently promiscuous mode requested */
			ifp->if_flags |= IFF_PROMISC;
		} else if (ifp->if_pcount == 0) {
			ifp->if_flags &= ~IFF_PROMISC;
		}
		if (ifp->if_ioctl) {
			ifnet_serialize_all(ifp);
			ifp->if_ioctl(ifp, cmd, data, cred);
			ifnet_deserialize_all(ifp);
		}
		if (do_ifup)
			if_up(ifp);
getmicrotime(&ifp->if_lastchange); 2043 break; 2044 2045 case SIOCSIFCAP: 2046 error = priv_check_cred(cred, PRIV_ROOT, 0); 2047 if (error) 2048 break; 2049 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 2050 error = EINVAL; 2051 break; 2052 } 2053 ifnet_serialize_all(ifp); 2054 ifp->if_ioctl(ifp, cmd, data, cred); 2055 ifnet_deserialize_all(ifp); 2056 break; 2057 2058 case SIOCSIFNAME: 2059 error = priv_check_cred(cred, PRIV_ROOT, 0); 2060 if (error) 2061 break; 2062 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 2063 if (error) 2064 break; 2065 if (new_name[0] == '\0') { 2066 error = EINVAL; 2067 break; 2068 } 2069 if (ifunit(new_name) != NULL) { 2070 error = EEXIST; 2071 break; 2072 } 2073 2074 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 2075 2076 /* Announce the departure of the interface. */ 2077 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 2078 2079 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 2080 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 2081 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 2082 namelen = strlen(new_name); 2083 onamelen = sdl->sdl_nlen; 2084 /* 2085 * Move the address if needed. This is safe because we 2086 * allocate space for a name of length IFNAMSIZ when we 2087 * create this in if_attach(). 2088 */ 2089 if (namelen != onamelen) { 2090 bcopy(sdl->sdl_data + onamelen, 2091 sdl->sdl_data + namelen, sdl->sdl_alen); 2092 } 2093 bcopy(new_name, sdl->sdl_data, namelen); 2094 sdl->sdl_nlen = namelen; 2095 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 2096 bzero(sdl->sdl_data, onamelen); 2097 while (namelen != 0) 2098 sdl->sdl_data[--namelen] = 0xff; 2099 2100 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 2101 2102 /* Announce the return of the interface. */ 2103 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 2104 break; 2105 2106 case SIOCSIFMETRIC: 2107 error = priv_check_cred(cred, PRIV_ROOT, 0); 2108 if (error) 2109 break; 2110 ifp->if_metric = ifr->ifr_metric; 2111 getmicrotime(&ifp->if_lastchange); 2112 break; 2113 2114 case SIOCSIFPHYS: 2115 error = priv_check_cred(cred, PRIV_ROOT, 0); 2116 if (error) 2117 break; 2118 if (ifp->if_ioctl == NULL) { 2119 error = EOPNOTSUPP; 2120 break; 2121 } 2122 ifnet_serialize_all(ifp); 2123 error = ifp->if_ioctl(ifp, cmd, data, cred); 2124 ifnet_deserialize_all(ifp); 2125 if (error == 0) 2126 getmicrotime(&ifp->if_lastchange); 2127 break; 2128 2129 case SIOCSIFMTU: 2130 { 2131 u_long oldmtu = ifp->if_mtu; 2132 2133 error = priv_check_cred(cred, PRIV_ROOT, 0); 2134 if (error) 2135 break; 2136 if (ifp->if_ioctl == NULL) { 2137 error = EOPNOTSUPP; 2138 break; 2139 } 2140 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 2141 error = EINVAL; 2142 break; 2143 } 2144 ifnet_serialize_all(ifp); 2145 error = ifp->if_ioctl(ifp, cmd, data, cred); 2146 ifnet_deserialize_all(ifp); 2147 if (error == 0) { 2148 getmicrotime(&ifp->if_lastchange); 2149 rt_ifmsg(ifp); 2150 } 2151 /* 2152 * If the link MTU changed, do network layer specific procedure. 
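		 * Currently this only matters for IPv6, which recomputes
		 * its per-interface parameters via nd6_setmtu() below.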
	 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
		}
		break;
	}

	case SIOCSIFTSOLEN:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* XXX need driver supplied upper limit */
		if (ifr->ifr_tsolen <= 0) {
			error = EINVAL;
			break;
		}
		ifp->if_tsolen = ifr->ifr_tsolen;
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0) {
			error = EOPNOTSUPP;
			break;
		}

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK) {
			error = EINVAL;
			break;
		}

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;

			error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSLIFPHYADDR:
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		ifs->ascii[0] = '\0';
		/* fall through */
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGLIFPHYADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFGENERIC:
		if (ifp->if_ioctl == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		ifnet_serialize_all(ifp);
		error = ifp->if_ioctl(ifp, cmd, data, cred);
		ifnet_deserialize_all(ifp);
		break;

	case SIOCSIFLLADDR:
		error = priv_check_cred(cred, PRIV_ROOT, 0);
		if (error)
			break;
		error = if_setlladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		EVENTHANDLER_INVOKE(iflladdr_event, ifp);
		break;

	case SIOCAIFGROUP:
		ifgr = (struct ifgroupreq *)ifr;
		/*
		 * Break instead of returning so that the ifnet lock taken
		 * above is always released on the way out.
		 */
		if ((error = priv_check_cred(cred, PRIV_NET_ADDIFGROUP, 0)))
			break;
		error = if_addgroup(ifp, ifgr->ifgr_group);
		break;

	case SIOCDIFGROUP:
		ifgr = (struct ifgroupreq *)ifr;
		if ((error = priv_check_cred(cred, PRIV_NET_DELIFGROUP, 0)))
			break;
		error = if_delgroup(ifp, ifgr->ifgr_group);
		break;

	case SIOCGIFGROUP:
		ifgr = (struct ifgroupreq *)ifr;
		error = if_getgroups(ifgr, ifp);
		break;

	default:
		oif_flags = ifp->if_flags;
		if (so->so_proto == 0) {
			error = EOPNOTSUPP;
			break;
		}
		error = so_pru_control_direct(so, cmd, data, ifp);

		/*
		 * If the socket control method returns EOPNOTSUPP, pass the
		 * request directly to the interface.
		 *
		 * Exclude the SIOCSIF{ADDR,BRDADDR,DSTADDR,NETMASK} ioctls,
		 * because drivers may trust these ioctls to come from an
		 * already privileged layer and thus do not perform credentials
		 * checks or input validation.
2290 */ 2291 if (error == EOPNOTSUPP && 2292 ifp->if_ioctl != NULL && 2293 cmd != SIOCSIFADDR && 2294 cmd != SIOCSIFBRDADDR && 2295 cmd != SIOCSIFDSTADDR && 2296 cmd != SIOCSIFNETMASK) { 2297 ifnet_serialize_all(ifp); 2298 error = ifp->if_ioctl(ifp, cmd, data, cred); 2299 ifnet_deserialize_all(ifp); 2300 } 2301 2302 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2303 #ifdef INET6 2304 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2305 if (ifp->if_flags & IFF_UP) { 2306 crit_enter(); 2307 in6_if_up(ifp); 2308 crit_exit(); 2309 } 2310 #endif 2311 } 2312 break; 2313 } 2314 2315 ifnet_unlock(); 2316 return (error); 2317 } 2318 2319 /* 2320 * Set/clear promiscuous mode on interface ifp based on the truth value 2321 * of pswitch. The calls are reference counted so that only the first 2322 * "on" request actually has an effect, as does the final "off" request. 2323 * Results are undefined if the "off" and "on" requests are not matched. 2324 */ 2325 int 2326 ifpromisc(struct ifnet *ifp, int pswitch) 2327 { 2328 struct ifreq ifr; 2329 int error; 2330 int oldflags; 2331 2332 oldflags = ifp->if_flags; 2333 if (ifp->if_flags & IFF_PPROMISC) { 2334 /* Do nothing if device is in permanently promiscuous mode */ 2335 ifp->if_pcount += pswitch ? 1 : -1; 2336 return (0); 2337 } 2338 if (pswitch) { 2339 /* 2340 * If the device is not configured up, we cannot put it in 2341 * promiscuous mode. 2342 */ 2343 if ((ifp->if_flags & IFF_UP) == 0) 2344 return (ENETDOWN); 2345 if (ifp->if_pcount++ != 0) 2346 return (0); 2347 ifp->if_flags |= IFF_PROMISC; 2348 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2349 ifp->if_xname); 2350 } else { 2351 if (--ifp->if_pcount > 0) 2352 return (0); 2353 ifp->if_flags &= ~IFF_PROMISC; 2354 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2355 ifp->if_xname); 2356 } 2357 ifr.ifr_flags = ifp->if_flags; 2358 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2359 ifnet_serialize_all(ifp); 2360 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2361 ifnet_deserialize_all(ifp); 2362 if (error == 0) 2363 rt_ifmsg(ifp); 2364 else 2365 ifp->if_flags = oldflags; 2366 return error; 2367 } 2368 2369 /* 2370 * Return interface configuration 2371 * of system. List may be used 2372 * in later ioctl's (above) to get 2373 * other information. 2374 */ 2375 static int 2376 ifconf(u_long cmd, caddr_t data, struct ucred *cred) 2377 { 2378 struct ifconf *ifc = (struct ifconf *)data; 2379 struct ifnet *ifp; 2380 struct sockaddr *sa; 2381 struct ifreq ifr, *ifrp; 2382 int space = ifc->ifc_len, error = 0; 2383 2384 ifrp = ifc->ifc_req; 2385 2386 ifnet_lock(); 2387 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2388 struct ifaddr_container *ifac, *ifac_mark; 2389 struct ifaddr_marker mark; 2390 struct ifaddrhead *head; 2391 int addrs; 2392 2393 if (space <= sizeof ifr) 2394 break; 2395 2396 /* 2397 * Zero the stack declared structure first to prevent 2398 * memory disclosure. 2399 */ 2400 bzero(&ifr, sizeof(ifr)); 2401 if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) 2402 >= sizeof(ifr.ifr_name)) { 2403 error = ENAMETOOLONG; 2404 break; 2405 } 2406 2407 /* 2408 * Add a marker, since copyout() could block and during that 2409 * period the list could be changed. Inserting the marker to 2410 * the header of the list will not cause trouble for the code 2411 * assuming that the first element of the list is AF_LINK; the 2412 * marker will be moved to the next position w/o blocking. 
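		 *
		 * The marker walk below follows the usual pattern (sketch):
		 *
		 *	TAILQ_INSERT_HEAD(head, marker, link);
		 *	while ((elem = TAILQ_NEXT(marker, link)) != NULL) {
		 *		TAILQ_REMOVE(head, marker, link);
		 *		TAILQ_INSERT_AFTER(head, elem, marker, link);
		 *		(process elem; copyout() may block)
		 *	}
		 *	TAILQ_REMOVE(head, marker, link);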
		 */
		ifa_marker_init(&mark, ifp);
		ifac_mark = &mark.ifac;
		head = &ifp->if_addrheads[mycpuid];

		addrs = 0;
		TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link);
		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
			struct ifaddr *ifa = ifac->ifa;

			TAILQ_REMOVE(head, ifac_mark, ifa_link);
			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);

			/* Ignore marker */
			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
				continue;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			if (cred->cr_prison && prison_if(cred, sa))
				continue;
			addrs++;
			/*
			 * Keep a reference on this ifaddr, so that it will
			 * not be destroyed while its address is copied out
			 * to userland, which could block.
			 */
			IFAREF(ifa);
			if (sa->sa_len <= sizeof(*sa)) {
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				if (space < (sizeof ifr) + sa->sa_len -
				    sizeof(*sa)) {
					IFAFREE(ifa);
					break;
				}
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
				    sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
					    sa->sa_len);
				ifrp = (struct ifreq *)
				    (sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			IFAFREE(ifa);
			if (error)
				break;
			space -= sizeof ifr;
		}
		TAILQ_REMOVE(head, ifac_mark, ifa_link);
		if (error)
			break;
		if (!addrs) {
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifnet_unlock();

	ifc->ifc_len -= space;
	return (error);
}

/*
 * Just like ifpromisc(), but for all-multicast-reception mode.
 */
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
	int error = 0;
	struct ifreq ifr;

	crit_enter();

	if (onswitch) {
		if (ifp->if_amcount++ == 0) {
			ifp->if_flags |= IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			    NULL);
			ifnet_deserialize_all(ifp);
		}
	} else {
		if (ifp->if_amcount > 1) {
			ifp->if_amcount--;
		} else {
			ifp->if_amcount = 0;
			ifp->if_flags &= ~IFF_ALLMULTI;
			ifr.ifr_flags = ifp->if_flags;
			ifr.ifr_flagshigh = ifp->if_flags >> 16;
			ifnet_serialize_all(ifp);
			error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr,
			    NULL);
			ifnet_deserialize_all(ifp);
		}
	}

	crit_exit();

	if (error == 0)
		rt_ifmsg(ifp);
	return error;
}

/*
 * Add a multicast listenership to the interface in question.
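 * Protocols call this through if_addmulti(); e.g. joining a group
 * (sketch; "sin" is a caller-built sockaddr_in for the group):
 *
 *	struct ifmultiaddr *ifma;
 *	error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
 *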
 * The link layer provides a routine, if_resolvemulti(), which converts
 * a network-layer group address into the matching link-layer address;
 * when such a mapping exists, a membership record for the link-layer
 * address is installed alongside the network-layer one.
 */
int
if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct sockaddr *llsa, *dupsa;
	int error;
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	/*
	 * If the matching multicast address already exists
	 * then don't add a new one, just add a reference.
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (sa_equal(sa, ifma->ifma_addr)) {
			ifma->ifma_refcount++;
			if (retifma)
				*retifma = ifma;
			return 0;
		}
	}

	/*
	 * Give the link layer a chance to accept/reject it, and also
	 * find out which AF_LINK address this maps to, if it isn't one
	 * already.
	 */
	if (ifp->if_resolvemulti) {
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			return error;
	} else {
		llsa = NULL;
	}

	ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
	dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_INTWAIT);
	bcopy(sa, dupsa, sa->sa_len);

	ifma->ifma_addr = dupsa;
	ifma->ifma_lladdr = llsa;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
	if (retifma)
		*retifma = ifma;

	if (llsa != NULL) {
		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			if (sa_equal(ifma->ifma_addr, llsa))
				break;
		}
		if (ifma) {
			ifma->ifma_refcount++;
		} else {
			ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_INTWAIT);
			dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_INTWAIT);
			bcopy(llsa, dupsa, llsa->sa_len);
			ifma->ifma_addr = dupsa;
			ifma->ifma_ifp = ifp;
			ifma->ifma_refcount = 1;
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
		}
	}
	/*
	 * We are certain we have added something, so call down to the
	 * interface to let it know about it.
	 */
	if (ifp->if_ioctl)
		ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL);

	return 0;
}

int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	int error;

	ifnet_serialize_all(ifp);
	error = if_addmulti_serialized(ifp, sa, retifma);
	ifnet_deserialize_all(ifp);

	return error;
}

/*
 * Remove a reference to a multicast address on this interface. Yell
 * if the request does not match an existing membership.
 */
static int
if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return ENOENT;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	rt_newmaddrmsg(RTM_DELMADDR, ifma);
	sa = ifma->ifma_lladdr;
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	/*
	 * Make sure the interface driver is notified
	 * in the case of a link layer mcast group being left.
	 */
	if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL)
		ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(ifma, M_IFMADDR);
	if (sa == NULL)
		return 0;

	/*
	 * Now look for the link-layer address which corresponds to
	 * this network address. It had been squirreled away in
	 * ifma->ifma_lladdr for this purpose (so we don't have
	 * to call ifp->if_resolvemulti() again), and we saved that
	 * value in sa above. If something nasty deleted the
	 * link-layer address out from underneath us, we can deal because
	 * the address we stored is not the same as the one which was
	 * in the record for the link-layer address. (So we don't complain
	 * in that case.)
	 */
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		if (sa_equal(sa, ifma->ifma_addr))
			break;
	if (ifma == NULL)
		return 0;

	if (ifma->ifma_refcount > 1) {
		ifma->ifma_refcount--;
		return 0;
	}

	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL);
	kfree(ifma->ifma_addr, M_IFMADDR);
	kfree(sa, M_IFMADDR);
	kfree(ifma, M_IFMADDR);

	return 0;
}

int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	int error;

	ifnet_serialize_all(ifp);
	error = if_delmulti_serialized(ifp, sa);
	ifnet_deserialize_all(ifp);

	return error;
}

/*
 * Delete all multicast group memberships for an interface.
 * Should be used to quickly flush all multicast filters.
 */
void
if_delallmulti_serialized(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma, mark;
	struct sockaddr sa;

	ASSERT_IFNET_SERIALIZED_ALL(ifp);

	bzero(&sa, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	sa.sa_len = sizeof(sa);

	bzero(&mark, sizeof(mark));
	mark.ifma_addr = &sa;

	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link);
	while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) {
		TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
		TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark,
		    ifma_link);

		if (ifma->ifma_addr->sa_family == AF_UNSPEC)
			continue;

		if_delmulti_serialized(ifp, ifma->ifma_addr);
	}
	TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link);
}

/*
 * Set the link layer address on an interface.
 *
 * At this time we only support certain types of interfaces,
 * and we don't allow the length of the address to change.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifreq ifr;

	sdl = IF_LLSOCKADDR(ifp);
	if (sdl == NULL)
		return (EINVAL);
	if (len != sdl->sdl_alen)	/* don't allow length to change */
		return (EINVAL);
	switch (ifp->if_type) {
	case IFT_ETHER:			/* these types use struct arpcom */
	case IFT_XETHER:
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len);
		bcopy(lladdr, LLADDR(sdl), len);
		break;
	default:
		return (ENODEV);
	}
	/*
	 * If the interface is already up, we need
	 * to re-init it in order to reprogram its
	 * address filter.
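	 * The re-init is performed below by toggling IFF_UP around two
	 * SIOCSIFFLAGS calls into the driver.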
2767 */ 2768 ifnet_serialize_all(ifp); 2769 if ((ifp->if_flags & IFF_UP) != 0) { 2770 #ifdef INET 2771 struct ifaddr_container *ifac; 2772 #endif 2773 2774 ifp->if_flags &= ~IFF_UP; 2775 ifr.ifr_flags = ifp->if_flags; 2776 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2777 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2778 NULL); 2779 ifp->if_flags |= IFF_UP; 2780 ifr.ifr_flags = ifp->if_flags; 2781 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2782 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2783 NULL); 2784 #ifdef INET 2785 /* 2786 * Also send gratuitous ARPs to notify other nodes about 2787 * the address change. 2788 */ 2789 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2790 struct ifaddr *ifa = ifac->ifa; 2791 2792 if (ifa->ifa_addr != NULL && 2793 ifa->ifa_addr->sa_family == AF_INET) 2794 arp_gratuitous(ifp, ifa); 2795 } 2796 #endif 2797 } 2798 ifnet_deserialize_all(ifp); 2799 return (0); 2800 } 2801 2802 2803 /* 2804 * Locate an interface based on a complete address. 2805 */ 2806 struct ifnet * 2807 if_bylla(const void *lla, unsigned char lla_len) 2808 { 2809 const struct ifnet_array *arr; 2810 struct ifnet *ifp; 2811 struct sockaddr_dl *sdl; 2812 int i; 2813 2814 arr = ifnet_array_get(); 2815 for (i = 0; i < arr->ifnet_count; ++i) { 2816 ifp = arr->ifnet_arr[i]; 2817 if (ifp->if_addrlen != lla_len) 2818 continue; 2819 2820 sdl = IF_LLSOCKADDR(ifp); 2821 if (memcmp(lla, LLADDR(sdl), lla_len) == 0) 2822 return (ifp); 2823 } 2824 return (NULL); 2825 } 2826 2827 struct ifmultiaddr * 2828 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2829 { 2830 struct ifmultiaddr *ifma; 2831 2832 /* TODO: need ifnet_serialize_main */ 2833 ifnet_serialize_all(ifp); 2834 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2835 if (sa_equal(ifma->ifma_addr, sa)) 2836 break; 2837 ifnet_deserialize_all(ifp); 2838 2839 return ifma; 2840 } 2841 2842 /* 2843 * This function locates the first real ethernet MAC from a network 2844 * card and loads it into node, returning 0 on success or ENOENT if 2845 * no suitable interfaces were found. It is used by the uuid code to 2846 * generate a unique 6-byte number. 2847 */ 2848 int 2849 if_getanyethermac(uint16_t *node, int minlen) 2850 { 2851 struct ifnet *ifp; 2852 struct sockaddr_dl *sdl; 2853 2854 ifnet_lock(); 2855 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2856 if (ifp->if_type != IFT_ETHER) 2857 continue; 2858 sdl = IF_LLSOCKADDR(ifp); 2859 if (sdl->sdl_alen < minlen) 2860 continue; 2861 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2862 minlen); 2863 ifnet_unlock(); 2864 return(0); 2865 } 2866 ifnet_unlock(); 2867 return (ENOENT); 2868 } 2869 2870 /* 2871 * The name argument must be a pointer to storage which will last as 2872 * long as the interface does. For physical devices, the result of 2873 * device_get_name(dev) is a good choice and for pseudo-devices a 2874 * static string works well. 2875 */ 2876 void 2877 if_initname(struct ifnet *ifp, const char *name, int unit) 2878 { 2879 ifp->if_dname = name; 2880 ifp->if_dunit = unit; 2881 if (unit != IF_DUNIT_NONE) 2882 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2883 else 2884 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2885 } 2886 2887 int 2888 if_printf(struct ifnet *ifp, const char *fmt, ...) 
2889 { 2890 __va_list ap; 2891 int retval; 2892 2893 retval = kprintf("%s: ", ifp->if_xname); 2894 __va_start(ap, fmt); 2895 retval += kvprintf(fmt, ap); 2896 __va_end(ap); 2897 return (retval); 2898 } 2899 2900 struct ifnet * 2901 if_alloc(uint8_t type) 2902 { 2903 struct ifnet *ifp; 2904 size_t size; 2905 2906 /* 2907 * XXX temporary hack until arpcom is setup in if_l2com 2908 */ 2909 if (type == IFT_ETHER) 2910 size = sizeof(struct arpcom); 2911 else 2912 size = sizeof(struct ifnet); 2913 2914 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2915 2916 ifp->if_type = type; 2917 2918 if (if_com_alloc[type] != NULL) { 2919 ifp->if_l2com = if_com_alloc[type](type, ifp); 2920 if (ifp->if_l2com == NULL) { 2921 kfree(ifp, M_IFNET); 2922 return (NULL); 2923 } 2924 } 2925 return (ifp); 2926 } 2927 2928 void 2929 if_free(struct ifnet *ifp) 2930 { 2931 kfree(ifp, M_IFNET); 2932 } 2933 2934 void 2935 ifq_set_classic(struct ifaltq *ifq) 2936 { 2937 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 2938 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 2939 } 2940 2941 void 2942 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 2943 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 2944 { 2945 int q; 2946 2947 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 2948 KASSERT(enqueue != NULL, ("enqueue is not specified")); 2949 KASSERT(dequeue != NULL, ("dequeue is not specified")); 2950 KASSERT(request != NULL, ("request is not specified")); 2951 2952 ifq->altq_mapsubq = mapsubq; 2953 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 2954 struct ifaltq_subque *ifsq = &ifq->altq_subq[q]; 2955 2956 ifsq->ifsq_enqueue = enqueue; 2957 ifsq->ifsq_dequeue = dequeue; 2958 ifsq->ifsq_request = request; 2959 } 2960 } 2961 2962 static void 2963 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2964 { 2965 2966 classq_add(&ifsq->ifsq_norm, m); 2967 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2968 } 2969 2970 static void 2971 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2972 { 2973 2974 classq_add(&ifsq->ifsq_prio, m); 2975 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2976 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 2977 } 2978 2979 static struct mbuf * 2980 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 2981 { 2982 struct mbuf *m; 2983 2984 m = classq_get(&ifsq->ifsq_norm); 2985 if (m != NULL) 2986 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2987 return (m); 2988 } 2989 2990 static struct mbuf * 2991 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 2992 { 2993 struct mbuf *m; 2994 2995 m = classq_get(&ifsq->ifsq_prio); 2996 if (m != NULL) { 2997 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2998 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 2999 } 3000 return (m); 3001 } 3002 3003 int 3004 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 3005 struct altq_pktattr *pa __unused) 3006 { 3007 3008 M_ASSERTPKTHDR(m); 3009 again: 3010 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 3011 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 3012 struct mbuf *m_drop; 3013 3014 if (m->m_flags & M_PRIO) { 3015 m_drop = NULL; 3016 if (ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen >> 1) && 3017 ifsq->ifsq_prio_bcnt < (ifsq->ifsq_maxbcnt >> 1)) { 3018 /* Try dropping some from normal queue. 
				 */
				m_drop = ifsq_norm_dequeue(ifsq);
			}
			if (m_drop == NULL)
				m_drop = ifsq_prio_dequeue(ifsq);
		} else {
			m_drop = ifsq_norm_dequeue(ifsq);
		}
		if (m_drop != NULL) {
			IFNET_STAT_INC(ifsq->ifsq_ifp, oqdrops, 1);
			m_freem(m_drop);
			goto again;
		}
		/*
		 * No old packets could be dropped!
		 * NOTE: Caller increases oqdrops.
		 */
		m_freem(m);
		return (ENOBUFS);
	} else {
		if (m->m_flags & M_PRIO)
			ifsq_prio_enqueue(ifsq, m);
		else
			ifsq_norm_enqueue(ifsq, m);
		return (0);
	}
}

struct mbuf *
ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct mbuf *m;

	switch (op) {
	case ALTDQ_POLL:
		m = classq_head(&ifsq->ifsq_prio);
		if (m == NULL)
			m = classq_head(&ifsq->ifsq_norm);
		break;

	case ALTDQ_REMOVE:
		m = ifsq_prio_dequeue(ifsq);
		if (m == NULL)
			m = ifsq_norm_dequeue(ifsq);
		break;

	default:
		panic("unsupported ALTQ dequeue op: %d", op);
	}
	return m;
}

int
ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	switch (req) {
	case ALTRQ_PURGE:
		for (;;) {
			struct mbuf *m;

			m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE);
			if (m == NULL)
				break;
			m_freem(m);
		}
		break;

	default:
		panic("unsupported ALTQ request: %d", req);
	}
	return 0;
}

static void
ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0, need_sched;

	/*
	 * Try to do direct ifnet.if_start on the subqueue first.  If
	 * there is contention on the subqueue hardware serializer,
	 * ifnet.if_start on the subqueue will be scheduled on the
	 * subqueue owner CPU.
	 */
	if (!ifsq_tryserialize_hw(ifsq)) {
		/*
		 * Subqueue hardware serializer contention happened,
		 * ifnet.if_start on the subqueue is scheduled on
		 * the subqueue owner CPU, and we keep going.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		return;
	}

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);

	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data needs to be transmitted, ifnet.if_start on
		 * the subqueue is scheduled on the subqueue owner CPU,
		 * and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, force_sched);
	}
}

/*
 * Subqueue packet staging mechanism:
 *
 * The packets enqueued into the subqueue are staged to a certain amount
 * before the ifnet.if_start on the subqueue is called. In this way, the
 * driver could avoid writing to hardware registers upon every packet;
 * instead, hardware registers could be written when a certain amount of
 * packets are put onto the hardware TX ring. Measurements on several
 * modern NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) show that this
 * hardware register write aggregation could save ~20% CPU time when
 * 18-byte UDP datagrams are transmitted at 1.48Mpps.
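 *
 * For a concrete feel for the thresholds described below: with the
 * default net.link.stage_cntmax of 16 and a 1500-byte MTU (assuming,
 * for illustration, max_protohdr is 60 -- the exact value is
 * configuration dependent), staging on a CPU stops once 16 packets or
 * roughly 1440 bytes have accumulated, whichever comes first.
 *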
 * The performance improvement from hardware register write aggregation
 * is also mentioned in Luigi Rizzo's netmap paper
 * (http://info.iet.unipi.it/~luigi/netmap/).
 *
 * Subqueue packet staging is performed for two entry points into the
 * drivers' transmission function:
 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try()
 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule()
 *
 * Subqueue packet staging will be stopped upon any of the following
 * conditions:
 * - If the count of packets enqueued on the current CPU is greater than
 *   or equal to ifsq_stage_cntmax. (XXX this should be per-interface)
 * - If the total length of packets enqueued on the current CPU is greater
 *   than or equal to the hardware's MTU - max_protohdr. max_protohdr is
 *   cut from the hardware's MTU mainly because a full TCP segment's size
 *   is usually less than the hardware's MTU.
 * - ifsq_ifstart_schedule() is not pending on the current CPU and the
 *   ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not
 *   released.
 * - The if_start_rollup(), which is registered as a low priority netisr
 *   rollup function, is called; probably because no more work is pending
 *   for the netisr.
 *
 * NOTE:
 * Currently subqueue packet staging is only performed in netisr threads.
 */
int
ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa)
{
	struct ifaltq *ifq = &ifp->if_snd;
	struct ifaltq_subque *ifsq;
	int error, start = 0, len, mcast = 0, avoid_start = 0;
	struct ifsubq_stage_head *head = NULL;
	struct ifsubq_stage *stage = NULL;
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;

	crit_enter_quick(td);

	ifsq = ifq_map_subq(ifq, gd->gd_cpuid);
	ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq);

	len = m->m_pkthdr.len;
	if (m->m_flags & M_MCAST)
		mcast = 1;

	if (td->td_type == TD_TYPE_NETISR) {
		head = &ifsubq_stage_heads[mycpuid];
		stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt++;
		stage->stg_len += len;
		if (stage->stg_cnt < ifsq_stage_cntmax &&
		    stage->stg_len < (ifp->if_mtu - max_protohdr))
			avoid_start = 1;
	}

	ALTQ_SQ_LOCK(ifsq);
	error = ifsq_enqueue_locked(ifsq, m, pa);
	if (error) {
		IFNET_STAT_INC(ifp, oqdrops, 1);
		if (!ifsq_data_ready(ifsq)) {
			ALTQ_SQ_UNLOCK(ifsq);
			crit_exit_quick(td);
			return error;
		}
		avoid_start = 0;
	}
	if (!ifsq_is_started(ifsq)) {
		if (avoid_start) {
			ALTQ_SQ_UNLOCK(ifsq);

			KKASSERT(!error);
			if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
				ifsq_stage_insert(head, stage);

			IFNET_STAT_INC(ifp, obytes, len);
			if (mcast)
				IFNET_STAT_INC(ifp, omcasts, 1);
			crit_exit_quick(td);
			return error;
		}

		/*
		 * Hold the subqueue interlock of ifnet.if_start
		 */
		ifsq_set_started(ifsq);
		start = 1;
	}
	ALTQ_SQ_UNLOCK(ifsq);

	if (!error) {
		IFNET_STAT_INC(ifp, obytes, len);
		if (mcast)
			IFNET_STAT_INC(ifp, omcasts, 1);
	}

	if (stage != NULL) {
		if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) {
			KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
			if (!avoid_start) {
				ifsq_stage_remove(head, stage);
				ifsq_ifstart_schedule(ifsq, 1);
			}
			crit_exit_quick(td);
			return error;
		}

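		/*
		 * Staging ends here: unlink this CPU's stage if it was
		 * queued, otherwise just reset its counters.
		 */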
		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) {
			ifsq_stage_remove(head, stage);
		} else {
			stage->stg_cnt = 0;
			stage->stg_len = 0;
		}
	}

	if (!start) {
		crit_exit_quick(td);
		return error;
	}

	ifsq_ifstart_try(ifsq, 0);

	crit_exit_quick(td);
	return error;
}

void *
ifa_create(int size)
{
	struct ifaddr *ifa;
	int i;

	KASSERT(size >= sizeof(*ifa), ("ifaddr size too small"));

	ifa = kmalloc(size, M_IFADDR, M_INTWAIT | M_ZERO);

	/*
	 * Make the ifa_containers available on all CPUs, since they
	 * may be accessed by any thread.
	 */
	ifa->ifa_containers =
	    kmalloc(ncpus * sizeof(struct ifaddr_container),
		M_IFADDR,
		M_INTWAIT | M_ZERO | M_CACHEALIGN);

	ifa->ifa_ncnt = ncpus;
	for (i = 0; i < ncpus; ++i) {
		struct ifaddr_container *ifac = &ifa->ifa_containers[i];

		ifac->ifa_magic = IFA_CONTAINER_MAGIC;
		ifac->ifa = ifa;
		ifac->ifa_refcnt = 1;
	}
#ifdef IFADDR_DEBUG
	kprintf("alloc ifa %p %d\n", ifa, size);
#endif
	return ifa;
}

void
ifac_free(struct ifaddr_container *ifac, int cpu_id)
{
	struct ifaddr *ifa = ifac->ifa;

	KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC);
	KKASSERT(ifac->ifa_refcnt == 0);
	KASSERT(ifac->ifa_listmask == 0,
	    ("ifa is still on %#x lists", ifac->ifa_listmask));

	ifac->ifa_magic = IFA_CONTAINER_DEAD;

#ifdef IFADDR_DEBUG_VERBOSE
	kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id);
#endif

	KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus,
	    ("invalid # of ifac, %d", ifa->ifa_ncnt));
	if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) {
#ifdef IFADDR_DEBUG
		kprintf("free ifa %p\n", ifa);
#endif
		kfree(ifa->ifa_containers, M_IFADDR);
		kfree(ifa, M_IFADDR);
	}
}

static void
ifa_iflink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0,
	    ("ifaddr is on if_addrheads"));

	ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD;
	if (msg->tail)
		TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link);
	else
		TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link);

	crit_exit();

	netisr_forwardmsg_all(&nmsg->base, cpu + 1);
}

void
ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail)
{
	struct netmsg_ifaddr msg;

	netmsg_init(&msg.base, NULL, &curthread->td_msgport,
	    0, ifa_iflink_dispatch);
	msg.ifa = ifa;
	msg.ifp = ifp;
	msg.tail = tail;

	netisr_domsg(&msg.base, 0);
}

static void
ifa_ifunlink_dispatch(netmsg_t nmsg)
{
	struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg;
	struct ifaddr *ifa = msg->ifa;
	struct ifnet *ifp = msg->ifp;
	int cpu = mycpuid;
	struct ifaddr_container *ifac;

	crit_enter();

	ifac = &ifa->ifa_containers[cpu];
	ASSERT_IFAC_VALID(ifac);
	KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD,
	    ("ifaddr is not on if_addrhead"));

	TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link);
ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3389 3390 crit_exit(); 3391 3392 netisr_forwardmsg_all(&nmsg->base, cpu + 1); 3393 } 3394 3395 void 3396 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3397 { 3398 struct netmsg_ifaddr msg; 3399 3400 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3401 0, ifa_ifunlink_dispatch); 3402 msg.ifa = ifa; 3403 msg.ifp = ifp; 3404 3405 netisr_domsg(&msg.base, 0); 3406 } 3407 3408 static void 3409 ifa_destroy_dispatch(netmsg_t nmsg) 3410 { 3411 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3412 3413 IFAFREE(msg->ifa); 3414 netisr_forwardmsg_all(&nmsg->base, mycpuid + 1); 3415 } 3416 3417 void 3418 ifa_destroy(struct ifaddr *ifa) 3419 { 3420 struct netmsg_ifaddr msg; 3421 3422 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3423 0, ifa_destroy_dispatch); 3424 msg.ifa = ifa; 3425 3426 netisr_domsg(&msg.base, 0); 3427 } 3428 3429 static void 3430 if_start_rollup(void) 3431 { 3432 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3433 struct ifsubq_stage *stage; 3434 3435 crit_enter(); 3436 3437 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3438 struct ifaltq_subque *ifsq = stage->stg_subq; 3439 int is_sched = 0; 3440 3441 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3442 is_sched = 1; 3443 ifsq_stage_remove(head, stage); 3444 3445 if (is_sched) { 3446 ifsq_ifstart_schedule(ifsq, 1); 3447 } else { 3448 int start = 0; 3449 3450 ALTQ_SQ_LOCK(ifsq); 3451 if (!ifsq_is_started(ifsq)) { 3452 /* 3453 * Hold the subqueue interlock of 3454 * ifnet.if_start 3455 */ 3456 ifsq_set_started(ifsq); 3457 start = 1; 3458 } 3459 ALTQ_SQ_UNLOCK(ifsq); 3460 3461 if (start) 3462 ifsq_ifstart_try(ifsq, 1); 3463 } 3464 KKASSERT((stage->stg_flags & 3465 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3466 } 3467 3468 crit_exit(); 3469 } 3470 3471 static void 3472 ifnetinit(void *dummy __unused) 3473 { 3474 int i; 3475 3476 /* XXX netisr_ncpus */ 3477 for (i = 0; i < ncpus; ++i) 3478 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3479 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3480 } 3481 3482 void 3483 if_register_com_alloc(u_char type, 3484 if_com_alloc_t *a, if_com_free_t *f) 3485 { 3486 3487 KASSERT(if_com_alloc[type] == NULL, 3488 ("if_register_com_alloc: %d already registered", type)); 3489 KASSERT(if_com_free[type] == NULL, 3490 ("if_register_com_alloc: %d free already registered", type)); 3491 3492 if_com_alloc[type] = a; 3493 if_com_free[type] = f; 3494 } 3495 3496 void 3497 if_deregister_com_alloc(u_char type) 3498 { 3499 3500 KASSERT(if_com_alloc[type] != NULL, 3501 ("if_deregister_com_alloc: %d not registered", type)); 3502 KASSERT(if_com_free[type] != NULL, 3503 ("if_deregister_com_alloc: %d free not registered", type)); 3504 if_com_alloc[type] = NULL; 3505 if_com_free[type] = NULL; 3506 } 3507 3508 void 3509 ifq_set_maxlen(struct ifaltq *ifq, int len) 3510 { 3511 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3512 } 3513 3514 int 3515 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3516 { 3517 return ALTQ_SUBQ_INDEX_DEFAULT; 3518 } 3519 3520 int 3521 ifq_mapsubq_modulo(struct ifaltq *ifq, int cpuid) 3522 { 3523 3524 return (cpuid % ifq->altq_subq_mappriv); 3525 } 3526 3527 /* 3528 * Watchdog timeout. Process callback as appropriate. If we cannot 3529 * serialize the ifnet just try again on the next timeout. 3530 * 3531 * NOTE: The ifnet can adjust wd_timer while holding the serializer. We 3532 * can only safely adjust it under the same circumstances. 
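 *
 * A driver typically wires this up as follows (sketch; "sc" and
 * mydrv_watchdog() are hypothetical):
 *
 *	ifsq_watchdog_init(&sc->wd, ifsq, mydrv_watchdog, 0);
 *	ifsq_watchdog_start(&sc->wd);         (bringing the NIC up)
 *	ifsq_watchdog_set_count(&sc->wd, 5);  (TX submitted, arm timeout)
 *	ifsq_watchdog_stop(&sc->wd);          (taking the NIC down)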
3533 */ 3534 static void 3535 ifsq_watchdog(void *arg) 3536 { 3537 struct ifsubq_watchdog *wd = arg; 3538 struct ifnet *ifp; 3539 int count; 3540 3541 /* 3542 * Fast track. Try to avoid acquiring the serializer when not 3543 * near the terminal count, unless asked to. If the atomic op 3544 * to decrement the count fails just retry on the next callout. 3545 */ 3546 count = wd->wd_timer; 3547 cpu_ccfence(); 3548 if (count == 0) 3549 goto done; 3550 if (count > 2 && (wd->wd_flags & IF_WDOG_ALLTICKS) == 0) { 3551 (void)atomic_cmpset_int(&wd->wd_timer, count, count - 1); 3552 goto done; 3553 } 3554 3555 /* 3556 * Obtain the serializer and then re-test all wd_timer conditions 3557 * as it may have changed. NICs do not mess with wd_timer without 3558 * holding the serializer. 3559 * 3560 * If we are unable to obtain the serializer just retry the same 3561 * count on the next callout. 3562 * 3563 * - call watchdog in terminal count (0) 3564 * - call watchdog on last tick (1) if requested 3565 * - call watchdog on all ticks if requested 3566 */ 3567 ifp = ifsq_get_ifp(wd->wd_subq); 3568 if (ifnet_tryserialize_all(ifp) == 0) 3569 goto done; 3570 if (atomic_cmpset_int(&wd->wd_timer, count, count - 1)) { 3571 --count; 3572 if (count == 0 || 3573 (wd->wd_flags & IF_WDOG_ALLTICKS) || 3574 ((wd->wd_flags & IF_WDOG_LASTTICK) && count == 1)) { 3575 wd->wd_watchdog(wd->wd_subq); 3576 } 3577 } 3578 ifnet_deserialize_all(ifp); 3579 done: 3580 ifsq_watchdog_reset(wd); 3581 } 3582 3583 static void 3584 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3585 { 3586 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3587 ifsq_get_cpuid(wd->wd_subq)); 3588 } 3589 3590 void 3591 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3592 ifsq_watchdog_t watchdog, int flags) 3593 { 3594 callout_init_mp(&wd->wd_callout); 3595 wd->wd_timer = 0; 3596 wd->wd_flags = flags; 3597 wd->wd_subq = ifsq; 3598 wd->wd_watchdog = watchdog; 3599 } 3600 3601 void 3602 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3603 { 3604 atomic_swap_int(&wd->wd_timer, 0); 3605 ifsq_watchdog_reset(wd); 3606 } 3607 3608 void 3609 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3610 { 3611 atomic_swap_int(&wd->wd_timer, 0); 3612 callout_stop(&wd->wd_callout); 3613 } 3614 3615 void 3616 ifsq_watchdog_set_count(struct ifsubq_watchdog *wd, int count) 3617 { 3618 atomic_swap_int(&wd->wd_timer, count); 3619 } 3620 3621 void 3622 ifnet_lock(void) 3623 { 3624 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3625 ("try holding ifnet lock in netisr")); 3626 mtx_lock(&ifnet_mtx); 3627 } 3628 3629 void 3630 ifnet_unlock(void) 3631 { 3632 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3633 ("try holding ifnet lock in netisr")); 3634 mtx_unlock(&ifnet_mtx); 3635 } 3636 3637 static struct ifnet_array * 3638 ifnet_array_alloc(int count) 3639 { 3640 struct ifnet_array *arr; 3641 3642 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3643 M_IFNET, M_WAITOK); 3644 arr->ifnet_count = count; 3645 3646 return arr; 3647 } 3648 3649 static void 3650 ifnet_array_free(struct ifnet_array *arr) 3651 { 3652 if (arr == &ifnet_array0) 3653 return; 3654 kfree(arr, M_IFNET); 3655 } 3656 3657 static struct ifnet_array * 3658 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3659 { 3660 struct ifnet_array *arr; 3661 int count, i; 3662 3663 KASSERT(old_arr->ifnet_count >= 0, 3664 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3665 count = old_arr->ifnet_count + 1; 3666 arr = ifnet_array_alloc(count); 3667 3668 /* 
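	 * Copy-on-write style update: netisr readers keep dereferencing
	 * the old array without locks (see ifnet_array_get()), so the
	 * new array is built off to the side.
	 *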
3669 * Save the old ifnet array and append this ifp to the end of 3670 * the new ifnet array. 3671 */ 3672 for (i = 0; i < old_arr->ifnet_count; ++i) { 3673 KASSERT(old_arr->ifnet_arr[i] != ifp, 3674 ("%s is already in ifnet array", ifp->if_xname)); 3675 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3676 } 3677 KASSERT(i == count - 1, 3678 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3679 ifp->if_xname, count - 1, i)); 3680 arr->ifnet_arr[i] = ifp; 3681 3682 return arr; 3683 } 3684 3685 static struct ifnet_array * 3686 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3687 { 3688 struct ifnet_array *arr; 3689 int count, i, idx, found = 0; 3690 3691 KASSERT(old_arr->ifnet_count > 0, 3692 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3693 count = old_arr->ifnet_count - 1; 3694 arr = ifnet_array_alloc(count); 3695 3696 /* 3697 * Save the old ifnet array, but skip this ifp. 3698 */ 3699 idx = 0; 3700 for (i = 0; i < old_arr->ifnet_count; ++i) { 3701 if (old_arr->ifnet_arr[i] == ifp) { 3702 KASSERT(!found, 3703 ("dup %s is in ifnet array", ifp->if_xname)); 3704 found = 1; 3705 continue; 3706 } 3707 KASSERT(idx < count, 3708 ("invalid ifnet array index %d, count %d", idx, count)); 3709 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3710 ++idx; 3711 } 3712 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3713 KASSERT(idx == count, 3714 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3715 ifp->if_xname, count, idx)); 3716 3717 return arr; 3718 } 3719 3720 const struct ifnet_array * 3721 ifnet_array_get(void) 3722 { 3723 const struct ifnet_array *ret; 3724 3725 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3726 ret = ifnet_array; 3727 /* Make sure 'ret' is really used. 
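	 * The cpu_ccfence() compiler barrier below keeps the load of
	 * ifnet_array from being deferred or refetched later.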
*/ 3728 cpu_ccfence(); 3729 return (ret); 3730 } 3731 3732 int 3733 ifnet_array_isempty(void) 3734 { 3735 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3736 if (ifnet_array->ifnet_count == 0) 3737 return 1; 3738 else 3739 return 0; 3740 } 3741 3742 void 3743 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3744 { 3745 struct ifaddr *ifa; 3746 3747 memset(mark, 0, sizeof(*mark)); 3748 ifa = &mark->ifa; 3749 3750 mark->ifac.ifa = ifa; 3751 3752 ifa->ifa_addr = &mark->addr; 3753 ifa->ifa_dstaddr = &mark->dstaddr; 3754 ifa->ifa_netmask = &mark->netmask; 3755 ifa->ifa_ifp = ifp; 3756 } 3757 3758 static int 3759 if_ringcnt_fixup(int ring_cnt, int ring_cntmax) 3760 { 3761 3762 KASSERT(ring_cntmax > 0, ("invalid ring count max %d", ring_cntmax)); 3763 3764 if (ring_cnt <= 0 || ring_cnt > ring_cntmax) 3765 ring_cnt = ring_cntmax; 3766 if (ring_cnt > netisr_ncpus) 3767 ring_cnt = netisr_ncpus; 3768 return (ring_cnt); 3769 } 3770 3771 static void 3772 if_ringmap_set_grid(device_t dev, struct if_ringmap *rm, int grid) 3773 { 3774 int i, offset; 3775 3776 KASSERT(grid > 0, ("invalid if_ringmap grid %d", grid)); 3777 KASSERT(grid >= rm->rm_cnt, ("invalid if_ringmap grid %d, count %d", 3778 grid, rm->rm_cnt)); 3779 rm->rm_grid = grid; 3780 3781 offset = (rm->rm_grid * device_get_unit(dev)) % netisr_ncpus; 3782 for (i = 0; i < rm->rm_cnt; ++i) { 3783 rm->rm_cpumap[i] = offset + i; 3784 KASSERT(rm->rm_cpumap[i] < netisr_ncpus, 3785 ("invalid cpumap[%d] = %d, offset %d", i, 3786 rm->rm_cpumap[i], offset)); 3787 } 3788 } 3789 3790 static struct if_ringmap * 3791 if_ringmap_alloc_flags(device_t dev, int ring_cnt, int ring_cntmax, 3792 uint32_t flags) 3793 { 3794 struct if_ringmap *rm; 3795 int i, grid = 0, prev_grid; 3796 3797 ring_cnt = if_ringcnt_fixup(ring_cnt, ring_cntmax); 3798 rm = kmalloc(__offsetof(struct if_ringmap, rm_cpumap[ring_cnt]), 3799 M_DEVBUF, M_WAITOK | M_ZERO); 3800 3801 rm->rm_cnt = ring_cnt; 3802 if (flags & RINGMAP_FLAG_POWEROF2) 3803 rm->rm_cnt = 1 << (fls(rm->rm_cnt) - 1); 3804 3805 prev_grid = netisr_ncpus; 3806 for (i = 0; i < netisr_ncpus; ++i) { 3807 if (netisr_ncpus % (i + 1) != 0) 3808 continue; 3809 3810 grid = netisr_ncpus / (i + 1); 3811 if (rm->rm_cnt > grid) { 3812 grid = prev_grid; 3813 break; 3814 } 3815 3816 if (rm->rm_cnt > netisr_ncpus / (i + 2)) 3817 break; 3818 prev_grid = grid; 3819 } 3820 if_ringmap_set_grid(dev, rm, grid); 3821 3822 return (rm); 3823 } 3824 3825 struct if_ringmap * 3826 if_ringmap_alloc(device_t dev, int ring_cnt, int ring_cntmax) 3827 { 3828 3829 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3830 RINGMAP_FLAG_NONE)); 3831 } 3832 3833 struct if_ringmap * 3834 if_ringmap_alloc2(device_t dev, int ring_cnt, int ring_cntmax) 3835 { 3836 3837 return (if_ringmap_alloc_flags(dev, ring_cnt, ring_cntmax, 3838 RINGMAP_FLAG_POWEROF2)); 3839 } 3840 3841 void 3842 if_ringmap_free(struct if_ringmap *rm) 3843 { 3844 3845 kfree(rm, M_DEVBUF); 3846 } 3847 3848 /* 3849 * Align the two ringmaps. 3850 * 3851 * e.g. 8 netisrs, rm0 contains 4 rings, rm1 contains 2 rings. 
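 * (The CPU offset of each ringmap is (rm_grid * device unit) %
 * netisr_ncpus -- see if_ringmap_set_grid(); the tables below assume
 * device unit 1.)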
 *
 * Before:
 *
 * CPU      0  1  2  3  4  5  6  7
 * NIC_RX               n0 n1 n2 n3
 * NIC_TX         N0 N1
 *
 * After:
 *
 * CPU      0  1  2  3  4  5  6  7
 * NIC_RX               n0 n1 n2 n3
 * NIC_TX               N0 N1
 */
void
if_ringmap_align(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1)
{

	if (rm0->rm_grid > rm1->rm_grid)
		if_ringmap_set_grid(dev, rm1, rm0->rm_grid);
	else if (rm0->rm_grid < rm1->rm_grid)
		if_ringmap_set_grid(dev, rm0, rm1->rm_grid);
}

void
if_ringmap_match(device_t dev, struct if_ringmap *rm0, struct if_ringmap *rm1)
{
	int subset_grid, cnt, divisor, mod, offset, i;
	struct if_ringmap *subset_rm, *rm;
	int old_rm0_grid, old_rm1_grid;

	if (rm0->rm_grid == rm1->rm_grid)
		return;

	/* Save grid for later use */
	old_rm0_grid = rm0->rm_grid;
	old_rm1_grid = rm1->rm_grid;

	if_ringmap_align(dev, rm0, rm1);

	/*
	 * Re-shuffle rings to get a more even distribution.
	 *
	 * e.g. 12 netisrs, rm0 contains 4 rings, rm1 contains 2 rings.
	 *
	 * CPU      0  1  2  3  4  5  6  7  8  9 10 11
	 *
	 * NIC_RX  a0 a1 a2 a3 b0 b1 b2 b3 c0 c1 c2 c3
	 * NIC_TX  A0 A1       B0 B1       C0 C1
	 *
	 * NIC_RX  d0 d1 d2 d3 e0 e1 e2 e3 f0 f1 f2 f3
	 * NIC_TX        D0 D1       E0 E1       F0 F1
	 */

	if (rm0->rm_cnt >= (2 * old_rm1_grid)) {
		cnt = rm0->rm_cnt;
		subset_grid = old_rm1_grid;
		subset_rm = rm1;
		rm = rm0;
	} else if (rm1->rm_cnt > (2 * old_rm0_grid)) {
		cnt = rm1->rm_cnt;
		subset_grid = old_rm0_grid;
		subset_rm = rm0;
		rm = rm1;
	} else {
		/* No space to shuffle. */
		return;
	}

	mod = cnt / subset_grid;
	KKASSERT(mod >= 2);
	divisor = netisr_ncpus / rm->rm_grid;
	offset = ((device_get_unit(dev) / divisor) % mod) * subset_grid;

	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		subset_rm->rm_cpumap[i] += offset;
		KASSERT(subset_rm->rm_cpumap[i] < netisr_ncpus,
		    ("match: invalid cpumap[%d] = %d, offset %d",
		     i, subset_rm->rm_cpumap[i], offset));
	}
#ifdef INVARIANTS
	for (i = 0; i < subset_rm->rm_cnt; ++i) {
		int j;

		for (j = 0; j < rm->rm_cnt; ++j) {
			if (rm->rm_cpumap[j] == subset_rm->rm_cpumap[i])
				break;
		}
		KASSERT(j < rm->rm_cnt,
		    ("subset cpumap[%d] = %d not found in superset",
		     i, subset_rm->rm_cpumap[i]));
	}
#endif
}

int
if_ringmap_count(const struct if_ringmap *rm)
{

	return (rm->rm_cnt);
}

int
if_ringmap_cpumap(const struct if_ringmap *rm, int ring)
{

	KASSERT(ring >= 0 && ring < rm->rm_cnt, ("invalid ring %d", ring));
	return (rm->rm_cpumap[ring]);
}

void
if_ringmap_rdrtable(const struct if_ringmap *rm, int table[], int table_nent)
{
	int i, grid_idx, grid_cnt, patch_off, patch_cnt, ncopy;

	KASSERT(table_nent > 0 && (table_nent & NETISR_CPUMASK) == 0,
	    ("invalid redirect table entries %d", table_nent));

	grid_idx = 0;
	for (i = 0; i < NETISR_CPUMAX; ++i) {
		table[i] = grid_idx++ % rm->rm_cnt;

		if (grid_idx == rm->rm_grid)
			grid_idx = 0;
	}

	/*
	 * Distribute the rings more evenly over the remainder
	 * of each grid.
	 *
	 * e.g. 12 netisrs, rm contains 8 rings.
	 *
	 * Redirect table before:
	 *
	 *  0  1  2  3  4  5  6  7  0  1  2  3  0  1  2  3
	 *  4  5  6  7  0  1  2  3  0  1  2  3  4  5  6  7
	 *  0  1  2  3  0  1  2  3  4  5  6  7  0  1  2  3
	 * ....
	 *
	 * Redirect table after being patched (pX, patched entries):
	 *
	 *  0  1  2  3  4  5  6  7 p0 p1 p2 p3  0  1  2  3
	 *  4  5  6  7 p4 p5 p6 p7  0  1  2  3  4  5  6  7
	 * p0 p1 p2 p3  0  1  2  3  4  5  6  7 p4 p5 p6 p7
	 * ....
	 */
	patch_cnt = rm->rm_grid % rm->rm_cnt;
	if (patch_cnt == 0)
		goto done;
	patch_off = rm->rm_grid - (rm->rm_grid % rm->rm_cnt);

	grid_cnt = roundup(NETISR_CPUMAX, rm->rm_grid) / rm->rm_grid;
	grid_idx = 0;
	for (i = 0; i < grid_cnt; ++i) {
		int j;

		for (j = 0; j < patch_cnt; ++j) {
			int fix_idx;

			fix_idx = (i * rm->rm_grid) + patch_off + j;
			if (fix_idx >= NETISR_CPUMAX)
				goto done;
			table[fix_idx] = grid_idx++ % rm->rm_cnt;
		}
	}
done:
	/*
	 * If the device supports a larger redirect table, duplicate
	 * the first NETISR_CPUMAX entries to the rest of the table,
	 * so that it matches the upper layer's expectation:
	 * (hash & NETISR_CPUMASK) % netisr_ncpus
	 */
	ncopy = table_nent / NETISR_CPUMAX;
	for (i = 1; i < ncopy; ++i) {
		memcpy(&table[i * NETISR_CPUMAX], table,
		    NETISR_CPUMAX * sizeof(table[0]));
	}
	if (if_ringmap_dumprdr) {
		for (i = 0; i < table_nent; ++i) {
			if (i != 0 && i % 16 == 0)
				kprintf("\n");
			kprintf("%03d ", table[i]);
		}
		kprintf("\n");
	}
}

int
if_ringmap_cpumap_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct if_ringmap *rm = arg1;
	int i, error = 0;

	for (i = 0; i < rm->rm_cnt; ++i) {
		int cpu = rm->rm_cpumap[i];

		error = SYSCTL_OUT(req, &cpu, sizeof(cpu));
		if (error)
			break;
	}
	return (error);
}
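
/*
 * Example if_ringmap consumer (illustration only; the MYDRV_* names
 * and the softc layout are hypothetical):
 *
 *	rm = if_ringmap_alloc(dev, nrx, MYDRV_RX_RING_MAX);
 *	nrings = if_ringmap_count(rm);
 *	for (i = 0; i < nrings; ++i)
 *		sc->rx_ring[i].cpuid = if_ringmap_cpumap(rm, i);
 *	if_ringmap_rdrtable(rm, rdr_table, MYDRV_RDRTABLE_SIZE);
 */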