/*
 * Copyright (c) 1980, 1986, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)if.c 8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/net/if.c,v 1.185 2004/03/13 02:35:03 brooks Exp $
 */

#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_ifpoll.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/socketops.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/thread.h>
#include <sys/serialize.h>
#include <sys/bus.h>

#include <sys/thread2.h>
#include <sys/msgport2.h>
#include <sys/mutex2.h>

#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/ifq_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/netisr2.h>
#include <net/netmsg2.h>

#include <machine/atomic.h>
#include <machine/stdarg.h>
#include <machine/smp.h>

#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/if_ether.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif

#if defined(COMPAT_43)
#include <emulation/43bsd/43bsd_socket.h>
#endif /* COMPAT_43 */

/*
 * Netmsg that carries an interface address together with its interface
 * and a `tail' flag.
 *
 * NOTE(review): the code that sends and handles this message is not in
 * this part of the file; `tail' presumably selects tail vs. head
 * insertion -- confirm against the users.
 */
struct netmsg_ifaddr {
	struct netmsg_base base;
	struct ifaddr *ifa;
	struct ifnet *ifp;
	int tail;
};

/*
 * Cache-aligned list head of subqueue stages; one instance per CPU is
 * kept in ifsubq_stage_heads[].
 */
struct ifsubq_stage_head {
	TAILQ_HEAD(, ifsubq_stage) stg_head;
} __cachealign;

/*
 * System initialization
 */
static void if_attachdomain(void *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, caddr_t, struct ucred *);
static void ifinit(void *);
static void ifnetinit(void *);
static void if_slowtimo(void *);
static void link_rtrequest(int, struct rtentry *);
static int if_rtdel(struct radix_node *, void *);
static void if_slowtimo_dispatch(netmsg_t);

/* Helper functions */
static void ifsq_watchdog_reset(struct ifsubq_watchdog *);
static int if_delmulti_serialized(struct ifnet *, struct sockaddr *);
static struct ifnet_array *ifnet_array_alloc(int);
static void ifnet_array_free(struct ifnet_array *);
static struct ifnet_array *ifnet_array_add(struct ifnet *,
    const struct ifnet_array *);
static struct ifnet_array *ifnet_array_del(struct ifnet *,
    const struct ifnet_array *);

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6 related files.
 * Should this be more generalized?
 */
extern void nd6_setmtu(struct ifnet *);
#endif

SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");

/*
 * Staging packet count limit, settable as a loader tunable and as a
 * read/write sysctl.  ifsq_ifstart_schedule() only uses staging while
 * this value is greater than zero.
 */
static int ifsq_stage_cntmax = 4;
TUNABLE_INT("net.link.stage_cntmax", &ifsq_stage_cntmax);
SYSCTL_INT(_net_link, OID_AUTO, stage_cntmax, CTLFLAG_RW,
    &ifsq_stage_cntmax, 0, "ifq staging packet count max");

static int if_stats_compat = 0;
SYSCTL_INT(_net_link, OID_AUTO, stats_compat, CTLFLAG_RW,
    &if_stats_compat, 0, "Compat the old ifnet stats");

SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, ifinit, NULL);
/* Must be after netisr_init */
SYSINIT(ifnet, SI_SUB_PRE_DRIVERS, SI_ORDER_SECOND, ifnetinit, NULL);

/*
 * NOTE(review): 256-entry tables of allocation/free callbacks; presumably
 * indexed by interface type -- the users are not in this part of the file.
 */
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];

MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
MALLOC_DEFINE(M_IFNET, "ifnet", "interface structure");

/* Default send queue length, applied when a driver leaves altq_maxlen 0 */
int ifqmaxlen = IFQ_MAXLEN;
/*
 * List of all attached interfaces.
 *
 * NOTE(review): the functions below walk `ifnetlist'; presumably that name
 * is an alias for this list provided by a header -- confirm.
 */
struct ifnethead ifnet = TAILQ_HEAD_INITIALIZER(ifnet);

/* Current ifnet array; replaced wholesale by if_attach()/if_detach() */
static struct ifnet_array ifnet_array0;
static struct ifnet_array *ifnet_array = &ifnet_array0;

/* Callout and netmsg used to drive if_slowtimo; set up in ifinit() */
static struct callout if_slowtimo_timer;
static struct netmsg_base if_slowtimo_netmsg;

/* Highest interface index in use and the index -> ifnet lookup table */
int if_index = 0;
struct ifnet **ifindex2ifnet = NULL;
static struct thread ifnet_threads[MAXCPU];
static struct mtx ifnet_mtx = MTX_INITIALIZER("ifnet");

/* Per-CPU lists of staged subqueues */
static struct ifsubq_stage_head ifsubq_stage_heads[MAXCPU];

#ifdef notyet
#define IFQ_KTR_STRING "ifq=%p"
#define IFQ_KTR_ARGS struct ifaltq *ifq
#ifndef KTR_IFQ
#define KTR_IFQ KTR_ALL
#endif
KTR_INFO_MASTER(ifq);
KTR_INFO(KTR_IFQ, ifq, enqueue, 0, IFQ_KTR_STRING, IFQ_KTR_ARGS);
KTR_INFO(KTR_IFQ, ifq, dequeue, 1, IFQ_KTR_STRING, IFQ_KTR_ARGS);
#define logifq(name, arg) KTR_LOG(ifq_ ## name, arg)

#define IF_START_KTR_STRING "ifp=%p"
#define IF_START_KTR_ARGS struct ifnet *ifp
#ifndef KTR_IF_START
#define KTR_IF_START KTR_ALL
#endif
KTR_INFO_MASTER(if_start);
KTR_INFO(KTR_IF_START, if_start, run, 0,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, sched, 1,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, avoid, 2,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, contend_sched, 3,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
KTR_INFO(KTR_IF_START, if_start, chase_sched, 4,
    IF_START_KTR_STRING, IF_START_KTR_ARGS);
#define logifstart(name, arg) KTR_LOG(if_start_ ## name, arg)
#endif

/* List of all interface groups */
TAILQ_HEAD(, ifg_group) ifg_head = TAILQ_HEAD_INITIALIZER(ifg_head);

/*
 * Network interface utility routines.
 *
 * Routines with ifa_ifwith* names take sockaddr *'s as
 * parameters.
 */

/*
 * Interface subsystem initialization, registered through the "interfaces"
 * SYSINIT.
 *
 * Initializes the if_slowtimo callout and netmsg, gives the default
 * ifqmaxlen to any listed interface whose send queue still has
 * altq_maxlen == 0, and then sends the if_slowtimo netmsg to netisr0.
 *
 * `dummy' is unused.
 */
/* ARGSUSED*/
void
ifinit(void *dummy)
{
	struct ifnet *ifp;

	callout_init_mp(&if_slowtimo_timer);
	netmsg_init(&if_slowtimo_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, if_slowtimo_dispatch);

	/* XXX is this necessary? */
	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		if (ifp->if_snd.altq_maxlen == 0) {
			if_printf(ifp, "XXX: driver didn't set altq_maxlen\n");
			ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
		}
	}
	ifnet_unlock();

	/* Start if_slowtimo */
	lwkt_sendmsg(netisr_cpuport(0), &if_slowtimo_netmsg.lmsg);
}

/*
 * Send the subqueue's ifstart netmsg of the current CPU to the current
 * CPU's netisr port, unless that message is still outstanding (MSGF_DONE
 * is not set).
 *
 * ifsq_ifstart_schedule() uses this as its IPI function and also calls it
 * directly when it is already running on the target CPU.
 *
 * `arg' is the struct ifaltq_subque to start.
 */
static void
ifsq_ifstart_ipifunc(void *arg)
{
	struct ifaltq_subque *ifsq = arg;
	struct lwkt_msg *lmsg = ifsq_get_ifstart_lmsg(ifsq, mycpuid);

	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(mycpuid), lmsg);
	crit_exit();
}

/*
 * Unlink `stage' from `head', clear its QUED and SCHED flags and reset
 * its packet count and length.  The stage must currently be queued.
 */
static __inline void
ifsq_stage_remove(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED);
	TAILQ_REMOVE(&head->stg_head, stage, stg_link);
	stage->stg_flags &= ~(IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED);
	stage->stg_cnt = 0;
	stage->stg_len = 0;
}

/*
 * Mark `stage' as queued and append it to `head'.  The stage must be
 * neither queued nor scheduled on entry.
 */
static __inline void
ifsq_stage_insert(struct ifsubq_stage_head *head, struct ifsubq_stage *stage)
{
	KKASSERT((stage->stg_flags &
	    (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0);
	stage->stg_flags |= IFSQ_STAGE_FLAG_QUED;
	TAILQ_INSERT_TAIL(&head->stg_head, stage, stg_link);
}

/*
 * Schedule ifnet.if_start on the subqueue owner CPU
 *
 * When `force' is zero, the current thread is a netisr thread and staging
 * is enabled (ifsq_stage_cntmax > 0), the request is only recorded: the
 * current CPU's stage of this subqueue gets its counters reset, is put on
 * this CPU's stage list if it is not there yet, and is flagged
 * IFSQ_STAGE_FLAG_SCHED.
 * NOTE(review): presumably the flagged stage is serviced later by staging
 * code outside this part of the file -- confirm.
 *
 * Otherwise the ifstart message is sent immediately, through an IPI when
 * the owner CPU is not the current CPU.
 */
static void
ifsq_ifstart_schedule(struct ifaltq_subque *ifsq, int force)
{
	int cpu;

	if (!force && curthread->td_type == TD_TYPE_NETISR &&
	    ifsq_stage_cntmax > 0) {
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		stage->stg_cnt = 0;
		stage->stg_len = 0;
		if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0)
			ifsq_stage_insert(&ifsubq_stage_heads[mycpuid], stage);
		stage->stg_flags |= IFSQ_STAGE_FLAG_SCHED;
		return;
	}

	cpu = ifsq_get_cpuid(ifsq);
	if (cpu != mycpuid)
		lwkt_send_ipiq(globaldata_find(cpu), ifsq_ifstart_ipifunc, ifsq);
	else
		ifsq_ifstart_ipifunc(ifsq);
}

/*
 * Decide whether ifnet.if_start has to be scheduled again for `ifsq'.
 *
 * `running' is non-zero when the caller found the interface IFF_RUNNING
 * and the subqueue not oactive after its if_start call.
 *
 * Returns 1 if if_start should be scheduled (the subqueue stays marked
 * started), 0 if not (the started mark has been cleared).
 *
 * NOTE:
 * This function will release ifnet.if_start subqueue interlock,
 * if ifnet.if_start for the subqueue does not need to be scheduled
 */
static __inline int
ifsq_ifstart_need_schedule(struct ifaltq_subque *ifsq, int running)
{
	if (!running || ifsq_is_empty(ifsq)
#ifdef ALTQ
	    || ifsq->ifsq_altq->altq_tbr != NULL
#endif
	) {
		ALTQ_SQ_LOCK(ifsq);
		/*
		 * ifnet.if_start subqueue interlock is released, if:
		 * 1) Hardware can not take any packets, due to
		 *    o interface is marked down
		 *    o hardware queue is full (ifsq_is_oactive)
		 *    Under the second situation, hardware interrupt
		 *    or polling(4) will call/schedule ifnet.if_start
		 *    on the subqueue when hardware queue is ready
		 * 2) There is no packet in the subqueue.
		 *    Further ifq_dispatch or ifq_handoff will call/
		 *    schedule ifnet.if_start on the subqueue.
		 * 3) TBR is used and it does not allow further
		 *    dequeueing.
		 *    TBR callout will call ifnet.if_start on the
		 *    subqueue.
		 */
		if (!running || !ifsq_data_ready(ifsq)) {
			ifsq_clr_started(ifsq);
			ALTQ_SQ_UNLOCK(ifsq);
			return 0;
		}
		ALTQ_SQ_UNLOCK(ifsq);
	}
	return 1;
}

/*
 * Netmsg handler of the per-CPU ifstart messages of a subqueue (installed
 * by if_attach(), which also stores the subqueue in lmsg.u.ms_resultp).
 *
 * The message is replied to before any work is done.  If the current CPU
 * is not the subqueue's owner CPU, the request is forcibly rescheduled
 * towards the owner.  Otherwise ifnet.if_start is called under the
 * subqueue's hardware serializer, provided the interface is IFF_RUNNING
 * and the subqueue is not oactive, and if_start is scheduled again when
 * ifsq_ifstart_need_schedule() asks for it.
 */
static void
ifsq_ifstart_dispatch(netmsg_t msg)
{
	struct lwkt_msg *lmsg = &msg->base.lmsg;
	struct ifaltq_subque *ifsq = lmsg->u.ms_resultp;
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	struct globaldata *gd = mycpu;
	int running = 0, need_sched;

	crit_enter_gd(gd);

	lwkt_replymsg(lmsg, 0); /* reply ASAP */

	if (gd->gd_cpuid != ifsq_get_cpuid(ifsq)) {
		/*
		 * We need to chase the subqueue owner CPU change.
		 */
		ifsq_ifstart_schedule(ifsq, 1);
		crit_exit_gd(gd);
		return;
	}

	ifsq_serialize_hw(ifsq);
	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) {
		ifp->if_start(ifp, ifsq);
		/* Re-check: if_start may have changed either condition */
		if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
			running = 1;
	}
	need_sched = ifsq_ifstart_need_schedule(ifsq, running);
	ifsq_deserialize_hw(ifsq);

	if (need_sched) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on the subqueue owner CPU, and we keep going.
		 * NOTE: ifnet.if_start subqueue interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}

	crit_exit_gd(gd);
}

/*
 * Device driver ifnet.if_start helper function
 *
 * Must be called with the subqueue's hardware serializer held.  Does
 * nothing if the subqueue is already marked started or has no data
 * ready; otherwise marks it started, calls ifnet.if_start once and
 * schedules a further if_start when more data is pending.
 */
void
ifsq_devstart(struct ifaltq_subque *ifsq)
{
	struct ifnet *ifp = ifsq_get_ifp(ifsq);
	int running = 0;

	ASSERT_ALTQ_SQ_SERIALIZED_HW(ifsq);

	ALTQ_SQ_LOCK(ifsq);
	if (ifsq_is_started(ifsq) || !ifsq_data_ready(ifsq)) {
		ALTQ_SQ_UNLOCK(ifsq);
		return;
	}
	ifsq_set_started(ifsq);
	ALTQ_SQ_UNLOCK(ifsq);

	ifp->if_start(ifp, ifsq);

	if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq))
		running = 1;

	if (ifsq_ifstart_need_schedule(ifsq, running)) {
		/*
		 * More data need to be transmitted, ifnet.if_start is
		 * scheduled on ifnet's CPU, and we keep going.
		 * NOTE: ifnet.if_start interlock is not released.
		 */
		ifsq_ifstart_schedule(ifsq, 0);
	}
}

/* ifsq_devstart() on the default subqueue of ifp's send queue */
void
if_devstart(struct ifnet *ifp)
{
	ifsq_devstart(ifq_get_subq_default(&ifp->if_snd));
}

/*
 * Device driver ifnet.if_start schedule helper function
 *
 * Always forces the scheduling, i.e. staging is bypassed.
 */
void
ifsq_devstart_sched(struct ifaltq_subque *ifsq)
{
	ifsq_ifstart_schedule(ifsq, 1);
}

/* ifsq_devstart_sched() on the default subqueue of ifp's send queue */
void
if_devstart_sched(struct ifnet *ifp)
{
	ifsq_devstart_sched(ifq_get_subq_default(&ifp->if_snd));
}

/*
 * Default ifnet.if_serialize method, installed by if_attach() when the
 * driver supplies no serialize functions: enters ifp->if_serializer.
 * The `slz' selector is ignored.
 */
static void
if_default_serialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_enter(ifp->if_serializer);
}

/*
 * Default ifnet.if_deserialize method: exits ifp->if_serializer.
 * The `slz' selector is ignored.
 */
static void
if_default_deserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	lwkt_serialize_exit(ifp->if_serializer);
}

/*
 * Default ifnet.if_tryserialize method: returns the result of
 * lwkt_serialize_try() on ifp->if_serializer.  The `slz' selector is
 * ignored.
 */
static int
if_default_tryserialize(struct ifnet *ifp, enum ifnet_serialize slz __unused)
{
	return lwkt_serialize_try(ifp->if_serializer);
}

#ifdef INVARIANTS
/*
 * Default ifnet.if_serialize_assert method: asserts that
 * ifp->if_serializer is held when `serialized' is true and that it is
 * not held otherwise.  The `slz' selector is ignored.
 */
static void
if_default_serialize_assert(struct ifnet *ifp,
    enum ifnet_serialize slz __unused,
    boolean_t serialized)
{
	if (serialized)
		ASSERT_SERIALIZED(ifp->if_serializer);
	else
		ASSERT_NOT_SERIALIZED(ifp->if_serializer);
}
#endif

/*
 * Attach an interface to the list of "active" interfaces.
 *
 * The serializer is optional.
 *
 * ifp:        interface to attach; the driver has filled in if_xname,
 *             if_type, if_addrlen and, optionally, its own serialize
 *             methods, if_mapsubq and the if_snd subqueue count/maxlen.
 * serializer: serializer to use with the default serialize methods, or
 *             NULL to use the one embedded in ifp.  Must be NULL when the
 *             driver supplies its own serialize methods.
 *
 * The function sets up the serialize methods, the per-CPU address list
 * heads, the link-level address, the per-CPU statistics, the send queue
 * and its subqueues, then publishes ifp in ifindex2ifnet, the ifnet
 * array and the ifnet list and announces the new interface.
 */
void
if_attach(struct ifnet *ifp, lwkt_serialize_t serializer)
{
	unsigned socksize, ifasize;
	int namelen, masklen;
	struct sockaddr_dl *sdl, *sdl_addr;
	struct ifaddr *ifa;
	struct ifaltq *ifq;
	struct ifnet **old_ifindex2ifnet = NULL;
	struct ifnet_array *old_ifnet_array;
	int i, q;

	/* Current capacity (in entries) of ifindex2ifnet; doubled on growth */
	static int if_indexlim = 8;

	if (ifp->if_serialize != NULL) {
		KASSERT(ifp->if_deserialize != NULL &&
		    ifp->if_tryserialize != NULL &&
		    ifp->if_serialize_assert != NULL,
		    ("serialize functions are partially setup"));

		/*
		 * If the device supplies serialize functions,
		 * then clear if_serializer to catch any invalid
		 * usage of this field.
		 */
		KASSERT(serializer == NULL,
		    ("both serialize functions and default serializer "
		    "are supplied"));
		ifp->if_serializer = NULL;
	} else {
		KASSERT(ifp->if_deserialize == NULL &&
		    ifp->if_tryserialize == NULL &&
		    ifp->if_serialize_assert == NULL,
		    ("serialize functions are partially setup"));
		ifp->if_serialize = if_default_serialize;
		ifp->if_deserialize = if_default_deserialize;
		ifp->if_tryserialize = if_default_tryserialize;
#ifdef INVARIANTS
		ifp->if_serialize_assert = if_default_serialize_assert;
#endif

		/*
		 * The serializer can be passed in from the device,
		 * allowing the same serializer to be used for both
		 * the interrupt interlock and the device queue.
		 * If not specified, the netif structure will use an
		 * embedded serializer.
		 */
		if (serializer == NULL) {
			serializer = &ifp->if_default_serializer;
			lwkt_serialize_init(serializer);
		}
		ifp->if_serializer = serializer;
	}

	/*
	 * XXX -
	 * The old code would work if the interface passed a pre-existing
	 * chain of ifaddrs to this code. We don't trust our callers to
	 * properly initialize the tailq, however, so we no longer allow
	 * this unlikely case.
	 */
	ifp->if_addrheads = kmalloc(ncpus * sizeof(struct ifaddrhead),
	    M_IFADDR, M_WAITOK | M_ZERO);
	for (i = 0; i < ncpus; ++i)
		TAILQ_INIT(&ifp->if_addrheads[i]);

	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
	getmicrotime(&ifp->if_lastchange);

	/*
	 * create a Link Level name for this device
	 *
	 * A single ifaddr allocation holds the ifaddr itself followed by
	 * two sockaddr_dl areas of socksize bytes each: the address and
	 * then the netmask.
	 */
	namelen = strlen(ifp->if_xname);
	masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen;
	socksize = masklen + ifp->if_addrlen;
	if (socksize < sizeof(*sdl))
		socksize = sizeof(*sdl);
	socksize = RT_ROUNDUP(socksize);
	ifasize = sizeof(struct ifaddr) + 2 * socksize;
	ifa = ifa_create(ifasize, M_WAITOK);
	sdl = sdl_addr = (struct sockaddr_dl *)(ifa + 1);
	sdl->sdl_len = socksize;
	sdl->sdl_family = AF_LINK;
	bcopy(ifp->if_xname, sdl->sdl_data, namelen);
	sdl->sdl_nlen = namelen;
	sdl->sdl_type = ifp->if_type;
	ifp->if_lladdr = ifa;
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = (struct sockaddr *)sdl;
	/* Step over the address area; the netmask covers the name bytes */
	sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
	ifa->ifa_netmask = (struct sockaddr *)sdl;
	sdl->sdl_len = masklen;
	while (namelen != 0)
		sdl->sdl_data[--namelen] = 0xff;
	ifa_iflink(ifa, ifp, 0 /* Insert head */);

	ifp->if_data_pcpu = kmalloc_cachealign(
	    ncpus * sizeof(struct ifdata_pcpu), M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifp->if_mapsubq == NULL)
		ifp->if_mapsubq = ifq_mapsubq_default;

	ifq = &ifp->if_snd;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	/* Keep only the ALTQF_CANTCHANGE bit of the flags */
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	ifq->altq_tbr = NULL;
	ifq->altq_ifp = ifp;

	if (ifq->altq_subq_cnt <= 0)
		ifq->altq_subq_cnt = 1;
	ifq->altq_subq = kmalloc_cachealign(
	    ifq->altq_subq_cnt * sizeof(struct ifaltq_subque),
	    M_DEVBUF, M_WAITOK | M_ZERO);

	if (ifq->altq_maxlen == 0) {
		if_printf(ifp, "driver didn't set altq_maxlen\n");
		ifq_set_maxlen(ifq, ifqmaxlen);
	}

	/*
	 * Set up every subqueue: limits, owner CPU 0, hardware serializer
	 * (when the default serializer is in use), and the per-CPU stages
	 * and ifstart netmsgs.
	 */
	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];

		ALTQ_SQ_LOCK_INIT(ifsq);
		ifsq->ifsq_index = q;

		ifsq->ifsq_altq = ifq;
		ifsq->ifsq_ifp = ifp;

		ifsq->ifsq_maxlen = ifq->altq_maxlen;
		ifsq->ifsq_maxbcnt = ifsq->ifsq_maxlen * MCLBYTES;
		ifsq->ifsq_prepended = NULL;
		ifsq->ifsq_started = 0;
		ifsq->ifsq_hw_oactive = 0;
		ifsq_set_cpuid(ifsq, 0);
		if (ifp->if_serializer != NULL)
			ifsq_set_hw_serialize(ifsq, ifp->if_serializer);

		ifsq->ifsq_stage =
		    kmalloc_cachealign(ncpus * sizeof(struct ifsubq_stage),
		    M_DEVBUF, M_WAITOK | M_ZERO);
		for (i = 0; i < ncpus; ++i)
			ifsq->ifsq_stage[i].stg_subq = ifsq;

		ifsq->ifsq_ifstart_nmsg =
		    kmalloc(ncpus * sizeof(struct netmsg_base),
		    M_LWKTMSG, M_WAITOK);
		for (i = 0; i < ncpus; ++i) {
			netmsg_init(&ifsq->ifsq_ifstart_nmsg[i], NULL,
			    &netisr_adone_rport, 0, ifsq_ifstart_dispatch);
			/* ifsq_ifstart_dispatch() finds its subqueue here */
			ifsq->ifsq_ifstart_nmsg[i].lmsg.u.ms_resultp = ifsq;
		}
	}
	ifq_set_classic(ifq);

	/*
	 * Install this ifp into ifindex2ifnet, ifnet queue and ifnet
	 * array after it is setup.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/* Don't update if_index until ifindex2ifnet is setup */
	ifp->if_index = if_index + 1;
	sdl_addr->sdl_index = ifp->if_index;

	/*
	 * Install this ifp into ifindex2ifnet
	 */
	if (ifindex2ifnet == NULL || ifp->if_index >= if_indexlim) {
		unsigned int n;
		/* NOTE: shadows the subqueue index `q' declared above */
		struct ifnet **q;

		/*
		 * Grow ifindex2ifnet
		 */
		if_indexlim <<= 1;
		n = if_indexlim * sizeof(*q);
		q = kmalloc(n, M_IFADDR, M_WAITOK | M_ZERO);
		if (ifindex2ifnet != NULL) {
			/* The limit was just doubled: copy n/2 bytes */
			bcopy(ifindex2ifnet, q, n/2);
			/* Free old ifindex2ifnet after sync all netisrs */
			old_ifindex2ifnet = ifindex2ifnet;
		}
		ifindex2ifnet = q;
	}
	ifindex2ifnet[ifp->if_index] = ifp;
	/*
	 * Update if_index after this ifp is installed into ifindex2ifnet,
	 * so that netisrs could get a consistent view of ifindex2ifnet.
	 */
	cpu_sfence();
	if_index = ifp->if_index;

	/*
	 * Install this ifp into ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_add(ifp, old_ifnet_array);

	/*
	 * Install this ifp into ifnet queue.
	 */
	TAILQ_INSERT_TAIL(&ifnetlist, ifp, if_link);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifindex2ifnet and ifnet array
	 * are no longer accessed and we can free them safely later on.
	 */
	netmsg_service_sync();
	if (old_ifindex2ifnet != NULL)
		kfree(old_ifindex2ifnet, M_IFADDR);
	ifnet_array_free(old_ifnet_array);

	/* Only attach per-domain data once at least one domain exists */
	if (!SLIST_EMPTY(&domains))
		if_attachdomain1(ifp);

	/* Announce the interface. */
	EVENTHANDLER_INVOKE(ifnet_attach_event, ifp);
	devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}

/*
 * SYSINIT hook ("domainifattach"): run if_attachdomain1() on every
 * interface on the ifnet list, under the ifnet lock.
 *
 * `dummy' is unused.
 *
 * NOTE(review): this walk names the list linkage `if_list' while the rest
 * of this file uses `if_link'; presumably one is an alias of the other --
 * confirm in the header.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_list)
		if_attachdomain1(ifp);
	ifnet_unlock();
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST,
    if_attachdomain, NULL);

/*
 * (Re)build the address family dependent data of `ifp': clear if_afdata
 * and, for every domain that has a dom_ifattach hook, store the hook's
 * result in the slot indexed by the domain's family.  Runs inside a
 * critical section.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	crit_enter();

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	crit_exit();
}

/*
 * Purge all addresses whose type is _not_ AF_LINK
 *
 * Netmsg handler; must run in netisr0.  The interface is passed in
 * lmsg.u.ms_resultp.  Marker entries (AF_UNSPEC) and the link-level
 * address are skipped; everything else is removed, and the message is
 * replied to when the walk is finished.
 */
static void
if_purgeaddrs_nolink_dispatch(netmsg_t nmsg)
{
	struct lwkt_msg *lmsg = &nmsg->lmsg;
	struct ifnet *ifp = lmsg->u.ms_resultp;
	struct ifaddr_container *ifac, *next;

	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
	    ("not in netisr0"));

	/*
	 * The ifaddr processing in the following loop will block,
	 * however, this function is called in netisr0, in which
	 * ifaddr list changes happen, so we don't care about the
	 * blockness of the ifaddr processing here.
	 */
	TAILQ_FOREACH_MUTABLE(ifac, &ifp->if_addrheads[mycpuid],
	    ifa_link, next) {
		struct ifaddr *ifa = ifac->ifa;

		/* Ignore marker */
		if (ifa->ifa_addr->sa_family == AF_UNSPEC)
			continue;

		/* Leave link ifaddr as it is */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/*
		 * XXX: Ugly!! ad hoc just for INET
		 *
		 * NOTE(review): ifa_addr has already been dereferenced
		 * above, so the NULL test below is redundant.
		 *
		 * If in_control() fails, fall through to the generic
		 * unlink/destroy at the bottom of the loop.
		 */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in4 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			bzero(&ifr, sizeof ifr);
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		/* NOTE(review): the NULL test is redundant here as well */
		if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6) {
#ifdef IFADDR_DEBUG_VERBOSE
			int i;

			kprintf("purge in6 addr %p: ", ifa);
			for (i = 0; i < ncpus; ++i)
				kprintf("%d ", ifa->ifa_containers[i].ifa_refcnt);
			kprintf("\n");
#endif

			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
	}

	lwkt_replymsg(lmsg, 0);
}

/*
 * Purge all non-link addresses of `ifp' by running
 * if_purgeaddrs_nolink_dispatch() in netisr0 through lwkt_domsg().
 * The current thread must be allowed to domsg netisr0.
 */
void
if_purgeaddrs_nolink(struct ifnet *ifp)
{
	struct netmsg_base nmsg;
	struct lwkt_msg *lmsg = &nmsg.lmsg;

	ASSERT_CANDOMSG_NETISR0(curthread);

	netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0,
	    if_purgeaddrs_nolink_dispatch);
	lmsg->u.ms_resultp = ifp;
	lwkt_domsg(netisr_cpuport(0), lmsg, 0);
}

/*
 * Netmsg handler: for every subqueue of the ifaltq passed in
 * lmsg.u.ms_resultp, remove the current CPU's stage from the current
 * CPU's stage list if it is queued there, then reply.
 */
static void
ifq_stage_detach_handler(netmsg_t nmsg)
{
	struct ifaltq *ifq = nmsg->lmsg.u.ms_resultp;
	int q;

	for (q = 0; q < ifq->altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifq->altq_subq[q];
		struct ifsubq_stage *stage = ifsq_get_stage(ifsq, mycpuid);

		if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED)
			ifsq_stage_remove(&ifsubq_stage_heads[mycpuid], stage);
	}
	lwkt_replymsg(&nmsg->lmsg, 0);
}

/*
 * Remove the stages of all subqueues of `ifq' from the per-CPU stage
 * lists by running ifq_stage_detach_handler() in the netisr of each CPU,
 * one CPU at a time, reusing the same on-stack message.
 */
static void
ifq_stage_detach(struct ifaltq *ifq)
{
	struct netmsg_base base;
	int cpu;

	netmsg_init(&base, NULL, &curthread->td_msgport, 0,
	    ifq_stage_detach_handler);
	base.lmsg.u.ms_resultp = ifq;

	for (cpu = 0; cpu < ncpus; ++cpu)
		lwkt_domsg(netisr_cpuport(cpu), &base.lmsg, 0);
}

/* Netmsg used by if_detach() to delete the routes of `ifp' on all CPUs */
struct netmsg_if_rtdel {
	struct netmsg_base base;
	struct ifnet *ifp;
};

/*
 * Netmsg handler: walk every routing table of the current CPU (address
 * families 1 .. AF_MAX, skipping absent tables) with if_rtdel() and the
 * interface from the message, then forward the message to the next CPU's
 * netisr port, or reply once the last CPU has been processed.
 */
static void
if_rtdel_dispatch(netmsg_t msg)
{
	struct netmsg_if_rtdel *rmsg = (void *)msg;
	int i, nextcpu, cpu;

	cpu = mycpuid;
	for (i = 1; i <= AF_MAX; i++) {
		struct radix_node_head *rnh;

		if ((rnh = rt_tables[cpu][i]) == NULL)
			continue;
		rnh->rnh_walktree(rnh, if_rtdel, rmsg->ifp);
	}

	nextcpu = cpu + 1;
	if (nextcpu < ncpus)
		lwkt_forwardmsg(netisr_cpuport(nextcpu), &rmsg->base.lmsg);
	else
		lwkt_replymsg(&rmsg->base.lmsg, 0);
}

/*
 * Detach an interface, removing it from the
 * list of "active" interfaces.
 *
 * Announces the departure, unpublishes ifp from ifindex2ifnet, the ifnet
 * list and the ifnet array, brings the interface down, removes its
 * addresses, protocol state and routes, runs the per-domain detach hooks
 * and finally frees what if_attach() allocated.
 */
void
if_detach(struct ifnet *ifp)
{
	struct ifnet_array *old_ifnet_array;
	struct netmsg_if_rtdel msg;
	struct domain *dp;
	int q;

	/* Announce that the interface is gone. */
	EVENTHANDLER_INVOKE(ifnet_detach_event, ifp);
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	/*
	 * Remove this ifp from ifindex2ifnet, ifnet queue and ifnet
	 * array before it is whacked.
	 *
	 * Protect ifindex2ifnet, ifnet queue and ifnet array changes
	 * by ifnet lock, so that non-netisr threads could get a
	 * consistent view.
	 */
	ifnet_lock();

	/*
	 * Remove this ifp from ifindex2ifnet and maybe decrement if_index.
	 */
	ifindex2ifnet[ifp->if_index] = NULL;
	while (if_index > 0 && ifindex2ifnet[if_index] == NULL)
		if_index--;

	/*
	 * Remove this ifp from ifnet queue.
	 */
	TAILQ_REMOVE(&ifnetlist, ifp, if_link);

	/*
	 * Remove this ifp from ifnet array.
	 */
	/* Free old ifnet array after sync all netisrs */
	old_ifnet_array = ifnet_array;
	ifnet_array = ifnet_array_del(ifp, old_ifnet_array);

	ifnet_unlock();

	/*
	 * Sync all netisrs so that the old ifnet array is no longer
	 * accessed and we can free it safely later on.
	 */
	netmsg_service_sync();
	ifnet_array_free(old_ifnet_array);

	/*
	 * Remove routes and flush queues.
	 */
	crit_enter();
#ifdef IFPOLL_ENABLE
	if (ifp->if_flags & IFF_NPOLLING)
		ifpoll_deregister(ifp);
#endif
	if_down(ifp);

#ifdef ALTQ
	if (ifq_is_enabled(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ifq_is_attached(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	/*
	 * Clean up all addresses.
	 */
	ifp->if_lladdr = NULL;

	if_purgeaddrs_nolink(ifp);
	/* Only the link-level address may be left at this point */
	if (!TAILQ_EMPTY(&ifp->if_addrheads[mycpuid])) {
		struct ifaddr *ifa;

		ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa;
		KASSERT(ifa->ifa_addr->sa_family == AF_LINK,
		    ("non-link ifaddr is left on if_addrheads"));

		ifa_ifunlink(ifa, ifp);
		ifa_destroy(ifa);
		KASSERT(TAILQ_EMPTY(&ifp->if_addrheads[mycpuid]),
		    ("there are still ifaddrs left on if_addrheads"));
	}

#ifdef INET
	/*
	 * Remove all IPv4 kernel structures related to ifp.
	 */
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp. This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif

	/*
	 * Delete all remaining routes using this interface
	 */
	netmsg_init(&msg.base, NULL, &curthread->td_msgport, MSGF_PRIORITY,
	    if_rtdel_dispatch);
	msg.ifp = ifp;
	rt_domsg_global(&msg.base);

	/* Let every domain release the data its dom_ifattach hook created */
	SLIST_FOREACH(dp, &domains, dom_next)
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);

	kfree(ifp->if_addrheads, M_IFADDR);

	/*
	 * Synchronize IPIs and unlink the stages from the per-CPU stage
	 * lists before the subqueue memory is freed.
	 */
	lwkt_synchronize_ipiqs("if_detach");
	ifq_stage_detach(&ifp->if_snd);

	for (q = 0; q < ifp->if_snd.altq_subq_cnt; ++q) {
		struct ifaltq_subque *ifsq = &ifp->if_snd.altq_subq[q];

		kfree(ifsq->ifsq_ifstart_nmsg, M_LWKTMSG);
		kfree(ifsq->ifsq_stage, M_DEVBUF);
	}
	kfree(ifp->if_snd.altq_subq, M_DEVBUF);

	kfree(ifp->if_data_pcpu, M_DEVBUF);

	crit_exit();
}

/*
 * Create interface group without members
 *
 * The new group is named `groupname' (truncated to the size of
 * ifg_group), starts with a reference count of 0 and is appended to the
 * global group list.
 *
 * Returns the new group, or NULL if the (non-blocking) allocation failed.
 */
struct ifg_group *
if_creategroup(const char *groupname)
{
	struct ifg_group *ifg = NULL;

	if ((ifg = (struct ifg_group *)kmalloc(sizeof(struct ifg_group),
	    M_TEMP, M_NOWAIT)) == NULL)
		return (NULL);

	strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
	ifg->ifg_refcnt = 0;
	ifg->ifg_carp_demoted = 0;
	TAILQ_INIT(&ifg->ifg_members);
#if NPF > 0
	pfi_attach_ifgroup(ifg);
#endif
	TAILQ_INSERT_TAIL(&ifg_head, ifg, ifg_next);

	return (ifg);
}

/*
 * Add a group to an interface
 *
 * The group is created if it does not exist yet.
 *
 * Returns 0 on success, EINVAL if `groupname' ends in a digit, EEXIST if
 * the interface is already a member of the group, or ENOMEM if an
 * allocation failed.
 */
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_group *ifg = NULL;
	struct ifg_member *ifgm;

	/* A non-empty group name must not end in a digit */
	if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
	    groupname[strlen(groupname) - 1] <= '9')
		return (EINVAL);

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			return (EEXIST);

	if ((ifgl = kmalloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
		return (ENOMEM);

	if ((ifgm = kmalloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
		kfree(ifgl, M_TEMP);
		return (ENOMEM);
	}

	/* Look for an existing group; ifg is NULL if none matches */
	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, groupname))
			break;

	if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
		kfree(ifgl, M_TEMP);
		kfree(ifgm, M_TEMP);
		return (ENOMEM);
	}

	ifg->ifg_refcnt++;
	ifgl->ifgl_group = ifg;
	ifgm->ifgm_ifp = ifp;

	TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
	TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Remove a group from an interface
 *
 * The group itself is destroyed when its reference count drops to 0.
 *
 * Returns 0 on success, or ENOENT if the interface is not a member of
 * `groupname'.
 */
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
	struct ifg_list *ifgl;
	struct ifg_member *ifgm;

	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
		if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
			break;
	if (ifgl == NULL)
		return (ENOENT);

	TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);

	/* Find and drop the group's member entry for this interface */
	TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
		if (ifgm->ifgm_ifp == ifp)
			break;

	if (ifgm != NULL) {
		TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
		kfree(ifgm, M_TEMP);
	}

	if (--ifgl->ifgl_group->ifg_refcnt == 0) {
		TAILQ_REMOVE(&ifg_head, ifgl->ifgl_group, ifg_next);
#if NPF > 0
		pfi_detach_ifgroup(ifgl->ifgl_group);
#endif
		kfree(ifgl->ifgl_group, M_TEMP);
	}

	kfree(ifgl, M_TEMP);

#if NPF > 0
	pfi_group_change(groupname);
#endif

	return (0);
}

/*
 * Stores all groups from an interface in memory pointed
 * to by data
 *
 * `data' is a struct ifgroupreq.  If its ifgr_len is 0, only the space
 * required (sizeof(struct ifg_req) per group) is reported back in
 * ifgr_len.  Otherwise one struct ifg_req per group is copied out to
 * ifgr_groups.
 *
 * Returns 0 on success, EINVAL if the remaining length is too small for
 * another entry, or the error returned by copyout().
 */
int
if_getgroup(caddr_t data, struct ifnet *ifp)
{
	int len, error;
	struct ifg_list *ifgl;
	struct ifg_req ifgrq, *ifgp;
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
			ifgr->ifgr_len += sizeof(struct ifg_req);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		/*
		 * NOTE(review): `len' is an int while sizeof yields an
		 * unsigned type, so this comparison is done unsigned.
		 */
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Stores all members of a group in memory pointed to by data
 *
 * `data' is a struct ifgroupreq whose ifgr_name selects the group.  If
 * its ifgr_len is 0, only the space required (sizeof(struct ifg_req) per
 * member) is reported back in ifgr_len.  Otherwise one struct ifg_req
 * holding the member interface's name is copied out to ifgr_groups for
 * every member.
 *
 * Returns 0 on success, ENOENT if no such group exists, EINVAL if the
 * remaining length is too small for another entry, or the error returned
 * by copyout().
 */
int
if_getgroupmembers(caddr_t data)
{
	struct ifgroupreq *ifgr = (struct ifgroupreq *)data;
	struct ifg_group *ifg;
	struct ifg_member *ifgm;
	struct ifg_req ifgrq, *ifgp;
	int len, error;

	TAILQ_FOREACH(ifg, &ifg_head, ifg_next)
		if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
			break;
	if (ifg == NULL)
		return (ENOENT);

	if (ifgr->ifgr_len == 0) {
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next)
			ifgr->ifgr_len += sizeof(ifgrq);
		return (0);
	}

	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		/*
		 * NOTE(review): `len' is an int while sizeof yields an
		 * unsigned type, so this comparison is done unsigned.
		 */
		if (len < sizeof(ifgrq))
			return (EINVAL);
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		if ((error = copyout((caddr_t)&ifgrq, (caddr_t)ifgp,
		    sizeof(struct ifg_req))))
			return (error);
		len -= sizeof(ifgrq);
		ifgp++;
	}

	return (0);
}

/*
 * Delete Routes for a Network Interface
 *
 * Called for each routing entry via
the rnh->rnh_walktree() call above 1210 * to delete all route entries referencing a detaching network interface. 1211 * 1212 * Arguments: 1213 * rn pointer to node in the routing table 1214 * arg argument passed to rnh->rnh_walktree() - detaching interface 1215 * 1216 * Returns: 1217 * 0 successful 1218 * errno failed - reason indicated 1219 * 1220 */ 1221 static int 1222 if_rtdel(struct radix_node *rn, void *arg) 1223 { 1224 struct rtentry *rt = (struct rtentry *)rn; 1225 struct ifnet *ifp = arg; 1226 int err; 1227 1228 if (rt->rt_ifp == ifp) { 1229 1230 /* 1231 * Protect (sorta) against walktree recursion problems 1232 * with cloned routes 1233 */ 1234 if (!(rt->rt_flags & RTF_UP)) 1235 return (0); 1236 1237 err = rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1238 rt_mask(rt), rt->rt_flags, 1239 NULL); 1240 if (err) { 1241 log(LOG_WARNING, "if_rtdel: error %d\n", err); 1242 } 1243 } 1244 1245 return (0); 1246 } 1247 1248 /* 1249 * Locate an interface based on a complete address. 1250 */ 1251 struct ifaddr * 1252 ifa_ifwithaddr(struct sockaddr *addr) 1253 { 1254 const struct ifnet_array *arr; 1255 int i; 1256 1257 arr = ifnet_array_get(); 1258 for (i = 0; i < arr->ifnet_count; ++i) { 1259 struct ifnet *ifp = arr->ifnet_arr[i]; 1260 struct ifaddr_container *ifac; 1261 1262 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1263 struct ifaddr *ifa = ifac->ifa; 1264 1265 if (ifa->ifa_addr->sa_family != addr->sa_family) 1266 continue; 1267 if (sa_equal(addr, ifa->ifa_addr)) 1268 return (ifa); 1269 if ((ifp->if_flags & IFF_BROADCAST) && 1270 ifa->ifa_broadaddr && 1271 /* IPv6 doesn't have broadcast */ 1272 ifa->ifa_broadaddr->sa_len != 0 && 1273 sa_equal(ifa->ifa_broadaddr, addr)) 1274 return (ifa); 1275 } 1276 } 1277 return (NULL); 1278 } 1279 /* 1280 * Locate the point to point interface with a given destination address. 
1281 */ 1282 struct ifaddr * 1283 ifa_ifwithdstaddr(struct sockaddr *addr) 1284 { 1285 const struct ifnet_array *arr; 1286 int i; 1287 1288 arr = ifnet_array_get(); 1289 for (i = 0; i < arr->ifnet_count; ++i) { 1290 struct ifnet *ifp = arr->ifnet_arr[i]; 1291 struct ifaddr_container *ifac; 1292 1293 if (!(ifp->if_flags & IFF_POINTOPOINT)) 1294 continue; 1295 1296 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1297 struct ifaddr *ifa = ifac->ifa; 1298 1299 if (ifa->ifa_addr->sa_family != addr->sa_family) 1300 continue; 1301 if (ifa->ifa_dstaddr && 1302 sa_equal(addr, ifa->ifa_dstaddr)) 1303 return (ifa); 1304 } 1305 } 1306 return (NULL); 1307 } 1308 1309 /* 1310 * Find an interface on a specific network. If many, choice 1311 * is most specific found. 1312 */ 1313 struct ifaddr * 1314 ifa_ifwithnet(struct sockaddr *addr) 1315 { 1316 struct ifaddr *ifa_maybe = NULL; 1317 u_int af = addr->sa_family; 1318 char *addr_data = addr->sa_data, *cplim; 1319 const struct ifnet_array *arr; 1320 int i; 1321 1322 /* 1323 * AF_LINK addresses can be looked up directly by their index number, 1324 * so do that if we can. 1325 */ 1326 if (af == AF_LINK) { 1327 struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr; 1328 1329 if (sdl->sdl_index && sdl->sdl_index <= if_index) 1330 return (ifindex2ifnet[sdl->sdl_index]->if_lladdr); 1331 } 1332 1333 /* 1334 * Scan though each interface, looking for ones that have 1335 * addresses in this address family. 
1336 */ 1337 arr = ifnet_array_get(); 1338 for (i = 0; i < arr->ifnet_count; ++i) { 1339 struct ifnet *ifp = arr->ifnet_arr[i]; 1340 struct ifaddr_container *ifac; 1341 1342 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1343 struct ifaddr *ifa = ifac->ifa; 1344 char *cp, *cp2, *cp3; 1345 1346 if (ifa->ifa_addr->sa_family != af) 1347 next: continue; 1348 if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) { 1349 /* 1350 * This is a bit broken as it doesn't 1351 * take into account that the remote end may 1352 * be a single node in the network we are 1353 * looking for. 1354 * The trouble is that we don't know the 1355 * netmask for the remote end. 1356 */ 1357 if (ifa->ifa_dstaddr != NULL && 1358 sa_equal(addr, ifa->ifa_dstaddr)) 1359 return (ifa); 1360 } else { 1361 /* 1362 * if we have a special address handler, 1363 * then use it instead of the generic one. 1364 */ 1365 if (ifa->ifa_claim_addr) { 1366 if ((*ifa->ifa_claim_addr)(ifa, addr)) { 1367 return (ifa); 1368 } else { 1369 continue; 1370 } 1371 } 1372 1373 /* 1374 * Scan all the bits in the ifa's address. 1375 * If a bit dissagrees with what we are 1376 * looking for, mask it with the netmask 1377 * to see if it really matters. 1378 * (A byte at a time) 1379 */ 1380 if (ifa->ifa_netmask == 0) 1381 continue; 1382 cp = addr_data; 1383 cp2 = ifa->ifa_addr->sa_data; 1384 cp3 = ifa->ifa_netmask->sa_data; 1385 cplim = ifa->ifa_netmask->sa_len + 1386 (char *)ifa->ifa_netmask; 1387 while (cp3 < cplim) 1388 if ((*cp++ ^ *cp2++) & *cp3++) 1389 goto next; /* next address! */ 1390 /* 1391 * If the netmask of what we just found 1392 * is more specific than what we had before 1393 * (if we had one) then remember the new one 1394 * before continuing to search 1395 * for an even better one. 
1396 */ 1397 if (ifa_maybe == NULL || 1398 rn_refines((char *)ifa->ifa_netmask, 1399 (char *)ifa_maybe->ifa_netmask)) 1400 ifa_maybe = ifa; 1401 } 1402 } 1403 } 1404 return (ifa_maybe); 1405 } 1406 1407 /* 1408 * Find an interface address specific to an interface best matching 1409 * a given address. 1410 */ 1411 struct ifaddr * 1412 ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp) 1413 { 1414 struct ifaddr_container *ifac; 1415 char *cp, *cp2, *cp3; 1416 char *cplim; 1417 struct ifaddr *ifa_maybe = NULL; 1418 u_int af = addr->sa_family; 1419 1420 if (af >= AF_MAX) 1421 return (0); 1422 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1423 struct ifaddr *ifa = ifac->ifa; 1424 1425 if (ifa->ifa_addr->sa_family != af) 1426 continue; 1427 if (ifa_maybe == NULL) 1428 ifa_maybe = ifa; 1429 if (ifa->ifa_netmask == NULL) { 1430 if (sa_equal(addr, ifa->ifa_addr) || 1431 (ifa->ifa_dstaddr != NULL && 1432 sa_equal(addr, ifa->ifa_dstaddr))) 1433 return (ifa); 1434 continue; 1435 } 1436 if (ifp->if_flags & IFF_POINTOPOINT) { 1437 if (sa_equal(addr, ifa->ifa_dstaddr)) 1438 return (ifa); 1439 } else { 1440 cp = addr->sa_data; 1441 cp2 = ifa->ifa_addr->sa_data; 1442 cp3 = ifa->ifa_netmask->sa_data; 1443 cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; 1444 for (; cp3 < cplim; cp3++) 1445 if ((*cp++ ^ *cp2++) & *cp3) 1446 break; 1447 if (cp3 == cplim) 1448 return (ifa); 1449 } 1450 } 1451 return (ifa_maybe); 1452 } 1453 1454 /* 1455 * Default action when installing a route with a Link Level gateway. 1456 * Lookup an appropriate real ifa to point to. 1457 * This should be moved to /sys/net/link.c eventually. 
1458 */ 1459 static void 1460 link_rtrequest(int cmd, struct rtentry *rt) 1461 { 1462 struct ifaddr *ifa; 1463 struct sockaddr *dst; 1464 struct ifnet *ifp; 1465 1466 if (cmd != RTM_ADD || (ifa = rt->rt_ifa) == NULL || 1467 (ifp = ifa->ifa_ifp) == NULL || (dst = rt_key(rt)) == NULL) 1468 return; 1469 ifa = ifaof_ifpforaddr(dst, ifp); 1470 if (ifa != NULL) { 1471 IFAFREE(rt->rt_ifa); 1472 IFAREF(ifa); 1473 rt->rt_ifa = ifa; 1474 if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) 1475 ifa->ifa_rtrequest(cmd, rt); 1476 } 1477 } 1478 1479 struct netmsg_ifroute { 1480 struct netmsg_base base; 1481 struct ifnet *ifp; 1482 int flag; 1483 int fam; 1484 }; 1485 1486 /* 1487 * Mark an interface down and notify protocols of the transition. 1488 */ 1489 static void 1490 if_unroute_dispatch(netmsg_t nmsg) 1491 { 1492 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg; 1493 struct ifnet *ifp = msg->ifp; 1494 int flag = msg->flag, fam = msg->fam; 1495 struct ifaddr_container *ifac; 1496 1497 ifp->if_flags &= ~flag; 1498 getmicrotime(&ifp->if_lastchange); 1499 /* 1500 * The ifaddr processing in the following loop will block, 1501 * however, this function is called in netisr0, in which 1502 * ifaddr list changes happen, so we don't care about the 1503 * blockness of the ifaddr processing here. 
1504 */ 1505 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1506 struct ifaddr *ifa = ifac->ifa; 1507 1508 /* Ignore marker */ 1509 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1510 continue; 1511 1512 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 1513 kpfctlinput(PRC_IFDOWN, ifa->ifa_addr); 1514 } 1515 ifq_purge_all(&ifp->if_snd); 1516 rt_ifmsg(ifp); 1517 1518 lwkt_replymsg(&nmsg->lmsg, 0); 1519 } 1520 1521 void 1522 if_unroute(struct ifnet *ifp, int flag, int fam) 1523 { 1524 struct netmsg_ifroute msg; 1525 1526 ASSERT_CANDOMSG_NETISR0(curthread); 1527 1528 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1529 if_unroute_dispatch); 1530 msg.ifp = ifp; 1531 msg.flag = flag; 1532 msg.fam = fam; 1533 lwkt_domsg(netisr_cpuport(0), &msg.base.lmsg, 0); 1534 } 1535 1536 /* 1537 * Mark an interface up and notify protocols of the transition. 1538 */ 1539 static void 1540 if_route_dispatch(netmsg_t nmsg) 1541 { 1542 struct netmsg_ifroute *msg = (struct netmsg_ifroute *)nmsg; 1543 struct ifnet *ifp = msg->ifp; 1544 int flag = msg->flag, fam = msg->fam; 1545 struct ifaddr_container *ifac; 1546 1547 ifq_purge_all(&ifp->if_snd); 1548 ifp->if_flags |= flag; 1549 getmicrotime(&ifp->if_lastchange); 1550 /* 1551 * The ifaddr processing in the following loop will block, 1552 * however, this function is called in netisr0, in which 1553 * ifaddr list changes happen, so we don't care about the 1554 * blockness of the ifaddr processing here. 
1555 */ 1556 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1557 struct ifaddr *ifa = ifac->ifa; 1558 1559 /* Ignore marker */ 1560 if (ifa->ifa_addr->sa_family == AF_UNSPEC) 1561 continue; 1562 1563 if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) 1564 kpfctlinput(PRC_IFUP, ifa->ifa_addr); 1565 } 1566 rt_ifmsg(ifp); 1567 #ifdef INET6 1568 in6_if_up(ifp); 1569 #endif 1570 1571 lwkt_replymsg(&nmsg->lmsg, 0); 1572 } 1573 1574 void 1575 if_route(struct ifnet *ifp, int flag, int fam) 1576 { 1577 struct netmsg_ifroute msg; 1578 1579 ASSERT_CANDOMSG_NETISR0(curthread); 1580 1581 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 0, 1582 if_route_dispatch); 1583 msg.ifp = ifp; 1584 msg.flag = flag; 1585 msg.fam = fam; 1586 lwkt_domsg(netisr_cpuport(0), &msg.base.lmsg, 0); 1587 } 1588 1589 /* 1590 * Mark an interface down and notify protocols of the transition. An 1591 * interface going down is also considered to be a synchronizing event. 1592 * We must ensure that all packet processing related to the interface 1593 * has completed before we return so e.g. the caller can free the ifnet 1594 * structure that the mbufs may be referencing. 1595 * 1596 * NOTE: must be called at splnet or eqivalent. 1597 */ 1598 void 1599 if_down(struct ifnet *ifp) 1600 { 1601 if_unroute(ifp, IFF_UP, AF_UNSPEC); 1602 netmsg_service_sync(); 1603 } 1604 1605 /* 1606 * Mark an interface up and notify protocols of 1607 * the transition. 1608 * NOTE: must be called at splnet or eqivalent. 1609 */ 1610 void 1611 if_up(struct ifnet *ifp) 1612 { 1613 if_route(ifp, IFF_UP, AF_UNSPEC); 1614 } 1615 1616 /* 1617 * Process a link state change. 1618 * NOTE: must be called at splsoftnet or equivalent. 1619 */ 1620 void 1621 if_link_state_change(struct ifnet *ifp) 1622 { 1623 int link_state = ifp->if_link_state; 1624 1625 rt_ifmsg(ifp); 1626 devctl_notify("IFNET", ifp->if_xname, 1627 (link_state == LINK_STATE_UP) ? 
"LINK_UP" : "LINK_DOWN", NULL); 1628 } 1629 1630 /* 1631 * Handle interface watchdog timer routines. Called 1632 * from softclock, we decrement timers (if set) and 1633 * call the appropriate interface routine on expiration. 1634 */ 1635 static void 1636 if_slowtimo_dispatch(netmsg_t nmsg) 1637 { 1638 struct globaldata *gd = mycpu; 1639 const struct ifnet_array *arr; 1640 int i; 1641 1642 KASSERT(&curthread->td_msgport == netisr_cpuport(0), 1643 ("not in netisr0")); 1644 1645 crit_enter_gd(gd); 1646 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */ 1647 crit_exit_gd(gd); 1648 1649 arr = ifnet_array_get(); 1650 for (i = 0; i < arr->ifnet_count; ++i) { 1651 struct ifnet *ifp = arr->ifnet_arr[i]; 1652 1653 crit_enter_gd(gd); 1654 1655 if (if_stats_compat) { 1656 IFNET_STAT_GET(ifp, ipackets, ifp->if_ipackets); 1657 IFNET_STAT_GET(ifp, ierrors, ifp->if_ierrors); 1658 IFNET_STAT_GET(ifp, opackets, ifp->if_opackets); 1659 IFNET_STAT_GET(ifp, oerrors, ifp->if_oerrors); 1660 IFNET_STAT_GET(ifp, collisions, ifp->if_collisions); 1661 IFNET_STAT_GET(ifp, ibytes, ifp->if_ibytes); 1662 IFNET_STAT_GET(ifp, obytes, ifp->if_obytes); 1663 IFNET_STAT_GET(ifp, imcasts, ifp->if_imcasts); 1664 IFNET_STAT_GET(ifp, omcasts, ifp->if_omcasts); 1665 IFNET_STAT_GET(ifp, iqdrops, ifp->if_iqdrops); 1666 IFNET_STAT_GET(ifp, noproto, ifp->if_noproto); 1667 } 1668 1669 if (ifp->if_timer == 0 || --ifp->if_timer) { 1670 crit_exit_gd(gd); 1671 continue; 1672 } 1673 if (ifp->if_watchdog) { 1674 if (ifnet_tryserialize_all(ifp)) { 1675 (*ifp->if_watchdog)(ifp); 1676 ifnet_deserialize_all(ifp); 1677 } else { 1678 /* try again next timeout */ 1679 ++ifp->if_timer; 1680 } 1681 } 1682 1683 crit_exit_gd(gd); 1684 } 1685 1686 callout_reset(&if_slowtimo_timer, hz / IFNET_SLOWHZ, if_slowtimo, NULL); 1687 } 1688 1689 static void 1690 if_slowtimo(void *arg __unused) 1691 { 1692 struct lwkt_msg *lmsg = &if_slowtimo_netmsg.lmsg; 1693 1694 KASSERT(mycpuid == 0, ("not on cpu0")); 1695 crit_enter(); 1696 if 
(lmsg->ms_flags & MSGF_DONE) 1697 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg); 1698 crit_exit(); 1699 } 1700 1701 /* 1702 * Map interface name to 1703 * interface structure pointer. 1704 */ 1705 struct ifnet * 1706 ifunit(const char *name) 1707 { 1708 struct ifnet *ifp; 1709 1710 /* 1711 * Search all the interfaces for this name/number 1712 */ 1713 KASSERT(mtx_owned(&ifnet_mtx), ("ifnet is not locked")); 1714 1715 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 1716 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1717 break; 1718 } 1719 return (ifp); 1720 } 1721 1722 struct ifnet * 1723 ifunit_netisr(const char *name) 1724 { 1725 const struct ifnet_array *arr; 1726 int i; 1727 1728 /* 1729 * Search all the interfaces for this name/number 1730 */ 1731 1732 arr = ifnet_array_get(); 1733 for (i = 0; i < arr->ifnet_count; ++i) { 1734 struct ifnet *ifp = arr->ifnet_arr[i]; 1735 1736 if (strncmp(ifp->if_xname, name, IFNAMSIZ) == 0) 1737 return ifp; 1738 } 1739 return NULL; 1740 } 1741 1742 /* 1743 * Interface ioctls. 1744 */ 1745 int 1746 ifioctl(struct socket *so, u_long cmd, caddr_t data, struct ucred *cred) 1747 { 1748 struct ifnet *ifp; 1749 struct ifreq *ifr; 1750 struct ifstat *ifs; 1751 int error; 1752 short oif_flags; 1753 int new_flags; 1754 #ifdef COMPAT_43 1755 int ocmd; 1756 #endif 1757 size_t namelen, onamelen; 1758 char new_name[IFNAMSIZ]; 1759 struct ifaddr *ifa; 1760 struct sockaddr_dl *sdl; 1761 1762 switch (cmd) { 1763 case SIOCGIFCONF: 1764 case OSIOCGIFCONF: 1765 return (ifconf(cmd, data, cred)); 1766 default: 1767 break; 1768 } 1769 1770 ifr = (struct ifreq *)data; 1771 1772 switch (cmd) { 1773 case SIOCIFCREATE: 1774 case SIOCIFCREATE2: 1775 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1776 return (error); 1777 return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), 1778 cmd == SIOCIFCREATE2 ? 
ifr->ifr_data : NULL)); 1779 case SIOCIFDESTROY: 1780 if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0) 1781 return (error); 1782 return (if_clone_destroy(ifr->ifr_name)); 1783 case SIOCIFGCLONERS: 1784 return (if_clone_list((struct if_clonereq *)data)); 1785 default: 1786 break; 1787 } 1788 1789 /* 1790 * Nominal ioctl through interface, lookup the ifp and obtain a 1791 * lock to serialize the ifconfig ioctl operation. 1792 */ 1793 ifnet_lock(); 1794 1795 ifp = ifunit(ifr->ifr_name); 1796 if (ifp == NULL) { 1797 ifnet_unlock(); 1798 return (ENXIO); 1799 } 1800 error = 0; 1801 1802 switch (cmd) { 1803 case SIOCGIFINDEX: 1804 ifr->ifr_index = ifp->if_index; 1805 break; 1806 1807 case SIOCGIFFLAGS: 1808 ifr->ifr_flags = ifp->if_flags; 1809 ifr->ifr_flagshigh = ifp->if_flags >> 16; 1810 break; 1811 1812 case SIOCGIFCAP: 1813 ifr->ifr_reqcap = ifp->if_capabilities; 1814 ifr->ifr_curcap = ifp->if_capenable; 1815 break; 1816 1817 case SIOCGIFMETRIC: 1818 ifr->ifr_metric = ifp->if_metric; 1819 break; 1820 1821 case SIOCGIFMTU: 1822 ifr->ifr_mtu = ifp->if_mtu; 1823 break; 1824 1825 case SIOCGIFTSOLEN: 1826 ifr->ifr_tsolen = ifp->if_tsolen; 1827 break; 1828 1829 case SIOCGIFDATA: 1830 error = copyout((caddr_t)&ifp->if_data, ifr->ifr_data, 1831 sizeof(ifp->if_data)); 1832 break; 1833 1834 case SIOCGIFPHYS: 1835 ifr->ifr_phys = ifp->if_physical; 1836 break; 1837 1838 case SIOCGIFPOLLCPU: 1839 ifr->ifr_pollcpu = -1; 1840 break; 1841 1842 case SIOCSIFPOLLCPU: 1843 break; 1844 1845 case SIOCSIFFLAGS: 1846 error = priv_check_cred(cred, PRIV_ROOT, 0); 1847 if (error) 1848 break; 1849 new_flags = (ifr->ifr_flags & 0xffff) | 1850 (ifr->ifr_flagshigh << 16); 1851 if (ifp->if_flags & IFF_SMART) { 1852 /* Smart drivers twiddle their own routes */ 1853 } else if (ifp->if_flags & IFF_UP && 1854 (new_flags & IFF_UP) == 0) { 1855 crit_enter(); 1856 if_down(ifp); 1857 crit_exit(); 1858 } else if (new_flags & IFF_UP && 1859 (ifp->if_flags & IFF_UP) == 0) { 1860 crit_enter(); 1861 
if_up(ifp); 1862 crit_exit(); 1863 } 1864 1865 #ifdef IFPOLL_ENABLE 1866 if ((new_flags ^ ifp->if_flags) & IFF_NPOLLING) { 1867 if (new_flags & IFF_NPOLLING) 1868 ifpoll_register(ifp); 1869 else 1870 ifpoll_deregister(ifp); 1871 } 1872 #endif 1873 1874 ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | 1875 (new_flags &~ IFF_CANTCHANGE); 1876 if (new_flags & IFF_PPROMISC) { 1877 /* Permanently promiscuous mode requested */ 1878 ifp->if_flags |= IFF_PROMISC; 1879 } else if (ifp->if_pcount == 0) { 1880 ifp->if_flags &= ~IFF_PROMISC; 1881 } 1882 if (ifp->if_ioctl) { 1883 ifnet_serialize_all(ifp); 1884 ifp->if_ioctl(ifp, cmd, data, cred); 1885 ifnet_deserialize_all(ifp); 1886 } 1887 getmicrotime(&ifp->if_lastchange); 1888 break; 1889 1890 case SIOCSIFCAP: 1891 error = priv_check_cred(cred, PRIV_ROOT, 0); 1892 if (error) 1893 break; 1894 if (ifr->ifr_reqcap & ~ifp->if_capabilities) { 1895 error = EINVAL; 1896 break; 1897 } 1898 ifnet_serialize_all(ifp); 1899 ifp->if_ioctl(ifp, cmd, data, cred); 1900 ifnet_deserialize_all(ifp); 1901 break; 1902 1903 case SIOCSIFNAME: 1904 error = priv_check_cred(cred, PRIV_ROOT, 0); 1905 if (error) 1906 break; 1907 error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); 1908 if (error) 1909 break; 1910 if (new_name[0] == '\0') { 1911 error = EINVAL; 1912 break; 1913 } 1914 if (ifunit(new_name) != NULL) { 1915 error = EEXIST; 1916 break; 1917 } 1918 1919 EVENTHANDLER_INVOKE(ifnet_detach_event, ifp); 1920 1921 /* Announce the departure of the interface. */ 1922 rt_ifannouncemsg(ifp, IFAN_DEPARTURE); 1923 1924 strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); 1925 ifa = TAILQ_FIRST(&ifp->if_addrheads[mycpuid])->ifa; 1926 sdl = (struct sockaddr_dl *)ifa->ifa_addr; 1927 namelen = strlen(new_name); 1928 onamelen = sdl->sdl_nlen; 1929 /* 1930 * Move the address if needed. This is safe because we 1931 * allocate space for a name of length IFNAMSIZ when we 1932 * create this in if_attach(). 
1933 */ 1934 if (namelen != onamelen) { 1935 bcopy(sdl->sdl_data + onamelen, 1936 sdl->sdl_data + namelen, sdl->sdl_alen); 1937 } 1938 bcopy(new_name, sdl->sdl_data, namelen); 1939 sdl->sdl_nlen = namelen; 1940 sdl = (struct sockaddr_dl *)ifa->ifa_netmask; 1941 bzero(sdl->sdl_data, onamelen); 1942 while (namelen != 0) 1943 sdl->sdl_data[--namelen] = 0xff; 1944 1945 EVENTHANDLER_INVOKE(ifnet_attach_event, ifp); 1946 1947 /* Announce the return of the interface. */ 1948 rt_ifannouncemsg(ifp, IFAN_ARRIVAL); 1949 break; 1950 1951 case SIOCSIFMETRIC: 1952 error = priv_check_cred(cred, PRIV_ROOT, 0); 1953 if (error) 1954 break; 1955 ifp->if_metric = ifr->ifr_metric; 1956 getmicrotime(&ifp->if_lastchange); 1957 break; 1958 1959 case SIOCSIFPHYS: 1960 error = priv_check_cred(cred, PRIV_ROOT, 0); 1961 if (error) 1962 break; 1963 if (ifp->if_ioctl == NULL) { 1964 error = EOPNOTSUPP; 1965 break; 1966 } 1967 ifnet_serialize_all(ifp); 1968 error = ifp->if_ioctl(ifp, cmd, data, cred); 1969 ifnet_deserialize_all(ifp); 1970 if (error == 0) 1971 getmicrotime(&ifp->if_lastchange); 1972 break; 1973 1974 case SIOCSIFMTU: 1975 { 1976 u_long oldmtu = ifp->if_mtu; 1977 1978 error = priv_check_cred(cred, PRIV_ROOT, 0); 1979 if (error) 1980 break; 1981 if (ifp->if_ioctl == NULL) { 1982 error = EOPNOTSUPP; 1983 break; 1984 } 1985 if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) { 1986 error = EINVAL; 1987 break; 1988 } 1989 ifnet_serialize_all(ifp); 1990 error = ifp->if_ioctl(ifp, cmd, data, cred); 1991 ifnet_deserialize_all(ifp); 1992 if (error == 0) { 1993 getmicrotime(&ifp->if_lastchange); 1994 rt_ifmsg(ifp); 1995 } 1996 /* 1997 * If the link MTU changed, do network layer specific procedure. 
1998 */ 1999 if (ifp->if_mtu != oldmtu) { 2000 #ifdef INET6 2001 nd6_setmtu(ifp); 2002 #endif 2003 } 2004 break; 2005 } 2006 2007 case SIOCSIFTSOLEN: 2008 error = priv_check_cred(cred, PRIV_ROOT, 0); 2009 if (error) 2010 break; 2011 2012 /* XXX need driver supplied upper limit */ 2013 if (ifr->ifr_tsolen <= 0) { 2014 error = EINVAL; 2015 break; 2016 } 2017 ifp->if_tsolen = ifr->ifr_tsolen; 2018 break; 2019 2020 case SIOCADDMULTI: 2021 case SIOCDELMULTI: 2022 error = priv_check_cred(cred, PRIV_ROOT, 0); 2023 if (error) 2024 break; 2025 2026 /* Don't allow group membership on non-multicast interfaces. */ 2027 if ((ifp->if_flags & IFF_MULTICAST) == 0) { 2028 error = EOPNOTSUPP; 2029 break; 2030 } 2031 2032 /* Don't let users screw up protocols' entries. */ 2033 if (ifr->ifr_addr.sa_family != AF_LINK) { 2034 error = EINVAL; 2035 break; 2036 } 2037 2038 if (cmd == SIOCADDMULTI) { 2039 struct ifmultiaddr *ifma; 2040 error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); 2041 } else { 2042 error = if_delmulti(ifp, &ifr->ifr_addr); 2043 } 2044 if (error == 0) 2045 getmicrotime(&ifp->if_lastchange); 2046 break; 2047 2048 case SIOCSIFPHYADDR: 2049 case SIOCDIFPHYADDR: 2050 #ifdef INET6 2051 case SIOCSIFPHYADDR_IN6: 2052 #endif 2053 case SIOCSLIFPHYADDR: 2054 case SIOCSIFMEDIA: 2055 case SIOCSIFGENERIC: 2056 error = priv_check_cred(cred, PRIV_ROOT, 0); 2057 if (error) 2058 break; 2059 if (ifp->if_ioctl == 0) { 2060 error = EOPNOTSUPP; 2061 break; 2062 } 2063 ifnet_serialize_all(ifp); 2064 error = ifp->if_ioctl(ifp, cmd, data, cred); 2065 ifnet_deserialize_all(ifp); 2066 if (error == 0) 2067 getmicrotime(&ifp->if_lastchange); 2068 break; 2069 2070 case SIOCGIFSTATUS: 2071 ifs = (struct ifstat *)data; 2072 ifs->ascii[0] = '\0'; 2073 /* fall through */ 2074 case SIOCGIFPSRCADDR: 2075 case SIOCGIFPDSTADDR: 2076 case SIOCGLIFPHYADDR: 2077 case SIOCGIFMEDIA: 2078 case SIOCGIFGENERIC: 2079 if (ifp->if_ioctl == NULL) { 2080 error = EOPNOTSUPP; 2081 break; 2082 } 2083 
ifnet_serialize_all(ifp); 2084 error = ifp->if_ioctl(ifp, cmd, data, cred); 2085 ifnet_deserialize_all(ifp); 2086 break; 2087 2088 case SIOCSIFLLADDR: 2089 error = priv_check_cred(cred, PRIV_ROOT, 0); 2090 if (error) 2091 break; 2092 error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, 2093 ifr->ifr_addr.sa_len); 2094 EVENTHANDLER_INVOKE(iflladdr_event, ifp); 2095 break; 2096 2097 default: 2098 oif_flags = ifp->if_flags; 2099 if (so->so_proto == 0) { 2100 error = EOPNOTSUPP; 2101 break; 2102 } 2103 #ifndef COMPAT_43 2104 error = so_pru_control_direct(so, cmd, data, ifp); 2105 #else 2106 ocmd = cmd; 2107 2108 switch (cmd) { 2109 case SIOCSIFDSTADDR: 2110 case SIOCSIFADDR: 2111 case SIOCSIFBRDADDR: 2112 case SIOCSIFNETMASK: 2113 #if BYTE_ORDER != BIG_ENDIAN 2114 if (ifr->ifr_addr.sa_family == 0 && 2115 ifr->ifr_addr.sa_len < 16) { 2116 ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len; 2117 ifr->ifr_addr.sa_len = 16; 2118 } 2119 #else 2120 if (ifr->ifr_addr.sa_len == 0) 2121 ifr->ifr_addr.sa_len = 16; 2122 #endif 2123 break; 2124 case OSIOCGIFADDR: 2125 cmd = SIOCGIFADDR; 2126 break; 2127 case OSIOCGIFDSTADDR: 2128 cmd = SIOCGIFDSTADDR; 2129 break; 2130 case OSIOCGIFBRDADDR: 2131 cmd = SIOCGIFBRDADDR; 2132 break; 2133 case OSIOCGIFNETMASK: 2134 cmd = SIOCGIFNETMASK; 2135 break; 2136 default: 2137 break; 2138 } 2139 2140 error = so_pru_control_direct(so, cmd, data, ifp); 2141 2142 switch (ocmd) { 2143 case OSIOCGIFADDR: 2144 case OSIOCGIFDSTADDR: 2145 case OSIOCGIFBRDADDR: 2146 case OSIOCGIFNETMASK: 2147 *(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family; 2148 break; 2149 } 2150 #endif /* COMPAT_43 */ 2151 2152 if ((oif_flags ^ ifp->if_flags) & IFF_UP) { 2153 #ifdef INET6 2154 DELAY(100);/* XXX: temporary workaround for fxp issue*/ 2155 if (ifp->if_flags & IFF_UP) { 2156 crit_enter(); 2157 in6_if_up(ifp); 2158 crit_exit(); 2159 } 2160 #endif 2161 } 2162 break; 2163 } 2164 2165 ifnet_unlock(); 2166 return (error); 2167 } 2168 2169 /* 2170 * Set/clear promiscuous mode on 
interface ifp based on the truth value 2171 * of pswitch. The calls are reference counted so that only the first 2172 * "on" request actually has an effect, as does the final "off" request. 2173 * Results are undefined if the "off" and "on" requests are not matched. 2174 */ 2175 int 2176 ifpromisc(struct ifnet *ifp, int pswitch) 2177 { 2178 struct ifreq ifr; 2179 int error; 2180 int oldflags; 2181 2182 oldflags = ifp->if_flags; 2183 if (ifp->if_flags & IFF_PPROMISC) { 2184 /* Do nothing if device is in permanently promiscuous mode */ 2185 ifp->if_pcount += pswitch ? 1 : -1; 2186 return (0); 2187 } 2188 if (pswitch) { 2189 /* 2190 * If the device is not configured up, we cannot put it in 2191 * promiscuous mode. 2192 */ 2193 if ((ifp->if_flags & IFF_UP) == 0) 2194 return (ENETDOWN); 2195 if (ifp->if_pcount++ != 0) 2196 return (0); 2197 ifp->if_flags |= IFF_PROMISC; 2198 log(LOG_INFO, "%s: promiscuous mode enabled\n", 2199 ifp->if_xname); 2200 } else { 2201 if (--ifp->if_pcount > 0) 2202 return (0); 2203 ifp->if_flags &= ~IFF_PROMISC; 2204 log(LOG_INFO, "%s: promiscuous mode disabled\n", 2205 ifp->if_xname); 2206 } 2207 ifr.ifr_flags = ifp->if_flags; 2208 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2209 ifnet_serialize_all(ifp); 2210 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, NULL); 2211 ifnet_deserialize_all(ifp); 2212 if (error == 0) 2213 rt_ifmsg(ifp); 2214 else 2215 ifp->if_flags = oldflags; 2216 return error; 2217 } 2218 2219 /* 2220 * Return interface configuration 2221 * of system. List may be used 2222 * in later ioctl's (above) to get 2223 * other information. 
 */
/*
 * Arguments:
 *	cmd	SIOCGIFCONF, or OSIOCGIFCONF for the old sockaddr layout
 *		(only distinguished when COMPAT_43 is defined)
 *	data	struct ifconf; ifc_len is the size of the buffer at
 *		ifc_req on entry and is reduced by the unused space
 *		before returning
 *	cred	caller's credentials; addresses for which prison_if()
 *		returns non-zero are left out when cr_prison is set
 *
 * One struct ifreq is copied out per address; an address longer than
 * struct sockaddr takes up correspondingly more room.  An interface
 * for which no address was reported still gets one entry with a
 * zeroed address.
 *
 * Returns 0, ENAMETOOLONG if an interface name does not fit ifr_name,
 * or the error reported by copyout().  Running out of buffer space
 * ends the listing but is not an error.
 */
static int
ifconf(u_long cmd, caddr_t data, struct ucred *cred)
{
	struct ifconf *ifc = (struct ifconf *)data;
	struct ifnet *ifp;
	struct sockaddr *sa;
	struct ifreq ifr, *ifrp;
	int space = ifc->ifc_len, error = 0;

	ifrp = ifc->ifc_req;

	ifnet_lock();
	TAILQ_FOREACH(ifp, &ifnetlist, if_link) {
		struct ifaddr_container *ifac, *ifac_mark;
		struct ifaddr_marker mark;
		struct ifaddrhead *head;
		int addrs;

		/* Stop once there is no room left for another entry. */
		if (space <= sizeof ifr)
			break;

		/*
		 * Zero the stack declared structure first to prevent
		 * memory disclosure.
		 */
		bzero(&ifr, sizeof(ifr));
		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			error = ENAMETOOLONG;
			break;
		}

		/*
		 * Add a marker, since copyout() could block and during that
		 * period the list could be changed.  Inserting the marker to
		 * the header of the list will not cause trouble for the code
		 * assuming that the first element of the list is AF_LINK; the
		 * marker will be moved to the next position w/o blocking.
		 */
		ifa_marker_init(&mark, ifp);
		ifac_mark = &mark.ifac;
		head = &ifp->if_addrheads[mycpuid];

		addrs = 0;
		TAILQ_INSERT_HEAD(head, ifac_mark, ifa_link);
		while ((ifac = TAILQ_NEXT(ifac_mark, ifa_link)) != NULL) {
			struct ifaddr *ifa = ifac->ifa;

			/* Step the marker past the entry being processed. */
			TAILQ_REMOVE(head, ifac_mark, ifa_link);
			TAILQ_INSERT_AFTER(head, ifac, ifac_mark, ifa_link);

			/* Ignore marker */
			if (ifa->ifa_addr->sa_family == AF_UNSPEC)
				continue;

			if (space <= sizeof ifr)
				break;
			sa = ifa->ifa_addr;
			if (cred->cr_prison &&
			    prison_if(cred, sa))
				continue;
			addrs++;
			/*
			 * Keep a reference on this ifaddr, so that it will
			 * not be destroyed when its address is copied to
			 * the userland, which could block.
			 */
			IFAREF(ifa);
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				/* Old layout: 16 bit family, no length byte. */
				struct osockaddr *osa =
					 (struct osockaddr *)&ifr.ifr_addr;
				ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				/* The address fits into a plain struct ifreq. */
				ifr.ifr_addr = *sa;
				error = copyout(&ifr, ifrp, sizeof ifr);
				ifrp++;
			} else {
				/*
				 * Oversized address: copy out the name and
				 * then the full sockaddr behind it.
				 */
				if (space < (sizeof ifr) + sa->sa_len -
					    sizeof(*sa)) {
					IFAFREE(ifa);
					break;
				}
				space -= sa->sa_len - sizeof(*sa);
				error = copyout(&ifr, ifrp,
						sizeof ifr.ifr_name);
				if (error == 0)
					error = copyout(sa, &ifrp->ifr_addr,
							sa->sa_len);
				ifrp = (struct ifreq *)
					(sa->sa_len + (caddr_t)&ifrp->ifr_addr);
			}
			IFAFREE(ifa);
			if (error)
				break;
			space -= sizeof ifr;
		}
		/* The marker lives on the stack; always unlink it. */
		TAILQ_REMOVE(head, ifac_mark, ifa_link);
		if (error)
			break;
		if (!addrs) {
			/* No address reported: emit the name alone. */
			bzero(&ifr.ifr_addr, sizeof ifr.ifr_addr);
			error = copyout(&ifr, ifrp, sizeof ifr);
			if (error)
				break;
			space -= sizeof ifr;
			ifrp++;
		}
	}
	ifnet_unlock();

	/* Report how many bytes were actually used. */
	ifc->ifc_len -= space;
	return (error);
}

/*
 * Just like if_promisc(), but for all-multicast-reception mode.
2347 */ 2348 int 2349 if_allmulti(struct ifnet *ifp, int onswitch) 2350 { 2351 int error = 0; 2352 struct ifreq ifr; 2353 2354 crit_enter(); 2355 2356 if (onswitch) { 2357 if (ifp->if_amcount++ == 0) { 2358 ifp->if_flags |= IFF_ALLMULTI; 2359 ifr.ifr_flags = ifp->if_flags; 2360 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2361 ifnet_serialize_all(ifp); 2362 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2363 NULL); 2364 ifnet_deserialize_all(ifp); 2365 } 2366 } else { 2367 if (ifp->if_amcount > 1) { 2368 ifp->if_amcount--; 2369 } else { 2370 ifp->if_amcount = 0; 2371 ifp->if_flags &= ~IFF_ALLMULTI; 2372 ifr.ifr_flags = ifp->if_flags; 2373 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2374 ifnet_serialize_all(ifp); 2375 error = ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2376 NULL); 2377 ifnet_deserialize_all(ifp); 2378 } 2379 } 2380 2381 crit_exit(); 2382 2383 if (error == 0) 2384 rt_ifmsg(ifp); 2385 return error; 2386 } 2387 2388 /* 2389 * Add a multicast listenership to the interface in question. 2390 * The link layer provides a routine which converts 2391 */ 2392 int 2393 if_addmulti_serialized(struct ifnet *ifp, struct sockaddr *sa, 2394 struct ifmultiaddr **retifma) 2395 { 2396 struct sockaddr *llsa, *dupsa; 2397 int error; 2398 struct ifmultiaddr *ifma; 2399 2400 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2401 2402 /* 2403 * If the matching multicast address already exists 2404 * then don't add a new one, just add a reference 2405 */ 2406 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2407 if (sa_equal(sa, ifma->ifma_addr)) { 2408 ifma->ifma_refcount++; 2409 if (retifma) 2410 *retifma = ifma; 2411 return 0; 2412 } 2413 } 2414 2415 /* 2416 * Give the link layer a chance to accept/reject it, and also 2417 * find out which AF_LINK address this maps to, if it isn't one 2418 * already. 
2419 */ 2420 if (ifp->if_resolvemulti) { 2421 error = ifp->if_resolvemulti(ifp, &llsa, sa); 2422 if (error) 2423 return error; 2424 } else { 2425 llsa = NULL; 2426 } 2427 2428 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK); 2429 dupsa = kmalloc(sa->sa_len, M_IFMADDR, M_WAITOK); 2430 bcopy(sa, dupsa, sa->sa_len); 2431 2432 ifma->ifma_addr = dupsa; 2433 ifma->ifma_lladdr = llsa; 2434 ifma->ifma_ifp = ifp; 2435 ifma->ifma_refcount = 1; 2436 ifma->ifma_protospec = NULL; 2437 rt_newmaddrmsg(RTM_NEWMADDR, ifma); 2438 2439 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2440 if (retifma) 2441 *retifma = ifma; 2442 2443 if (llsa != NULL) { 2444 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2445 if (sa_equal(ifma->ifma_addr, llsa)) 2446 break; 2447 } 2448 if (ifma) { 2449 ifma->ifma_refcount++; 2450 } else { 2451 ifma = kmalloc(sizeof *ifma, M_IFMADDR, M_WAITOK); 2452 dupsa = kmalloc(llsa->sa_len, M_IFMADDR, M_WAITOK); 2453 bcopy(llsa, dupsa, llsa->sa_len); 2454 ifma->ifma_addr = dupsa; 2455 ifma->ifma_ifp = ifp; 2456 ifma->ifma_refcount = 1; 2457 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); 2458 } 2459 } 2460 /* 2461 * We are certain we have added something, so call down to the 2462 * interface to let them know about it. 2463 */ 2464 if (ifp->if_ioctl) 2465 ifp->if_ioctl(ifp, SIOCADDMULTI, 0, NULL); 2466 2467 return 0; 2468 } 2469 2470 int 2471 if_addmulti(struct ifnet *ifp, struct sockaddr *sa, 2472 struct ifmultiaddr **retifma) 2473 { 2474 int error; 2475 2476 ifnet_serialize_all(ifp); 2477 error = if_addmulti_serialized(ifp, sa, retifma); 2478 ifnet_deserialize_all(ifp); 2479 2480 return error; 2481 } 2482 2483 /* 2484 * Remove a reference to a multicast address on this interface. Yell 2485 * if the request does not match an existing membership. 
2486 */ 2487 static int 2488 if_delmulti_serialized(struct ifnet *ifp, struct sockaddr *sa) 2489 { 2490 struct ifmultiaddr *ifma; 2491 2492 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2493 2494 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2495 if (sa_equal(sa, ifma->ifma_addr)) 2496 break; 2497 if (ifma == NULL) 2498 return ENOENT; 2499 2500 if (ifma->ifma_refcount > 1) { 2501 ifma->ifma_refcount--; 2502 return 0; 2503 } 2504 2505 rt_newmaddrmsg(RTM_DELMADDR, ifma); 2506 sa = ifma->ifma_lladdr; 2507 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2508 /* 2509 * Make sure the interface driver is notified 2510 * in the case of a link layer mcast group being left. 2511 */ 2512 if (ifma->ifma_addr->sa_family == AF_LINK && sa == NULL) 2513 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2514 kfree(ifma->ifma_addr, M_IFMADDR); 2515 kfree(ifma, M_IFMADDR); 2516 if (sa == NULL) 2517 return 0; 2518 2519 /* 2520 * Now look for the link-layer address which corresponds to 2521 * this network address. It had been squirreled away in 2522 * ifma->ifma_lladdr for this purpose (so we don't have 2523 * to call ifp->if_resolvemulti() again), and we saved that 2524 * value in sa above. If some nasty deleted the 2525 * link-layer address out from underneath us, we can deal because 2526 * the address we stored was is not the same as the one which was 2527 * in the record for the link-layer address. (So we don't complain 2528 * in that case.) 
2529 */ 2530 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2531 if (sa_equal(sa, ifma->ifma_addr)) 2532 break; 2533 if (ifma == NULL) 2534 return 0; 2535 2536 if (ifma->ifma_refcount > 1) { 2537 ifma->ifma_refcount--; 2538 return 0; 2539 } 2540 2541 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 2542 ifp->if_ioctl(ifp, SIOCDELMULTI, 0, NULL); 2543 kfree(ifma->ifma_addr, M_IFMADDR); 2544 kfree(sa, M_IFMADDR); 2545 kfree(ifma, M_IFMADDR); 2546 2547 return 0; 2548 } 2549 2550 int 2551 if_delmulti(struct ifnet *ifp, struct sockaddr *sa) 2552 { 2553 int error; 2554 2555 ifnet_serialize_all(ifp); 2556 error = if_delmulti_serialized(ifp, sa); 2557 ifnet_deserialize_all(ifp); 2558 2559 return error; 2560 } 2561 2562 /* 2563 * Delete all multicast group membership for an interface. 2564 * Should be used to quickly flush all multicast filters. 2565 */ 2566 void 2567 if_delallmulti_serialized(struct ifnet *ifp) 2568 { 2569 struct ifmultiaddr *ifma, mark; 2570 struct sockaddr sa; 2571 2572 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2573 2574 bzero(&sa, sizeof(sa)); 2575 sa.sa_family = AF_UNSPEC; 2576 sa.sa_len = sizeof(sa); 2577 2578 bzero(&mark, sizeof(mark)); 2579 mark.ifma_addr = &sa; 2580 2581 TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, &mark, ifma_link); 2582 while ((ifma = TAILQ_NEXT(&mark, ifma_link)) != NULL) { 2583 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2584 TAILQ_INSERT_AFTER(&ifp->if_multiaddrs, ifma, &mark, 2585 ifma_link); 2586 2587 if (ifma->ifma_addr->sa_family == AF_UNSPEC) 2588 continue; 2589 2590 if_delmulti_serialized(ifp, ifma->ifma_addr); 2591 } 2592 TAILQ_REMOVE(&ifp->if_multiaddrs, &mark, ifma_link); 2593 } 2594 2595 2596 /* 2597 * Set the link layer address on an interface. 2598 * 2599 * At this time we only support certain types of interfaces, 2600 * and we don't allow the length of the address to change. 
2601 */ 2602 int 2603 if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) 2604 { 2605 struct sockaddr_dl *sdl; 2606 struct ifreq ifr; 2607 2608 sdl = IF_LLSOCKADDR(ifp); 2609 if (sdl == NULL) 2610 return (EINVAL); 2611 if (len != sdl->sdl_alen) /* don't allow length to change */ 2612 return (EINVAL); 2613 switch (ifp->if_type) { 2614 case IFT_ETHER: /* these types use struct arpcom */ 2615 case IFT_XETHER: 2616 case IFT_L2VLAN: 2617 case IFT_IEEE8023ADLAG: 2618 bcopy(lladdr, ((struct arpcom *)ifp->if_softc)->ac_enaddr, len); 2619 bcopy(lladdr, LLADDR(sdl), len); 2620 break; 2621 default: 2622 return (ENODEV); 2623 } 2624 /* 2625 * If the interface is already up, we need 2626 * to re-init it in order to reprogram its 2627 * address filter. 2628 */ 2629 ifnet_serialize_all(ifp); 2630 if ((ifp->if_flags & IFF_UP) != 0) { 2631 #ifdef INET 2632 struct ifaddr_container *ifac; 2633 #endif 2634 2635 ifp->if_flags &= ~IFF_UP; 2636 ifr.ifr_flags = ifp->if_flags; 2637 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2638 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2639 NULL); 2640 ifp->if_flags |= IFF_UP; 2641 ifr.ifr_flags = ifp->if_flags; 2642 ifr.ifr_flagshigh = ifp->if_flags >> 16; 2643 ifp->if_ioctl(ifp, SIOCSIFFLAGS, (caddr_t)&ifr, 2644 NULL); 2645 #ifdef INET 2646 /* 2647 * Also send gratuitous ARPs to notify other nodes about 2648 * the address change. 
2649 */ 2650 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 2651 struct ifaddr *ifa = ifac->ifa; 2652 2653 if (ifa->ifa_addr != NULL && 2654 ifa->ifa_addr->sa_family == AF_INET) 2655 arp_gratuitous(ifp, ifa); 2656 } 2657 #endif 2658 } 2659 ifnet_deserialize_all(ifp); 2660 return (0); 2661 } 2662 2663 struct ifmultiaddr * 2664 ifmaof_ifpforaddr(struct sockaddr *sa, struct ifnet *ifp) 2665 { 2666 struct ifmultiaddr *ifma; 2667 2668 /* TODO: need ifnet_serialize_main */ 2669 ifnet_serialize_all(ifp); 2670 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) 2671 if (sa_equal(ifma->ifma_addr, sa)) 2672 break; 2673 ifnet_deserialize_all(ifp); 2674 2675 return ifma; 2676 } 2677 2678 /* 2679 * This function locates the first real ethernet MAC from a network 2680 * card and loads it into node, returning 0 on success or ENOENT if 2681 * no suitable interfaces were found. It is used by the uuid code to 2682 * generate a unique 6-byte number. 2683 */ 2684 int 2685 if_getanyethermac(uint16_t *node, int minlen) 2686 { 2687 struct ifnet *ifp; 2688 struct sockaddr_dl *sdl; 2689 2690 ifnet_lock(); 2691 TAILQ_FOREACH(ifp, &ifnetlist, if_link) { 2692 if (ifp->if_type != IFT_ETHER) 2693 continue; 2694 sdl = IF_LLSOCKADDR(ifp); 2695 if (sdl->sdl_alen < minlen) 2696 continue; 2697 bcopy(((struct arpcom *)ifp->if_softc)->ac_enaddr, node, 2698 minlen); 2699 ifnet_unlock(); 2700 return(0); 2701 } 2702 ifnet_unlock(); 2703 return (ENOENT); 2704 } 2705 2706 /* 2707 * The name argument must be a pointer to storage which will last as 2708 * long as the interface does. For physical devices, the result of 2709 * device_get_name(dev) is a good choice and for pseudo-devices a 2710 * static string works well. 
2711 */ 2712 void 2713 if_initname(struct ifnet *ifp, const char *name, int unit) 2714 { 2715 ifp->if_dname = name; 2716 ifp->if_dunit = unit; 2717 if (unit != IF_DUNIT_NONE) 2718 ksnprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); 2719 else 2720 strlcpy(ifp->if_xname, name, IFNAMSIZ); 2721 } 2722 2723 int 2724 if_printf(struct ifnet *ifp, const char *fmt, ...) 2725 { 2726 __va_list ap; 2727 int retval; 2728 2729 retval = kprintf("%s: ", ifp->if_xname); 2730 __va_start(ap, fmt); 2731 retval += kvprintf(fmt, ap); 2732 __va_end(ap); 2733 return (retval); 2734 } 2735 2736 struct ifnet * 2737 if_alloc(uint8_t type) 2738 { 2739 struct ifnet *ifp; 2740 size_t size; 2741 2742 /* 2743 * XXX temporary hack until arpcom is setup in if_l2com 2744 */ 2745 if (type == IFT_ETHER) 2746 size = sizeof(struct arpcom); 2747 else 2748 size = sizeof(struct ifnet); 2749 2750 ifp = kmalloc(size, M_IFNET, M_WAITOK|M_ZERO); 2751 2752 ifp->if_type = type; 2753 2754 if (if_com_alloc[type] != NULL) { 2755 ifp->if_l2com = if_com_alloc[type](type, ifp); 2756 if (ifp->if_l2com == NULL) { 2757 kfree(ifp, M_IFNET); 2758 return (NULL); 2759 } 2760 } 2761 return (ifp); 2762 } 2763 2764 void 2765 if_free(struct ifnet *ifp) 2766 { 2767 kfree(ifp, M_IFNET); 2768 } 2769 2770 void 2771 ifq_set_classic(struct ifaltq *ifq) 2772 { 2773 ifq_set_methods(ifq, ifq->altq_ifp->if_mapsubq, 2774 ifsq_classic_enqueue, ifsq_classic_dequeue, ifsq_classic_request); 2775 } 2776 2777 void 2778 ifq_set_methods(struct ifaltq *ifq, altq_mapsubq_t mapsubq, 2779 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request) 2780 { 2781 int q; 2782 2783 KASSERT(mapsubq != NULL, ("mapsubq is not specified")); 2784 KASSERT(enqueue != NULL, ("enqueue is not specified")); 2785 KASSERT(dequeue != NULL, ("dequeue is not specified")); 2786 KASSERT(request != NULL, ("request is not specified")); 2787 2788 ifq->altq_mapsubq = mapsubq; 2789 for (q = 0; q < ifq->altq_subq_cnt; ++q) { 2790 struct ifaltq_subque *ifsq = 
&ifq->altq_subq[q]; 2791 2792 ifsq->ifsq_enqueue = enqueue; 2793 ifsq->ifsq_dequeue = dequeue; 2794 ifsq->ifsq_request = request; 2795 } 2796 } 2797 2798 static void 2799 ifsq_norm_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2800 { 2801 m->m_nextpkt = NULL; 2802 if (ifsq->ifsq_norm_tail == NULL) 2803 ifsq->ifsq_norm_head = m; 2804 else 2805 ifsq->ifsq_norm_tail->m_nextpkt = m; 2806 ifsq->ifsq_norm_tail = m; 2807 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2808 } 2809 2810 static void 2811 ifsq_prio_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m) 2812 { 2813 m->m_nextpkt = NULL; 2814 if (ifsq->ifsq_prio_tail == NULL) 2815 ifsq->ifsq_prio_head = m; 2816 else 2817 ifsq->ifsq_prio_tail->m_nextpkt = m; 2818 ifsq->ifsq_prio_tail = m; 2819 ALTQ_SQ_CNTR_INC(ifsq, m->m_pkthdr.len); 2820 ALTQ_SQ_PRIO_CNTR_INC(ifsq, m->m_pkthdr.len); 2821 } 2822 2823 static struct mbuf * 2824 ifsq_norm_dequeue(struct ifaltq_subque *ifsq) 2825 { 2826 struct mbuf *m; 2827 2828 m = ifsq->ifsq_norm_head; 2829 if (m != NULL) { 2830 if ((ifsq->ifsq_norm_head = m->m_nextpkt) == NULL) 2831 ifsq->ifsq_norm_tail = NULL; 2832 m->m_nextpkt = NULL; 2833 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2834 } 2835 return m; 2836 } 2837 2838 static struct mbuf * 2839 ifsq_prio_dequeue(struct ifaltq_subque *ifsq) 2840 { 2841 struct mbuf *m; 2842 2843 m = ifsq->ifsq_prio_head; 2844 if (m != NULL) { 2845 if ((ifsq->ifsq_prio_head = m->m_nextpkt) == NULL) 2846 ifsq->ifsq_prio_tail = NULL; 2847 m->m_nextpkt = NULL; 2848 ALTQ_SQ_CNTR_DEC(ifsq, m->m_pkthdr.len); 2849 ALTQ_SQ_PRIO_CNTR_DEC(ifsq, m->m_pkthdr.len); 2850 } 2851 return m; 2852 } 2853 2854 int 2855 ifsq_classic_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m, 2856 struct altq_pktattr *pa __unused) 2857 { 2858 M_ASSERTPKTHDR(m); 2859 if (ifsq->ifsq_len >= ifsq->ifsq_maxlen || 2860 ifsq->ifsq_bcnt >= ifsq->ifsq_maxbcnt) { 2861 if ((m->m_flags & M_PRIO) && 2862 ifsq->ifsq_prio_len < (ifsq->ifsq_maxlen / 2) && 2863 ifsq->ifsq_prio_bcnt < 
(ifsq->ifsq_maxbcnt / 2)) { 2864 struct mbuf *m_drop; 2865 2866 /* 2867 * Perform drop-head on normal queue 2868 */ 2869 m_drop = ifsq_norm_dequeue(ifsq); 2870 if (m_drop != NULL) { 2871 m_freem(m_drop); 2872 ifsq_prio_enqueue(ifsq, m); 2873 return 0; 2874 } 2875 /* XXX nothing could be dropped? */ 2876 } 2877 m_freem(m); 2878 return ENOBUFS; 2879 } else { 2880 if (m->m_flags & M_PRIO) 2881 ifsq_prio_enqueue(ifsq, m); 2882 else 2883 ifsq_norm_enqueue(ifsq, m); 2884 return 0; 2885 } 2886 } 2887 2888 struct mbuf * 2889 ifsq_classic_dequeue(struct ifaltq_subque *ifsq, int op) 2890 { 2891 struct mbuf *m; 2892 2893 switch (op) { 2894 case ALTDQ_POLL: 2895 m = ifsq->ifsq_prio_head; 2896 if (m == NULL) 2897 m = ifsq->ifsq_norm_head; 2898 break; 2899 2900 case ALTDQ_REMOVE: 2901 m = ifsq_prio_dequeue(ifsq); 2902 if (m == NULL) 2903 m = ifsq_norm_dequeue(ifsq); 2904 break; 2905 2906 default: 2907 panic("unsupported ALTQ dequeue op: %d", op); 2908 } 2909 return m; 2910 } 2911 2912 int 2913 ifsq_classic_request(struct ifaltq_subque *ifsq, int req, void *arg) 2914 { 2915 switch (req) { 2916 case ALTRQ_PURGE: 2917 for (;;) { 2918 struct mbuf *m; 2919 2920 m = ifsq_classic_dequeue(ifsq, ALTDQ_REMOVE); 2921 if (m == NULL) 2922 break; 2923 m_freem(m); 2924 } 2925 break; 2926 2927 default: 2928 panic("unsupported ALTQ request: %d", req); 2929 } 2930 return 0; 2931 } 2932 2933 static void 2934 ifsq_ifstart_try(struct ifaltq_subque *ifsq, int force_sched) 2935 { 2936 struct ifnet *ifp = ifsq_get_ifp(ifsq); 2937 int running = 0, need_sched; 2938 2939 /* 2940 * Try to do direct ifnet.if_start on the subqueue first, if there is 2941 * contention on the subqueue hardware serializer, ifnet.if_start on 2942 * the subqueue will be scheduled on the subqueue owner CPU. 2943 */ 2944 if (!ifsq_tryserialize_hw(ifsq)) { 2945 /* 2946 * Subqueue hardware serializer contention happened, 2947 * ifnet.if_start on the subqueue is scheduled on 2948 * the subqueue owner CPU, and we keep going. 
2949 */ 2950 ifsq_ifstart_schedule(ifsq, 1); 2951 return; 2952 } 2953 2954 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) { 2955 ifp->if_start(ifp, ifsq); 2956 if ((ifp->if_flags & IFF_RUNNING) && !ifsq_is_oactive(ifsq)) 2957 running = 1; 2958 } 2959 need_sched = ifsq_ifstart_need_schedule(ifsq, running); 2960 2961 ifsq_deserialize_hw(ifsq); 2962 2963 if (need_sched) { 2964 /* 2965 * More data need to be transmitted, ifnet.if_start on the 2966 * subqueue is scheduled on the subqueue owner CPU, and we 2967 * keep going. 2968 * NOTE: ifnet.if_start subqueue interlock is not released. 2969 */ 2970 ifsq_ifstart_schedule(ifsq, force_sched); 2971 } 2972 } 2973 2974 /* 2975 * Subqeue packets staging mechanism: 2976 * 2977 * The packets enqueued into the subqueue are staged to a certain amount 2978 * before the ifnet.if_start on the subqueue is called. In this way, the 2979 * driver could avoid writing to hardware registers upon every packet, 2980 * instead, hardware registers could be written when certain amount of 2981 * packets are put onto hardware TX ring. The measurement on several modern 2982 * NICs (emx(4), igb(4), bnx(4), bge(4), jme(4)) shows that the hardware 2983 * registers writing aggregation could save ~20% CPU time when 18bytes UDP 2984 * datagrams are transmitted at 1.48Mpps. The performance improvement by 2985 * hardware registers writing aggeregation is also mentioned by Luigi Rizzo's 2986 * netmap paper (http://info.iet.unipi.it/~luigi/netmap/). 2987 * 2988 * Subqueue packets staging is performed for two entry points into drivers' 2989 * transmission function: 2990 * - Direct ifnet.if_start calling on the subqueue, i.e. ifsq_ifstart_try() 2991 * - ifnet.if_start scheduling on the subqueue, i.e. ifsq_ifstart_schedule() 2992 * 2993 * Subqueue packets staging will be stopped upon any of the following 2994 * conditions: 2995 * - If the count of packets enqueued on the current CPU is great than or 2996 * equal to ifsq_stage_cntmax. 
(XXX this should be per-interface) 2997 * - If the total length of packets enqueued on the current CPU is great 2998 * than or equal to the hardware's MTU - max_protohdr. max_protohdr is 2999 * cut from the hardware's MTU mainly bacause a full TCP segment's size 3000 * is usually less than hardware's MTU. 3001 * - ifsq_ifstart_schedule() is not pending on the current CPU and 3002 * ifnet.if_start subqueue interlock (ifaltq_subq.ifsq_started) is not 3003 * released. 3004 * - The if_start_rollup(), which is registered as low priority netisr 3005 * rollup function, is called; probably because no more work is pending 3006 * for netisr. 3007 * 3008 * NOTE: 3009 * Currently subqueue packet staging is only performed in netisr threads. 3010 */ 3011 int 3012 ifq_dispatch(struct ifnet *ifp, struct mbuf *m, struct altq_pktattr *pa) 3013 { 3014 struct ifaltq *ifq = &ifp->if_snd; 3015 struct ifaltq_subque *ifsq; 3016 int error, start = 0, len, mcast = 0, avoid_start = 0; 3017 struct ifsubq_stage_head *head = NULL; 3018 struct ifsubq_stage *stage = NULL; 3019 struct globaldata *gd = mycpu; 3020 struct thread *td = gd->gd_curthread; 3021 3022 crit_enter_quick(td); 3023 3024 ifsq = ifq_map_subq(ifq, gd->gd_cpuid); 3025 ASSERT_ALTQ_SQ_NOT_SERIALIZED_HW(ifsq); 3026 3027 len = m->m_pkthdr.len; 3028 if (m->m_flags & M_MCAST) 3029 mcast = 1; 3030 3031 if (td->td_type == TD_TYPE_NETISR) { 3032 head = &ifsubq_stage_heads[mycpuid]; 3033 stage = ifsq_get_stage(ifsq, mycpuid); 3034 3035 stage->stg_cnt++; 3036 stage->stg_len += len; 3037 if (stage->stg_cnt < ifsq_stage_cntmax && 3038 stage->stg_len < (ifp->if_mtu - max_protohdr)) 3039 avoid_start = 1; 3040 } 3041 3042 ALTQ_SQ_LOCK(ifsq); 3043 error = ifsq_enqueue_locked(ifsq, m, pa); 3044 if (error) { 3045 if (!ifsq_data_ready(ifsq)) { 3046 ALTQ_SQ_UNLOCK(ifsq); 3047 crit_exit_quick(td); 3048 return error; 3049 } 3050 avoid_start = 0; 3051 } 3052 if (!ifsq_is_started(ifsq)) { 3053 if (avoid_start) { 3054 ALTQ_SQ_UNLOCK(ifsq); 3055 3056 
KKASSERT(!error); 3057 if ((stage->stg_flags & IFSQ_STAGE_FLAG_QUED) == 0) 3058 ifsq_stage_insert(head, stage); 3059 3060 IFNET_STAT_INC(ifp, obytes, len); 3061 if (mcast) 3062 IFNET_STAT_INC(ifp, omcasts, 1); 3063 crit_exit_quick(td); 3064 return error; 3065 } 3066 3067 /* 3068 * Hold the subqueue interlock of ifnet.if_start 3069 */ 3070 ifsq_set_started(ifsq); 3071 start = 1; 3072 } 3073 ALTQ_SQ_UNLOCK(ifsq); 3074 3075 if (!error) { 3076 IFNET_STAT_INC(ifp, obytes, len); 3077 if (mcast) 3078 IFNET_STAT_INC(ifp, omcasts, 1); 3079 } 3080 3081 if (stage != NULL) { 3082 if (!start && (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED)) { 3083 KKASSERT(stage->stg_flags & IFSQ_STAGE_FLAG_QUED); 3084 if (!avoid_start) { 3085 ifsq_stage_remove(head, stage); 3086 ifsq_ifstart_schedule(ifsq, 1); 3087 } 3088 crit_exit_quick(td); 3089 return error; 3090 } 3091 3092 if (stage->stg_flags & IFSQ_STAGE_FLAG_QUED) { 3093 ifsq_stage_remove(head, stage); 3094 } else { 3095 stage->stg_cnt = 0; 3096 stage->stg_len = 0; 3097 } 3098 } 3099 3100 if (!start) { 3101 crit_exit_quick(td); 3102 return error; 3103 } 3104 3105 ifsq_ifstart_try(ifsq, 0); 3106 3107 crit_exit_quick(td); 3108 return error; 3109 } 3110 3111 void * 3112 ifa_create(int size, int flags) 3113 { 3114 struct ifaddr *ifa; 3115 int i; 3116 3117 KASSERT(size >= sizeof(*ifa), ("ifaddr size too small")); 3118 3119 ifa = kmalloc(size, M_IFADDR, flags | M_ZERO); 3120 if (ifa == NULL) 3121 return NULL; 3122 3123 ifa->ifa_containers = 3124 kmalloc_cachealign(ncpus * sizeof(struct ifaddr_container), 3125 M_IFADDR, M_WAITOK | M_ZERO); 3126 ifa->ifa_ncnt = ncpus; 3127 for (i = 0; i < ncpus; ++i) { 3128 struct ifaddr_container *ifac = &ifa->ifa_containers[i]; 3129 3130 ifac->ifa_magic = IFA_CONTAINER_MAGIC; 3131 ifac->ifa = ifa; 3132 ifac->ifa_refcnt = 1; 3133 } 3134 #ifdef IFADDR_DEBUG 3135 kprintf("alloc ifa %p %d\n", ifa, size); 3136 #endif 3137 return ifa; 3138 } 3139 3140 void 3141 ifac_free(struct ifaddr_container *ifac, int cpu_id) 
3142 { 3143 struct ifaddr *ifa = ifac->ifa; 3144 3145 KKASSERT(ifac->ifa_magic == IFA_CONTAINER_MAGIC); 3146 KKASSERT(ifac->ifa_refcnt == 0); 3147 KASSERT(ifac->ifa_listmask == 0, 3148 ("ifa is still on %#x lists", ifac->ifa_listmask)); 3149 3150 ifac->ifa_magic = IFA_CONTAINER_DEAD; 3151 3152 #ifdef IFADDR_DEBUG_VERBOSE 3153 kprintf("try free ifa %p cpu_id %d\n", ifac->ifa, cpu_id); 3154 #endif 3155 3156 KASSERT(ifa->ifa_ncnt > 0 && ifa->ifa_ncnt <= ncpus, 3157 ("invalid # of ifac, %d", ifa->ifa_ncnt)); 3158 if (atomic_fetchadd_int(&ifa->ifa_ncnt, -1) == 1) { 3159 #ifdef IFADDR_DEBUG 3160 kprintf("free ifa %p\n", ifa); 3161 #endif 3162 kfree(ifa->ifa_containers, M_IFADDR); 3163 kfree(ifa, M_IFADDR); 3164 } 3165 } 3166 3167 static void 3168 ifa_iflink_dispatch(netmsg_t nmsg) 3169 { 3170 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3171 struct ifaddr *ifa = msg->ifa; 3172 struct ifnet *ifp = msg->ifp; 3173 int cpu = mycpuid; 3174 struct ifaddr_container *ifac; 3175 3176 crit_enter(); 3177 3178 ifac = &ifa->ifa_containers[cpu]; 3179 ASSERT_IFAC_VALID(ifac); 3180 KASSERT((ifac->ifa_listmask & IFA_LIST_IFADDRHEAD) == 0, 3181 ("ifaddr is on if_addrheads")); 3182 3183 ifac->ifa_listmask |= IFA_LIST_IFADDRHEAD; 3184 if (msg->tail) 3185 TAILQ_INSERT_TAIL(&ifp->if_addrheads[cpu], ifac, ifa_link); 3186 else 3187 TAILQ_INSERT_HEAD(&ifp->if_addrheads[cpu], ifac, ifa_link); 3188 3189 crit_exit(); 3190 3191 ifa_forwardmsg(&nmsg->lmsg, cpu + 1); 3192 } 3193 3194 void 3195 ifa_iflink(struct ifaddr *ifa, struct ifnet *ifp, int tail) 3196 { 3197 struct netmsg_ifaddr msg; 3198 3199 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3200 0, ifa_iflink_dispatch); 3201 msg.ifa = ifa; 3202 msg.ifp = ifp; 3203 msg.tail = tail; 3204 3205 ifa_domsg(&msg.base.lmsg, 0); 3206 } 3207 3208 static void 3209 ifa_ifunlink_dispatch(netmsg_t nmsg) 3210 { 3211 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3212 struct ifaddr *ifa = msg->ifa; 3213 struct ifnet *ifp = msg->ifp; 
3214 int cpu = mycpuid; 3215 struct ifaddr_container *ifac; 3216 3217 crit_enter(); 3218 3219 ifac = &ifa->ifa_containers[cpu]; 3220 ASSERT_IFAC_VALID(ifac); 3221 KASSERT(ifac->ifa_listmask & IFA_LIST_IFADDRHEAD, 3222 ("ifaddr is not on if_addrhead")); 3223 3224 TAILQ_REMOVE(&ifp->if_addrheads[cpu], ifac, ifa_link); 3225 ifac->ifa_listmask &= ~IFA_LIST_IFADDRHEAD; 3226 3227 crit_exit(); 3228 3229 ifa_forwardmsg(&nmsg->lmsg, cpu + 1); 3230 } 3231 3232 void 3233 ifa_ifunlink(struct ifaddr *ifa, struct ifnet *ifp) 3234 { 3235 struct netmsg_ifaddr msg; 3236 3237 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3238 0, ifa_ifunlink_dispatch); 3239 msg.ifa = ifa; 3240 msg.ifp = ifp; 3241 3242 ifa_domsg(&msg.base.lmsg, 0); 3243 } 3244 3245 static void 3246 ifa_destroy_dispatch(netmsg_t nmsg) 3247 { 3248 struct netmsg_ifaddr *msg = (struct netmsg_ifaddr *)nmsg; 3249 3250 IFAFREE(msg->ifa); 3251 ifa_forwardmsg(&nmsg->lmsg, mycpuid + 1); 3252 } 3253 3254 void 3255 ifa_destroy(struct ifaddr *ifa) 3256 { 3257 struct netmsg_ifaddr msg; 3258 3259 netmsg_init(&msg.base, NULL, &curthread->td_msgport, 3260 0, ifa_destroy_dispatch); 3261 msg.ifa = ifa; 3262 3263 ifa_domsg(&msg.base.lmsg, 0); 3264 } 3265 3266 struct lwkt_port * 3267 ifnet_portfn(int cpu) 3268 { 3269 return &ifnet_threads[cpu].td_msgport; 3270 } 3271 3272 void 3273 ifnet_forwardmsg(struct lwkt_msg *lmsg, int next_cpu) 3274 { 3275 KKASSERT(next_cpu > mycpuid && next_cpu <= ncpus); 3276 3277 if (next_cpu < ncpus) 3278 lwkt_forwardmsg(ifnet_portfn(next_cpu), lmsg); 3279 else 3280 lwkt_replymsg(lmsg, 0); 3281 } 3282 3283 int 3284 ifnet_domsg(struct lwkt_msg *lmsg, int cpu) 3285 { 3286 KKASSERT(cpu < ncpus); 3287 return lwkt_domsg(ifnet_portfn(cpu), lmsg, 0); 3288 } 3289 3290 void 3291 ifnet_sendmsg(struct lwkt_msg *lmsg, int cpu) 3292 { 3293 KKASSERT(cpu < ncpus); 3294 lwkt_sendmsg(ifnet_portfn(cpu), lmsg); 3295 } 3296 3297 /* 3298 * Generic netmsg service loop. 
Some protocols may roll their own but all 3299 * must do the basic command dispatch function call done here. 3300 */ 3301 static void 3302 ifnet_service_loop(void *arg __unused) 3303 { 3304 netmsg_t msg; 3305 3306 while ((msg = lwkt_waitport(&curthread->td_msgport, 0))) { 3307 KASSERT(msg->base.nm_dispatch, ("ifnet_service: badmsg")); 3308 msg->base.nm_dispatch(msg); 3309 } 3310 } 3311 3312 static void 3313 if_start_rollup(void) 3314 { 3315 struct ifsubq_stage_head *head = &ifsubq_stage_heads[mycpuid]; 3316 struct ifsubq_stage *stage; 3317 3318 crit_enter(); 3319 3320 while ((stage = TAILQ_FIRST(&head->stg_head)) != NULL) { 3321 struct ifaltq_subque *ifsq = stage->stg_subq; 3322 int is_sched = 0; 3323 3324 if (stage->stg_flags & IFSQ_STAGE_FLAG_SCHED) 3325 is_sched = 1; 3326 ifsq_stage_remove(head, stage); 3327 3328 if (is_sched) { 3329 ifsq_ifstart_schedule(ifsq, 1); 3330 } else { 3331 int start = 0; 3332 3333 ALTQ_SQ_LOCK(ifsq); 3334 if (!ifsq_is_started(ifsq)) { 3335 /* 3336 * Hold the subqueue interlock of 3337 * ifnet.if_start 3338 */ 3339 ifsq_set_started(ifsq); 3340 start = 1; 3341 } 3342 ALTQ_SQ_UNLOCK(ifsq); 3343 3344 if (start) 3345 ifsq_ifstart_try(ifsq, 1); 3346 } 3347 KKASSERT((stage->stg_flags & 3348 (IFSQ_STAGE_FLAG_QUED | IFSQ_STAGE_FLAG_SCHED)) == 0); 3349 } 3350 3351 crit_exit(); 3352 } 3353 3354 static void 3355 ifnetinit(void *dummy __unused) 3356 { 3357 int i; 3358 3359 for (i = 0; i < ncpus; ++i) { 3360 struct thread *thr = &ifnet_threads[i]; 3361 3362 lwkt_create(ifnet_service_loop, NULL, NULL, 3363 thr, TDF_NOSTART|TDF_FORCE_SPINPORT|TDF_FIXEDCPU, 3364 i, "ifnet %d", i); 3365 netmsg_service_port_init(&thr->td_msgport); 3366 lwkt_schedule(thr); 3367 } 3368 3369 for (i = 0; i < ncpus; ++i) 3370 TAILQ_INIT(&ifsubq_stage_heads[i].stg_head); 3371 netisr_register_rollup(if_start_rollup, NETISR_ROLLUP_PRIO_IFSTART); 3372 } 3373 3374 void 3375 if_register_com_alloc(u_char type, 3376 if_com_alloc_t *a, if_com_free_t *f) 3377 { 3378 3379 
KASSERT(if_com_alloc[type] == NULL, 3380 ("if_register_com_alloc: %d already registered", type)); 3381 KASSERT(if_com_free[type] == NULL, 3382 ("if_register_com_alloc: %d free already registered", type)); 3383 3384 if_com_alloc[type] = a; 3385 if_com_free[type] = f; 3386 } 3387 3388 void 3389 if_deregister_com_alloc(u_char type) 3390 { 3391 3392 KASSERT(if_com_alloc[type] != NULL, 3393 ("if_deregister_com_alloc: %d not registered", type)); 3394 KASSERT(if_com_free[type] != NULL, 3395 ("if_deregister_com_alloc: %d free not registered", type)); 3396 if_com_alloc[type] = NULL; 3397 if_com_free[type] = NULL; 3398 } 3399 3400 int 3401 if_ring_count2(int cnt, int cnt_max) 3402 { 3403 int shift = 0; 3404 3405 KASSERT(cnt_max >= 1 && powerof2(cnt_max), 3406 ("invalid ring count max %d", cnt_max)); 3407 3408 if (cnt <= 0) 3409 cnt = cnt_max; 3410 if (cnt > ncpus2) 3411 cnt = ncpus2; 3412 if (cnt > cnt_max) 3413 cnt = cnt_max; 3414 3415 while ((1 << (shift + 1)) <= cnt) 3416 ++shift; 3417 cnt = 1 << shift; 3418 3419 KASSERT(cnt >= 1 && cnt <= ncpus2 && cnt <= cnt_max, 3420 ("calculate cnt %d, ncpus2 %d, cnt max %d", 3421 cnt, ncpus2, cnt_max)); 3422 return cnt; 3423 } 3424 3425 void 3426 ifq_set_maxlen(struct ifaltq *ifq, int len) 3427 { 3428 ifq->altq_maxlen = len + (ncpus * ifsq_stage_cntmax); 3429 } 3430 3431 int 3432 ifq_mapsubq_default(struct ifaltq *ifq __unused, int cpuid __unused) 3433 { 3434 return ALTQ_SUBQ_INDEX_DEFAULT; 3435 } 3436 3437 int 3438 ifq_mapsubq_mask(struct ifaltq *ifq, int cpuid) 3439 { 3440 return (cpuid & ifq->altq_subq_mask); 3441 } 3442 3443 static void 3444 ifsq_watchdog(void *arg) 3445 { 3446 struct ifsubq_watchdog *wd = arg; 3447 struct ifnet *ifp; 3448 3449 if (__predict_true(wd->wd_timer == 0 || --wd->wd_timer)) 3450 goto done; 3451 3452 ifp = ifsq_get_ifp(wd->wd_subq); 3453 if (ifnet_tryserialize_all(ifp)) { 3454 wd->wd_watchdog(wd->wd_subq); 3455 ifnet_deserialize_all(ifp); 3456 } else { 3457 /* try again next timeout */ 3458 wd->wd_timer 
= 1; 3459 } 3460 done: 3461 ifsq_watchdog_reset(wd); 3462 } 3463 3464 static void 3465 ifsq_watchdog_reset(struct ifsubq_watchdog *wd) 3466 { 3467 callout_reset_bycpu(&wd->wd_callout, hz, ifsq_watchdog, wd, 3468 ifsq_get_cpuid(wd->wd_subq)); 3469 } 3470 3471 void 3472 ifsq_watchdog_init(struct ifsubq_watchdog *wd, struct ifaltq_subque *ifsq, 3473 ifsq_watchdog_t watchdog) 3474 { 3475 callout_init_mp(&wd->wd_callout); 3476 wd->wd_timer = 0; 3477 wd->wd_subq = ifsq; 3478 wd->wd_watchdog = watchdog; 3479 } 3480 3481 void 3482 ifsq_watchdog_start(struct ifsubq_watchdog *wd) 3483 { 3484 wd->wd_timer = 0; 3485 ifsq_watchdog_reset(wd); 3486 } 3487 3488 void 3489 ifsq_watchdog_stop(struct ifsubq_watchdog *wd) 3490 { 3491 wd->wd_timer = 0; 3492 callout_stop(&wd->wd_callout); 3493 } 3494 3495 void 3496 ifnet_lock(void) 3497 { 3498 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3499 ("try holding ifnet lock in netisr")); 3500 mtx_lock(&ifnet_mtx); 3501 } 3502 3503 void 3504 ifnet_unlock(void) 3505 { 3506 KASSERT(curthread->td_type != TD_TYPE_NETISR, 3507 ("try holding ifnet lock in netisr")); 3508 mtx_unlock(&ifnet_mtx); 3509 } 3510 3511 static struct ifnet_array * 3512 ifnet_array_alloc(int count) 3513 { 3514 struct ifnet_array *arr; 3515 3516 arr = kmalloc(__offsetof(struct ifnet_array, ifnet_arr[count]), 3517 M_IFNET, M_WAITOK); 3518 arr->ifnet_count = count; 3519 3520 return arr; 3521 } 3522 3523 static void 3524 ifnet_array_free(struct ifnet_array *arr) 3525 { 3526 if (arr == &ifnet_array0) 3527 return; 3528 kfree(arr, M_IFNET); 3529 } 3530 3531 static struct ifnet_array * 3532 ifnet_array_add(struct ifnet *ifp, const struct ifnet_array *old_arr) 3533 { 3534 struct ifnet_array *arr; 3535 int count, i; 3536 3537 KASSERT(old_arr->ifnet_count >= 0, 3538 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3539 count = old_arr->ifnet_count + 1; 3540 arr = ifnet_array_alloc(count); 3541 3542 /* 3543 * Save the old ifnet array and append this ifp to the end of 3544 * the 
new ifnet array. 3545 */ 3546 for (i = 0; i < old_arr->ifnet_count; ++i) { 3547 KASSERT(old_arr->ifnet_arr[i] != ifp, 3548 ("%s is already in ifnet array", ifp->if_xname)); 3549 arr->ifnet_arr[i] = old_arr->ifnet_arr[i]; 3550 } 3551 KASSERT(i == count - 1, 3552 ("add %s, ifnet array index mismatch, should be %d, but got %d", 3553 ifp->if_xname, count - 1, i)); 3554 arr->ifnet_arr[i] = ifp; 3555 3556 return arr; 3557 } 3558 3559 static struct ifnet_array * 3560 ifnet_array_del(struct ifnet *ifp, const struct ifnet_array *old_arr) 3561 { 3562 struct ifnet_array *arr; 3563 int count, i, idx, found = 0; 3564 3565 KASSERT(old_arr->ifnet_count > 0, 3566 ("invalid ifnet array count %d", old_arr->ifnet_count)); 3567 count = old_arr->ifnet_count - 1; 3568 arr = ifnet_array_alloc(count); 3569 3570 /* 3571 * Save the old ifnet array, but skip this ifp. 3572 */ 3573 idx = 0; 3574 for (i = 0; i < old_arr->ifnet_count; ++i) { 3575 if (old_arr->ifnet_arr[i] == ifp) { 3576 KASSERT(!found, 3577 ("dup %s is in ifnet array", ifp->if_xname)); 3578 found = 1; 3579 continue; 3580 } 3581 KASSERT(idx < count, 3582 ("invalid ifnet array index %d, count %d", idx, count)); 3583 arr->ifnet_arr[idx] = old_arr->ifnet_arr[i]; 3584 ++idx; 3585 } 3586 KASSERT(found, ("%s is not in ifnet array", ifp->if_xname)); 3587 KASSERT(idx == count, 3588 ("del %s, ifnet array count mismatch, should be %d, but got %d ", 3589 ifp->if_xname, count, idx)); 3590 3591 return arr; 3592 } 3593 3594 const struct ifnet_array * 3595 ifnet_array_get(void) 3596 { 3597 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3598 return ifnet_array; 3599 } 3600 3601 int 3602 ifnet_array_isempty(void) 3603 { 3604 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 3605 if (ifnet_array->ifnet_count == 0) 3606 return 1; 3607 else 3608 return 0; 3609 } 3610 3611 void 3612 ifa_marker_init(struct ifaddr_marker *mark, struct ifnet *ifp) 3613 { 3614 struct ifaddr *ifa; 3615 3616 memset(mark, 0, 
sizeof(*mark)); 3617 ifa = &mark->ifa; 3618 3619 mark->ifac.ifa = ifa; 3620 3621 ifa->ifa_addr = &mark->addr; 3622 ifa->ifa_dstaddr = &mark->dstaddr; 3623 ifa->ifa_netmask = &mark->netmask; 3624 ifa->ifa_ifp = ifp; 3625 } 3626