1 /* $NetBSD: altq_subr.c,v 1.9 2002/10/09 07:28:57 jdolecek Exp $ */ 2 /* $KAME: altq_subr.c,v 1.11 2002/01/11 08:11:49 kjc Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2002 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.9 2002/10/09 07:28:57 jdolecek Exp $"); 32 33 #if defined(__FreeBSD__) || defined(__NetBSD__) 34 #include "opt_altq.h" 35 #if (__FreeBSD__ != 2) 36 #include "opt_inet.h" 37 #ifdef __FreeBSD__ 38 #include "opt_inet6.h" 39 #endif 40 #endif 41 #endif /* __FreeBSD__ || __NetBSD__ */ 42 43 #include <sys/param.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/systm.h> 47 #include <sys/proc.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/kernel.h> 51 #include <sys/errno.h> 52 #include <sys/syslog.h> 53 #include <sys/sysctl.h> 54 #include <sys/queue.h> 55 56 #include <net/if.h> 57 #include <net/if_dl.h> 58 #include <net/if_types.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/ip.h> 63 #ifdef INET6 64 #include <netinet/ip6.h> 65 #endif 66 #include <netinet/tcp.h> 67 #include <netinet/udp.h> 68 69 #include <altq/altq.h> 70 #include <altq/altq_conf.h> 71 72 /* machine dependent clock related includes */ 73 #ifdef __FreeBSD__ 74 #include "opt_cpu.h" /* for FreeBSD-2.2.8 to get i586_ctr_freq */ 75 #include <machine/clock.h> 76 #endif 77 #if defined(__i386__) 78 #include <machine/specialreg.h> /* for CPUID_TSC */ 79 #ifdef __FreeBSD__ 80 #include <machine/md_var.h> /* for cpu_feature */ 81 #elif defined(__NetBSD__) || defined(__OpenBSD__) 82 #include <machine/cpu.h> /* for cpu_feature */ 83 #endif 84 #endif /* __i386__ */ 85 86 /* 87 * internal function prototypes 88 */ 89 static void tbr_timeout __P((void *)); 90 static int extract_ports4 __P((struct mbuf *, struct ip *, 91 struct flowinfo_in *)); 92 #ifdef INET6 93 static int extract_ports6 __P((struct mbuf *, struct ip6_hdr *, 94 struct flowinfo_in6 *)); 95 #endif 96 static int apply_filter4 __P((u_int32_t, struct flow_filter *, 97 struct flowinfo_in *)); 98 static int apply_ppfilter4 __P((u_int32_t, struct flow_filter *, 99 struct flowinfo_in *)); 100 #ifdef INET6 101 static int apply_filter6 __P((u_int32_t, struct flow_filter6 *, 102 struct flowinfo_in6 *)); 103 #endif 104 static int apply_tosfilter4 __P((u_int32_t, struct flow_filter *, 105 struct flowinfo_in *)); 106 static u_long get_filt_handle __P((struct acc_classifier *, int)); 107 static struct acc_filter *filth_to_filtp __P((struct acc_classifier *, 108 u_long)); 109 static u_int32_t filt2fibmask __P((struct flow_filter *)); 110 111 static void ip4f_cache __P((struct ip *, struct flowinfo_in *)); 112 static int ip4f_lookup __P((struct ip *, struct flowinfo_in *)); 113 static int ip4f_init __P((void)); 114 static struct ip4_frag *ip4f_alloc __P((void)); 115 static void ip4f_free __P((struct ip4_frag *)); 116 117 int (*altq_input) __P((struct mbuf *, int)) = NULL; 118 static int tbr_timer = 0; /* token bucket regulator timer */ 119 static struct callout tbr_callout = CALLOUT_INITIALIZER; 120 121 /* 122 * alternate queueing support routines 123 */ 124 125 /* look up the queue state by the interface name and the queuing type. */ 126 void * 127 altq_lookup(name, type) 128 char *name; 129 int type; 130 { 131 struct ifnet *ifp; 132 133 if ((ifp = ifunit(name)) != NULL) { 134 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 135 return (ifp->if_snd.altq_disc); 136 } 137 138 return NULL; 139 } 140 141 int 142 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) 143 struct ifaltq *ifq; 144 int type; 145 void *discipline; 146 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 147 struct mbuf *(*dequeue)(struct ifaltq *, int); 148 int (*request)(struct ifaltq *, int, void *); 149 void *clfier; 150 void *(*classify)(void *, struct mbuf *, int); 151 { 152 if (!ALTQ_IS_READY(ifq)) 153 return ENXIO; 154 if (ALTQ_IS_ENABLED(ifq)) 155 return EBUSY; 156 if (ALTQ_IS_ATTACHED(ifq)) 157 return EEXIST; 158 ifq->altq_type = type; 159 ifq->altq_disc = discipline; 160 ifq->altq_enqueue = enqueue; 161 ifq->altq_dequeue = dequeue; 162 ifq->altq_request = request; 163 ifq->altq_clfier = clfier; 164 ifq->altq_classify = classify; 165 ifq->altq_flags &= ALTQF_CANTCHANGE; 166 #ifdef ALTQ_KLD 167 altq_module_incref(type); 168 #endif 169 return 0; 170 } 171 172 int 173 altq_detach(ifq) 174 struct ifaltq *ifq; 175 { 176 if (!ALTQ_IS_READY(ifq)) 177 return ENXIO; 178 if (ALTQ_IS_ENABLED(ifq)) 179 return EBUSY; 180 if (!ALTQ_IS_ATTACHED(ifq)) 181 return (0); 182 183 #ifdef ALTQ_KLD 184 altq_module_declref(ifq->altq_type); 185 #endif 186 ifq->altq_type = ALTQT_NONE; 187 ifq->altq_disc = NULL; 188 ifq->altq_enqueue = NULL; 189 ifq->altq_dequeue = NULL; 190 ifq->altq_request = NULL; 191 ifq->altq_clfier = NULL; 192 ifq->altq_classify = NULL; 193 ifq->altq_flags &= ALTQF_CANTCHANGE; 194 return 0; 195 } 196 197 int 198 altq_enable(ifq) 199 struct ifaltq *ifq; 200 { 201 int s; 202 203 if (!ALTQ_IS_READY(ifq)) 204 return ENXIO; 205 if (ALTQ_IS_ENABLED(ifq)) 206 return 0; 207 208 s = splnet(); 209 IFQ_PURGE(ifq); 210 ASSERT(ifq->ifq_len == 0); 211 ifq->altq_flags |= ALTQF_ENABLED; 212 if (ifq->altq_clfier != NULL) 213 ifq->altq_flags |= ALTQF_CLASSIFY; 214 splx(s); 215 216 return 0; 217 } 218 219 int 220 altq_disable(ifq) 221 struct ifaltq *ifq; 222 { 223 int s; 224 225 if (!ALTQ_IS_ENABLED(ifq)) 226 return 0; 227 228 s = splnet(); 229 IFQ_PURGE(ifq); 230 ASSERT(ifq->ifq_len == 0); 231 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); 232 splx(s); 233 return 0; 234 } 235 236 void 237 altq_assert(file, line, failedexpr) 238 const char *file, *failedexpr; 239 int line; 240 { 241 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 242 failedexpr, file, line); 243 panic("altq assertion"); 244 /* NOTREACHED */ 245 } 246 247 /* 248 * internal representation of token bucket parameters 249 * rate: byte_per_unittime << 32 250 * (((bits_per_sec) / 8) << 32) / machclk_freq 251 * depth: byte << 32 252 * 253 */ 254 #define TBR_SHIFT 32 255 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 256 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 257 258 struct mbuf * 259 tbr_dequeue(ifq, op) 260 struct ifaltq *ifq; 261 int op; 262 { 263 struct tb_regulator *tbr; 264 struct mbuf *m; 265 int64_t interval; 266 u_int64_t now; 267 268 tbr = ifq->altq_tbr; 269 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 270 /* if this is a remove after poll, bypass tbr check */ 271 } else { 272 /* update token only when it is negative */ 273 if (tbr->tbr_token <= 0) { 274 now = read_machclk(); 275 interval = now - tbr->tbr_last; 276 if (interval >= tbr->tbr_filluptime) 277 tbr->tbr_token = tbr->tbr_depth; 278 else { 279 tbr->tbr_token += interval * tbr->tbr_rate; 280 if (tbr->tbr_token > tbr->tbr_depth) 281 tbr->tbr_token = tbr->tbr_depth; 282 } 283 tbr->tbr_last = now; 284 } 285 /* if token is still negative, don't allow dequeue */ 286 if (tbr->tbr_token <= 0) 287 return (NULL); 288 } 289 290 if (ALTQ_IS_ENABLED(ifq)) 291 m = (*ifq->altq_dequeue)(ifq, op); 292 else { 293 if (op == ALTDQ_POLL) 294 IF_POLL(ifq, m); 295 else 296 IF_DEQUEUE(ifq, m); 297 } 298 299 if (m != NULL && op == ALTDQ_REMOVE) 300 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 301 tbr->tbr_lastop = op; 302 return (m); 303 } 304 305 /* 306 * set a token bucket regulator. 307 * if the specified rate is zero, the token bucket regulator is deleted. 308 */ 309 int 310 tbr_set(ifq, profile) 311 struct ifaltq *ifq; 312 struct tb_profile *profile; 313 { 314 struct tb_regulator *tbr, *otbr; 315 316 if (machclk_freq == 0) 317 init_machclk(); 318 if (machclk_freq == 0) { 319 printf("tbr_set: no cpu clock available!\n"); 320 return (ENXIO); 321 } 322 323 if (profile->rate == 0) { 324 /* delete this tbr */ 325 if ((tbr = ifq->altq_tbr) == NULL) 326 return (ENOENT); 327 ifq->altq_tbr = NULL; 328 FREE(tbr, M_DEVBUF); 329 return (0); 330 } 331 332 MALLOC(tbr, struct tb_regulator *, sizeof(struct tb_regulator), 333 M_DEVBUF, M_WAITOK); 334 if (tbr == NULL) 335 return (ENOMEM); 336 bzero(tbr, sizeof(struct tb_regulator)); 337 338 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 339 tbr->tbr_depth = TBR_SCALE(profile->depth); 340 if (tbr->tbr_rate > 0) 341 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 342 else 343 tbr->tbr_filluptime = 0xffffffffffffffffLL; 344 tbr->tbr_token = tbr->tbr_depth; 345 tbr->tbr_last = read_machclk(); 346 tbr->tbr_lastop = ALTDQ_REMOVE; 347 348 otbr = ifq->altq_tbr; 349 ifq->altq_tbr = tbr; /* set the new tbr */ 350 351 if (otbr != NULL) 352 FREE(otbr, M_DEVBUF); 353 else { 354 if (tbr_timer == 0) { 355 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 356 tbr_timer = 1; 357 } 358 } 359 return (0); 360 } 361 362 /* 363 * tbr_timeout goes through the interface list, and kicks the drivers 364 * if necessary. 365 */ 366 static void 367 tbr_timeout(arg) 368 void *arg; 369 { 370 struct ifnet *ifp; 371 int active, s; 372 373 active = 0; 374 s = splnet(); 375 #ifdef __FreeBSD__ 376 #if (__FreeBSD_version < 300000) 377 for (ifp = ifnet; ifp; ifp = ifp->if_next) 378 #else 379 for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_link.tqe_next) 380 #endif 381 #else /* !FreeBSD */ 382 for (ifp = ifnet.tqh_first; ifp != NULL; ifp = ifp->if_list.tqe_next) 383 #endif 384 { 385 if (!TBR_IS_ENABLED(&ifp->if_snd)) 386 continue; 387 active++; 388 if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) 389 (*ifp->if_start)(ifp); 390 } 391 splx(s); 392 if (active > 0) 393 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 394 else 395 tbr_timer = 0; /* don't need tbr_timer anymore */ 396 #if defined(__alpha__) && !defined(ALTQ_NOPCC) 397 { 398 /* 399 * XXX read out the machine dependent clock once a second 400 * to detect counter wrap-around. 401 */ 402 static u_int cnt; 403 404 if (++cnt >= hz) { 405 (void)read_machclk(); 406 cnt = 0; 407 } 408 } 409 #endif /* __alpha__ && !ALTQ_NOPCC */ 410 } 411 412 /* 413 * get token bucket regulator profile 414 */ 415 int 416 tbr_get(ifq, profile) 417 struct ifaltq *ifq; 418 struct tb_profile *profile; 419 { 420 struct tb_regulator *tbr; 421 422 if ((tbr = ifq->altq_tbr) == NULL) { 423 profile->rate = 0; 424 profile->depth = 0; 425 } else { 426 profile->rate = 427 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); 428 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); 429 } 430 return (0); 431 } 432 433 434 #ifndef IPPROTO_ESP 435 #define IPPROTO_ESP 50 /* encapsulating security payload */ 436 #endif 437 #ifndef IPPROTO_AH 438 #define IPPROTO_AH 51 /* authentication header */ 439 #endif 440 441 /* 442 * extract flow information from a given packet. 443 * filt_mask shows flowinfo fields required. 444 * we assume the ip header is in one mbuf, and addresses and ports are 445 * in network byte order. 446 */ 447 int 448 altq_extractflow(m, af, flow, filt_bmask) 449 struct mbuf *m; 450 int af; 451 struct flowinfo *flow; 452 u_int32_t filt_bmask; 453 { 454 455 switch (af) { 456 case PF_INET: { 457 struct flowinfo_in *fin; 458 struct ip *ip; 459 460 ip = mtod(m, struct ip *); 461 462 if (ip->ip_v != 4) 463 break; 464 465 fin = (struct flowinfo_in *)flow; 466 fin->fi_len = sizeof(struct flowinfo_in); 467 fin->fi_family = AF_INET; 468 469 fin->fi_proto = ip->ip_p; 470 fin->fi_tos = ip->ip_tos; 471 472 fin->fi_src.s_addr = ip->ip_src.s_addr; 473 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 474 475 if (filt_bmask & FIMB4_PORTS) 476 /* if port info is required, extract port numbers */ 477 extract_ports4(m, ip, fin); 478 else { 479 fin->fi_sport = 0; 480 fin->fi_dport = 0; 481 fin->fi_gpi = 0; 482 } 483 return (1); 484 } 485 486 #ifdef INET6 487 case PF_INET6: { 488 struct flowinfo_in6 *fin6; 489 struct ip6_hdr *ip6; 490 491 ip6 = mtod(m, struct ip6_hdr *); 492 /* should we check the ip version? */ 493 494 fin6 = (struct flowinfo_in6 *)flow; 495 fin6->fi6_len = sizeof(struct flowinfo_in6); 496 fin6->fi6_family = AF_INET6; 497 498 fin6->fi6_proto = ip6->ip6_nxt; 499 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 500 501 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 502 fin6->fi6_src = ip6->ip6_src; 503 fin6->fi6_dst = ip6->ip6_dst; 504 505 if ((filt_bmask & FIMB6_PORTS) || 506 ((filt_bmask & FIMB6_PROTO) 507 && ip6->ip6_nxt > IPPROTO_IPV6)) 508 /* 509 * if port info is required, or proto is required 510 * but there are option headers, extract port 511 * and protocol numbers. 512 */ 513 extract_ports6(m, ip6, fin6); 514 else { 515 fin6->fi6_sport = 0; 516 fin6->fi6_dport = 0; 517 fin6->fi6_gpi = 0; 518 } 519 return (1); 520 } 521 #endif /* INET6 */ 522 523 default: 524 break; 525 } 526 527 /* failed */ 528 flow->fi_len = sizeof(struct flowinfo); 529 flow->fi_family = AF_UNSPEC; 530 return (0); 531 } 532 533 /* 534 * helper routine to extract port numbers 535 */ 536 /* structure for ipsec and ipv6 option header template */ 537 struct _opt6 { 538 u_int8_t opt6_nxt; /* next header */ 539 u_int8_t opt6_hlen; /* header extension length */ 540 u_int16_t _pad; 541 u_int32_t ah_spi; /* security parameter index 542 for authentication header */ 543 }; 544 545 /* 546 * extract port numbers from a ipv4 packet. 547 */ 548 static int 549 extract_ports4(m, ip, fin) 550 struct mbuf *m; 551 struct ip *ip; 552 struct flowinfo_in *fin; 553 { 554 struct mbuf *m0; 555 u_short ip_off; 556 u_int8_t proto; 557 int off; 558 559 fin->fi_sport = 0; 560 fin->fi_dport = 0; 561 fin->fi_gpi = 0; 562 563 ip_off = ntohs(ip->ip_off); 564 /* if it is a fragment, try cached fragment info */ 565 if (ip_off & IP_OFFMASK) { 566 ip4f_lookup(ip, fin); 567 return (1); 568 } 569 570 /* locate the mbuf containing the protocol header */ 571 for (m0 = m; m0 != NULL; m0 = m0->m_next) 572 if (((caddr_t)ip >= m0->m_data) && 573 ((caddr_t)ip < m0->m_data + m0->m_len)) 574 break; 575 if (m0 == NULL) { 576 #ifdef ALTQ_DEBUG 577 printf("extract_ports4: can't locate header! ip=%p\n", ip); 578 #endif 579 return (0); 580 } 581 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 582 proto = ip->ip_p; 583 584 #ifdef ALTQ_IPSEC 585 again: 586 #endif 587 while (off >= m0->m_len) { 588 off -= m0->m_len; 589 m0 = m0->m_next; 590 if (m0 == NULL) 591 return (0); /* bogus ip_hl! */ 592 } 593 if (m0->m_len < off + 4) 594 return (0); 595 596 switch (proto) { 597 case IPPROTO_TCP: 598 case IPPROTO_UDP: { 599 struct udphdr *udp; 600 601 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 602 fin->fi_sport = udp->uh_sport; 603 fin->fi_dport = udp->uh_dport; 604 fin->fi_proto = proto; 605 } 606 break; 607 608 #ifdef ALTQ_IPSEC 609 case IPPROTO_ESP: 610 if (fin->fi_gpi == 0){ 611 u_int32_t *gpi; 612 613 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 614 fin->fi_gpi = *gpi; 615 } 616 fin->fi_proto = proto; 617 break; 618 619 case IPPROTO_AH: { 620 /* get next header and header length */ 621 struct _opt6 *opt6; 622 623 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 624 proto = opt6->opt6_nxt; 625 off += 8 + (opt6->opt6_hlen * 4); 626 if (fin->fi_gpi == 0 && m0->m_len >= off + 8) 627 fin->fi_gpi = opt6->ah_spi; 628 } 629 /* goto the next header */ 630 goto again; 631 #endif /* ALTQ_IPSEC */ 632 633 default: 634 fin->fi_proto = proto; 635 return (0); 636 } 637 638 /* if this is a first fragment, cache it. */ 639 if (ip_off & IP_MF) 640 ip4f_cache(ip, fin); 641 642 return (1); 643 } 644 645 #ifdef INET6 646 static int 647 extract_ports6(m, ip6, fin6) 648 struct mbuf *m; 649 struct ip6_hdr *ip6; 650 struct flowinfo_in6 *fin6; 651 { 652 struct mbuf *m0; 653 int off; 654 u_int8_t proto; 655 656 fin6->fi6_gpi = 0; 657 fin6->fi6_sport = 0; 658 fin6->fi6_dport = 0; 659 660 /* locate the mbuf containing the protocol header */ 661 for (m0 = m; m0 != NULL; m0 = m0->m_next) 662 if (((caddr_t)ip6 >= m0->m_data) && 663 ((caddr_t)ip6 < m0->m_data + m0->m_len)) 664 break; 665 if (m0 == NULL) { 666 #ifdef ALTQ_DEBUG 667 printf("extract_ports6: can't locate header! ip6=%p\n", ip6); 668 #endif 669 return (0); 670 } 671 off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); 672 673 proto = ip6->ip6_nxt; 674 do { 675 while (off >= m0->m_len) { 676 off -= m0->m_len; 677 m0 = m0->m_next; 678 if (m0 == NULL) 679 return (0); 680 } 681 if (m0->m_len < off + 4) 682 return (0); 683 684 switch (proto) { 685 case IPPROTO_TCP: 686 case IPPROTO_UDP: { 687 struct udphdr *udp; 688 689 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 690 fin6->fi6_sport = udp->uh_sport; 691 fin6->fi6_dport = udp->uh_dport; 692 fin6->fi6_proto = proto; 693 } 694 return (1); 695 696 case IPPROTO_ESP: 697 if (fin6->fi6_gpi == 0) { 698 u_int32_t *gpi; 699 700 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 701 fin6->fi6_gpi = *gpi; 702 } 703 fin6->fi6_proto = proto; 704 return (1); 705 706 case IPPROTO_AH: { 707 /* get next header and header length */ 708 struct _opt6 *opt6; 709 710 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 711 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) 712 fin6->fi6_gpi = opt6->ah_spi; 713 proto = opt6->opt6_nxt; 714 off += 8 + (opt6->opt6_hlen * 4); 715 /* goto the next header */ 716 break; 717 } 718 719 case IPPROTO_HOPOPTS: 720 case IPPROTO_ROUTING: 721 case IPPROTO_DSTOPTS: { 722 /* get next header and header length */ 723 struct _opt6 *opt6; 724 725 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 726 proto = opt6->opt6_nxt; 727 off += (opt6->opt6_hlen + 1) * 8; 728 /* goto the next header */ 729 break; 730 } 731 732 case IPPROTO_FRAGMENT: 733 /* ipv6 fragmentations are not supported yet */ 734 default: 735 fin6->fi6_proto = proto; 736 return (0); 737 } 738 } while (1); 739 /*NOTREACHED*/ 740 } 741 #endif /* INET6 */ 742 743 /* 744 * altq common classifier 745 */ 746 int 747 acc_add_filter(classifier, filter, class, phandle) 748 struct acc_classifier *classifier; 749 struct flow_filter *filter; 750 void *class; 751 u_long *phandle; 752 { 753 struct acc_filter *afp, *prev, *tmp; 754 int i, s; 755 756 #ifdef INET6 757 if (filter->ff_flow.fi_family != AF_INET && 758 filter->ff_flow.fi_family != AF_INET6) 759 return (EINVAL); 760 #else 761 if (filter->ff_flow.fi_family != AF_INET) 762 return (EINVAL); 763 #endif 764 765 MALLOC(afp, struct acc_filter *, sizeof(struct acc_filter), 766 M_DEVBUF, M_WAITOK); 767 if (afp == NULL) 768 return (ENOMEM); 769 bzero(afp, sizeof(struct acc_filter)); 770 771 afp->f_filter = *filter; 772 afp->f_class = class; 773 774 i = ACC_WILDCARD_INDEX; 775 if (filter->ff_flow.fi_family == AF_INET) { 776 struct flow_filter *filter4 = &afp->f_filter; 777 778 /* 779 * if address is 0, it's a wildcard. if address mask 780 * isn't set, use full mask. 781 */ 782 if (filter4->ff_flow.fi_dst.s_addr == 0) 783 filter4->ff_mask.mask_dst.s_addr = 0; 784 else if (filter4->ff_mask.mask_dst.s_addr == 0) 785 filter4->ff_mask.mask_dst.s_addr = 0xffffffff; 786 if (filter4->ff_flow.fi_src.s_addr == 0) 787 filter4->ff_mask.mask_src.s_addr = 0; 788 else if (filter4->ff_mask.mask_src.s_addr == 0) 789 filter4->ff_mask.mask_src.s_addr = 0xffffffff; 790 791 /* clear extra bits in addresses */ 792 filter4->ff_flow.fi_dst.s_addr &= 793 filter4->ff_mask.mask_dst.s_addr; 794 filter4->ff_flow.fi_src.s_addr &= 795 filter4->ff_mask.mask_src.s_addr; 796 797 /* 798 * if dst address is a wildcard, use hash-entry 799 * ACC_WILDCARD_INDEX. 800 */ 801 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) 802 i = ACC_WILDCARD_INDEX; 803 else 804 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); 805 } 806 #ifdef INET6 807 else if (filter->ff_flow.fi_family == AF_INET6) { 808 struct flow_filter6 *filter6 = 809 (struct flow_filter6 *)&afp->f_filter; 810 #ifndef IN6MASK0 /* taken from kame ipv6 */ 811 #define IN6MASK0 {{{ 0, 0, 0, 0 }}} 812 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} 813 const struct in6_addr in6mask0 = IN6MASK0; 814 const struct in6_addr in6mask128 = IN6MASK128; 815 #endif 816 817 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) 818 filter6->ff_mask6.mask6_dst = in6mask0; 819 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) 820 filter6->ff_mask6.mask6_dst = in6mask128; 821 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) 822 filter6->ff_mask6.mask6_src = in6mask0; 823 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) 824 filter6->ff_mask6.mask6_src = in6mask128; 825 826 /* clear extra bits in addresses */ 827 for (i = 0; i < 16; i++) 828 filter6->ff_flow6.fi6_dst.s6_addr[i] &= 829 filter6->ff_mask6.mask6_dst.s6_addr[i]; 830 for (i = 0; i < 16; i++) 831 filter6->ff_flow6.fi6_src.s6_addr[i] &= 832 filter6->ff_mask6.mask6_src.s6_addr[i]; 833 834 if (filter6->ff_flow6.fi6_flowlabel == 0) 835 i = ACC_WILDCARD_INDEX; 836 else 837 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); 838 } 839 #endif /* INET6 */ 840 841 afp->f_handle = get_filt_handle(classifier, i); 842 843 /* update filter bitmask */ 844 afp->f_fbmask = filt2fibmask(filter); 845 classifier->acc_fbmask |= afp->f_fbmask; 846 847 /* 848 * add this filter to the filter list. 849 * filters are ordered from the highest rule number. 850 */ 851 s = splnet(); 852 prev = NULL; 853 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 854 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 855 prev = tmp; 856 else 857 break; 858 } 859 if (prev == NULL) 860 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 861 else 862 LIST_INSERT_AFTER(prev, afp, f_chain); 863 splx(s); 864 865 *phandle = afp->f_handle; 866 return (0); 867 } 868 869 int 870 acc_delete_filter(classifier, handle) 871 struct acc_classifier *classifier; 872 u_long handle; 873 { 874 struct acc_filter *afp; 875 int s; 876 877 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 878 return (EINVAL); 879 880 s = splnet(); 881 LIST_REMOVE(afp, f_chain); 882 splx(s); 883 884 FREE(afp, M_DEVBUF); 885 886 /* todo: update filt_bmask */ 887 888 return (0); 889 } 890 891 /* 892 * delete filters referencing to the specified class. 893 * if the all flag is not 0, delete all the filters. 894 */ 895 int 896 acc_discard_filters(classifier, class, all) 897 struct acc_classifier *classifier; 898 void *class; 899 int all; 900 { 901 struct acc_filter *afp; 902 int i, s; 903 904 s = splnet(); 905 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 906 do { 907 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 908 if (all || afp->f_class == class) { 909 LIST_REMOVE(afp, f_chain); 910 FREE(afp, M_DEVBUF); 911 /* start again from the head */ 912 break; 913 } 914 } while (afp != NULL); 915 } 916 splx(s); 917 918 if (all) 919 classifier->acc_fbmask = 0; 920 921 return (0); 922 } 923 924 void * 925 acc_classify(clfier, m, af) 926 void *clfier; 927 struct mbuf *m; 928 int af; 929 { 930 struct acc_classifier *classifier; 931 struct flowinfo flow; 932 struct acc_filter *afp; 933 int i; 934 935 classifier = (struct acc_classifier *)clfier; 936 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 937 938 if (flow.fi_family == AF_INET) { 939 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 940 941 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 942 /* only tos is used */ 943 LIST_FOREACH(afp, 944 &classifier->acc_filters[ACC_WILDCARD_INDEX], 945 f_chain) 946 if (apply_tosfilter4(afp->f_fbmask, 947 &afp->f_filter, fp)) 948 /* filter matched */ 949 return (afp->f_class); 950 } else if ((classifier->acc_fbmask & 951 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 952 == 0) { 953 /* only proto and ports are used */ 954 LIST_FOREACH(afp, 955 &classifier->acc_filters[ACC_WILDCARD_INDEX], 956 f_chain) 957 if (apply_ppfilter4(afp->f_fbmask, 958 &afp->f_filter, fp)) 959 /* filter matched */ 960 return (afp->f_class); 961 } else { 962 /* get the filter hash entry from its dest address */ 963 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 964 do { 965 /* 966 * go through this loop twice. first for dst 967 * hash, second for wildcards. 968 */ 969 LIST_FOREACH(afp, &classifier->acc_filters[i], 970 f_chain) 971 if (apply_filter4(afp->f_fbmask, 972 &afp->f_filter, fp)) 973 /* filter matched */ 974 return (afp->f_class); 975 976 /* 977 * check again for filters with a dst addr 978 * wildcard. 979 * (daddr == 0 || dmask != 0xffffffff). 980 */ 981 if (i != ACC_WILDCARD_INDEX) 982 i = ACC_WILDCARD_INDEX; 983 else 984 break; 985 } while (1); 986 } 987 } 988 #ifdef INET6 989 else if (flow.fi_family == AF_INET6) { 990 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 991 992 /* get the filter hash entry from its flow ID */ 993 if (fp6->fi6_flowlabel != 0) 994 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 995 else 996 /* flowlable can be zero */ 997 i = ACC_WILDCARD_INDEX; 998 999 /* go through this loop twice. first for flow hash, second 1000 for wildcards. */ 1001 do { 1002 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1003 if (apply_filter6(afp->f_fbmask, 1004 (struct flow_filter6 *)&afp->f_filter, 1005 fp6)) 1006 /* filter matched */ 1007 return (afp->f_class); 1008 1009 /* 1010 * check again for filters with a wildcard. 1011 */ 1012 if (i != ACC_WILDCARD_INDEX) 1013 i = ACC_WILDCARD_INDEX; 1014 else 1015 break; 1016 } while (1); 1017 } 1018 #endif /* INET6 */ 1019 1020 /* no filter matched */ 1021 return (NULL); 1022 } 1023 1024 static int 1025 apply_filter4(fbmask, filt, pkt) 1026 u_int32_t fbmask; 1027 struct flow_filter *filt; 1028 struct flowinfo_in *pkt; 1029 { 1030 if (filt->ff_flow.fi_family != AF_INET) 1031 return (0); 1032 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1033 return (0); 1034 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1035 return (0); 1036 if ((fbmask & FIMB4_DADDR) && 1037 filt->ff_flow.fi_dst.s_addr != 1038 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1039 return (0); 1040 if ((fbmask & FIMB4_SADDR) && 1041 filt->ff_flow.fi_src.s_addr != 1042 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1043 return (0); 1044 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1045 return (0); 1046 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1047 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1048 return (0); 1049 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1050 return (0); 1051 /* match */ 1052 return (1); 1053 } 1054 1055 /* 1056 * filter matching function optimized for a common case that checks 1057 * only protocol and port numbers 1058 */ 1059 static int 1060 apply_ppfilter4(fbmask, filt, pkt) 1061 u_int32_t fbmask; 1062 struct flow_filter *filt; 1063 struct flowinfo_in *pkt; 1064 { 1065 if (filt->ff_flow.fi_family != AF_INET) 1066 return (0); 1067 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1068 return (0); 1069 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1070 return (0); 1071 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1072 return (0); 1073 /* match */ 1074 return (1); 1075 } 1076 1077 /* 1078 * filter matching function only for tos field. 1079 */ 1080 static int 1081 apply_tosfilter4(fbmask, filt, pkt) 1082 u_int32_t fbmask; 1083 struct flow_filter *filt; 1084 struct flowinfo_in *pkt; 1085 { 1086 if (filt->ff_flow.fi_family != AF_INET) 1087 return (0); 1088 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1089 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1090 return (0); 1091 /* match */ 1092 return (1); 1093 } 1094 1095 #ifdef INET6 1096 static int 1097 apply_filter6(fbmask, filt, pkt) 1098 u_int32_t fbmask; 1099 struct flow_filter6 *filt; 1100 struct flowinfo_in6 *pkt; 1101 { 1102 int i; 1103 1104 if (filt->ff_flow6.fi6_family != AF_INET6) 1105 return (0); 1106 if ((fbmask & FIMB6_FLABEL) && 1107 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1108 return (0); 1109 if ((fbmask & FIMB6_PROTO) && 1110 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1111 return (0); 1112 if ((fbmask & FIMB6_SPORT) && 1113 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1114 return (0); 1115 if ((fbmask & FIMB6_DPORT) && 1116 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1117 return (0); 1118 if (fbmask & FIMB6_SADDR) { 1119 for (i = 0; i < 4; i++) 1120 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1121 (pkt->fi6_src.s6_addr32[i] & 1122 filt->ff_mask6.mask6_src.s6_addr32[i])) 1123 return (0); 1124 } 1125 if (fbmask & FIMB6_DADDR) { 1126 for (i = 0; i < 4; i++) 1127 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1128 (pkt->fi6_dst.s6_addr32[i] & 1129 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1130 return (0); 1131 } 1132 if ((fbmask & FIMB6_TCLASS) && 1133 filt->ff_flow6.fi6_tclass != 1134 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1135 return (0); 1136 if ((fbmask & FIMB6_GPI) && 1137 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1138 return (0); 1139 /* match */ 1140 return (1); 1141 } 1142 #endif /* INET6 */ 1143 1144 /* 1145 * filter handle: 1146 * bit 20-28: index to the filter hash table 1147 * bit 0-19: unique id in the hash bucket. 1148 */ 1149 static u_long 1150 get_filt_handle(classifier, i) 1151 struct acc_classifier *classifier; 1152 int i; 1153 { 1154 static u_long handle_number = 1; 1155 u_long handle; 1156 struct acc_filter *afp; 1157 1158 while (1) { 1159 handle = handle_number++ & 0x000fffff; 1160 1161 if (LIST_EMPTY(&classifier->acc_filters[i])) 1162 break; 1163 1164 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1165 if ((afp->f_handle & 0x000fffff) == handle) 1166 break; 1167 if (afp == NULL) 1168 break; 1169 /* this handle is already used, try again */ 1170 } 1171 1172 return ((i << 20) | handle); 1173 } 1174 1175 /* convert filter handle to filter pointer */ 1176 static struct acc_filter * 1177 filth_to_filtp(classifier, handle) 1178 struct acc_classifier *classifier; 1179 u_long handle; 1180 { 1181 struct acc_filter *afp; 1182 int i; 1183 1184 i = ACC_GET_HINDEX(handle); 1185 1186 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1187 if (afp->f_handle == handle) 1188 return (afp); 1189 1190 return (NULL); 1191 } 1192 1193 /* create flowinfo bitmask */ 1194 static u_int32_t 1195 filt2fibmask(filt) 1196 struct flow_filter *filt; 1197 { 1198 u_int32_t mask = 0; 1199 #ifdef INET6 1200 struct flow_filter6 *filt6; 1201 #endif 1202 1203 switch (filt->ff_flow.fi_family) { 1204 case AF_INET: 1205 if (filt->ff_flow.fi_proto != 0) 1206 mask |= FIMB4_PROTO; 1207 if (filt->ff_flow.fi_tos != 0) 1208 mask |= FIMB4_TOS; 1209 if (filt->ff_flow.fi_dst.s_addr != 0) 1210 mask |= FIMB4_DADDR; 1211 if (filt->ff_flow.fi_src.s_addr != 0) 1212 mask |= FIMB4_SADDR; 1213 if (filt->ff_flow.fi_sport != 0) 1214 mask |= FIMB4_SPORT; 1215 if (filt->ff_flow.fi_dport != 0) 1216 mask |= FIMB4_DPORT; 1217 if (filt->ff_flow.fi_gpi != 0) 1218 mask |= FIMB4_GPI; 1219 break; 1220 #ifdef INET6 1221 case AF_INET6: 1222 filt6 = (struct flow_filter6 *)filt; 1223 1224 if (filt6->ff_flow6.fi6_proto != 0) 1225 mask |= FIMB6_PROTO; 1226 if (filt6->ff_flow6.fi6_tclass != 0) 1227 mask |= FIMB6_TCLASS; 1228 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1229 mask |= FIMB6_DADDR; 1230 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1231 mask |= FIMB6_SADDR; 1232 if (filt6->ff_flow6.fi6_sport != 0) 1233 mask |= FIMB6_SPORT; 1234 if (filt6->ff_flow6.fi6_dport != 0) 1235 mask |= FIMB6_DPORT; 1236 if (filt6->ff_flow6.fi6_gpi != 0) 1237 mask |= FIMB6_GPI; 1238 if (filt6->ff_flow6.fi6_flowlabel != 0) 1239 mask |= FIMB6_FLABEL; 1240 break; 1241 #endif /* INET6 */ 1242 } 1243 return (mask); 1244 } 1245 1246 1247 /* 1248 * helper functions to handle IPv4 fragments. 1249 * currently only in-sequence fragments are handled. 1250 * - fragment info is cached in a LRU list. 1251 * - when a first fragment is found, cache its flow info. 1252 * - when a non-first fragment is found, lookup the cache. 1253 */ 1254 1255 struct ip4_frag { 1256 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1257 char ip4f_valid; 1258 u_short ip4f_id; 1259 struct flowinfo_in ip4f_info; 1260 }; 1261 1262 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1263 1264 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1265 1266 1267 static void 1268 ip4f_cache(ip, fin) 1269 struct ip *ip; 1270 struct flowinfo_in *fin; 1271 { 1272 struct ip4_frag *fp; 1273 1274 if (TAILQ_EMPTY(&ip4f_list)) { 1275 /* first time call, allocate fragment cache entries. */ 1276 if (ip4f_init() < 0) 1277 /* allocation failed! */ 1278 return; 1279 } 1280 1281 fp = ip4f_alloc(); 1282 fp->ip4f_id = ip->ip_id; 1283 fp->ip4f_info.fi_proto = ip->ip_p; 1284 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1285 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1286 1287 /* save port numbers */ 1288 fp->ip4f_info.fi_sport = fin->fi_sport; 1289 fp->ip4f_info.fi_dport = fin->fi_dport; 1290 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1291 } 1292 1293 static int 1294 ip4f_lookup(ip, fin) 1295 struct ip *ip; 1296 struct flowinfo_in *fin; 1297 { 1298 struct ip4_frag *fp; 1299 1300 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1301 fp = TAILQ_NEXT(fp, ip4f_chain)) 1302 if (ip->ip_id == fp->ip4f_id && 1303 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1304 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1305 ip->ip_p == fp->ip4f_info.fi_proto) { 1306 1307 /* found the matching entry */ 1308 fin->fi_sport = fp->ip4f_info.fi_sport; 1309 fin->fi_dport = fp->ip4f_info.fi_dport; 1310 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1311 1312 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1313 /* this is the last fragment, 1314 release the entry. */ 1315 ip4f_free(fp); 1316 1317 return (1); 1318 } 1319 1320 /* no matching entry found */ 1321 return (0); 1322 } 1323 1324 static int 1325 ip4f_init(void) 1326 { 1327 struct ip4_frag *fp; 1328 int i; 1329 1330 TAILQ_INIT(&ip4f_list); 1331 for (i=0; i<IP4F_TABSIZE; i++) { 1332 MALLOC(fp, struct ip4_frag *, sizeof(struct ip4_frag), 1333 M_DEVBUF, M_NOWAIT); 1334 if (fp == NULL) { 1335 printf("ip4f_init: can't alloc %dth entry!\n", i); 1336 if (i == 0) 1337 return (-1); 1338 return (0); 1339 } 1340 fp->ip4f_valid = 0; 1341 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1342 } 1343 return (0); 1344 } 1345 1346 static struct ip4_frag * 1347 ip4f_alloc(void) 1348 { 1349 struct ip4_frag *fp; 1350 1351 /* reclaim an entry at the tail, put it at the head */ 1352 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 1353 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1354 fp->ip4f_valid = 1; 1355 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 1356 return (fp); 1357 } 1358 1359 static void 1360 ip4f_free(fp) 1361 struct ip4_frag *fp; 1362 { 1363 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1364 fp->ip4f_valid = 0; 1365 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1366 } 1367 1368 /* 1369 * read and write diffserv field in IPv4 or IPv6 header 1370 */ 1371 u_int8_t 1372 read_dsfield(m, pktattr) 1373 struct mbuf *m; 1374 struct altq_pktattr *pktattr; 1375 { 1376 struct mbuf *m0; 1377 u_int8_t ds_field = 0; 1378 1379 if (pktattr == NULL || 1380 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 1381 return ((u_int8_t)0); 1382 1383 /* verify that pattr_hdr is within the mbuf data */ 1384 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1385 if ((pktattr->pattr_hdr >= m0->m_data) && 1386 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 1387 break; 1388 if (m0 == NULL) { 1389 /* ick, pattr_hdr is stale */ 1390 pktattr->pattr_af = AF_UNSPEC; 1391 #ifdef ALTQ_DEBUG 1392 printf("read_dsfield: can't locate header!\n"); 1393 #endif 1394 return ((u_int8_t)0); 1395 } 1396 1397 if (pktattr->pattr_af == AF_INET) { 1398 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 1399 1400 if (ip->ip_v != 4) 1401 return ((u_int8_t)0); /* version mismatch! */ 1402 ds_field = ip->ip_tos; 1403 } 1404 #ifdef INET6 1405 else if (pktattr->pattr_af == AF_INET6) { 1406 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1407 u_int32_t flowlabel; 1408 1409 flowlabel = ntohl(ip6->ip6_flow); 1410 if ((flowlabel >> 28) != 6) 1411 return ((u_int8_t)0); /* version mismatch! */ 1412 ds_field = (flowlabel >> 20) & 0xff; 1413 } 1414 #endif 1415 return (ds_field); 1416 } 1417 1418 void 1419 write_dsfield(m, pktattr, dsfield) 1420 struct mbuf *m; 1421 struct altq_pktattr *pktattr; 1422 u_int8_t dsfield; 1423 { 1424 struct mbuf *m0; 1425 1426 if (pktattr == NULL || 1427 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 1428 return; 1429 1430 /* verify that pattr_hdr is within the mbuf data */ 1431 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1432 if ((pktattr->pattr_hdr >= m0->m_data) && 1433 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 1434 break; 1435 if (m0 == NULL) { 1436 /* ick, pattr_hdr is stale */ 1437 pktattr->pattr_af = AF_UNSPEC; 1438 #ifdef ALTQ_DEBUG 1439 printf("write_dsfield: can't locate header!\n"); 1440 #endif 1441 return; 1442 } 1443 1444 if (pktattr->pattr_af == AF_INET) { 1445 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 1446 u_int8_t old; 1447 int32_t sum; 1448 1449 if (ip->ip_v != 4) 1450 return; /* version mismatch! */ 1451 old = ip->ip_tos; 1452 dsfield |= old & 3; /* leave CU bits */ 1453 if (old == dsfield) 1454 return; 1455 ip->ip_tos = dsfield; 1456 /* 1457 * update checksum (from RFC1624) 1458 * HC' = ~(~HC + ~m + m') 1459 */ 1460 sum = ~ntohs(ip->ip_sum) & 0xffff; 1461 sum += 0xff00 + (~old & 0xff) + dsfield; 1462 sum = (sum >> 16) + (sum & 0xffff); 1463 sum += (sum >> 16); /* add carry */ 1464 1465 ip->ip_sum = htons(~sum & 0xffff); 1466 } 1467 #ifdef INET6 1468 else if (pktattr->pattr_af == AF_INET6) { 1469 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1470 u_int32_t flowlabel; 1471 1472 flowlabel = ntohl(ip6->ip6_flow); 1473 if ((flowlabel >> 28) != 6) 1474 return; /* version mismatch! */ 1475 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 1476 ip6->ip6_flow = htonl(flowlabel); 1477 } 1478 #endif 1479 return; 1480 } 1481 1482 1483 /* 1484 * high resolution clock support taking advantage of a machine dependent 1485 * high resolution time counter (e.g., timestamp counter of intel pentium). 1486 * we assume 1487 * - 64-bit-long monotonically-increasing counter 1488 * - frequency range is 100M-4GHz (CPU speed) 1489 */ 1490 u_int32_t machclk_freq = 0; 1491 u_int32_t machclk_per_tick = 0; 1492 1493 #if (defined(__i386__) || defined(__alpha__)) && !defined(ALTQ_NOPCC) 1494 1495 #if defined(__FreeBSD__) && defined(SMP) 1496 #error SMP system! use ALTQ_NOPCC option. 1497 #endif 1498 1499 #ifdef __alpha__ 1500 #ifdef __FreeBSD__ 1501 extern u_int32_t cycles_per_sec; /* alpha cpu clock frequency */ 1502 #elif defined(__NetBSD__) || defined(__OpenBSD__) 1503 extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */ 1504 #endif 1505 #endif /* __alpha__ */ 1506 1507 void 1508 init_machclk(void) 1509 { 1510 /* sanity check */ 1511 #ifdef __i386__ 1512 /* check if TSC is available */ 1513 if ((cpu_feature & CPUID_TSC) == 0) { 1514 printf("altq: TSC isn't available! use ALTQ_NOPCC option.\n"); 1515 return; 1516 } 1517 #endif 1518 1519 /* 1520 * if the clock frequency (of Pentium TSC or Alpha PCC) is 1521 * accessible, just use it. 1522 */ 1523 #ifdef __i386__ 1524 #ifdef __FreeBSD__ 1525 #if (__FreeBSD_version > 300000) 1526 machclk_freq = tsc_freq; 1527 #else 1528 machclk_freq = i586_ctr_freq; 1529 #endif 1530 #elif defined(__NetBSD__) 1531 machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq; 1532 #elif defined(__OpenBSD__) 1533 machclk_freq = pentium_mhz * 1000000; 1534 #endif 1535 #elif defined(__alpha__) 1536 #ifdef __FreeBSD__ 1537 machclk_freq = cycles_per_sec; 1538 #elif defined(__NetBSD__) || defined(__OpenBSD__) 1539 machclk_freq = (u_int32_t)(cycles_per_usec * 1000000); 1540 #endif 1541 #endif /* __alpha__ */ 1542 1543 /* 1544 * if we don't know the clock frequency, measure it. 1545 */ 1546 if (machclk_freq == 0) { 1547 static int wait; 1548 struct timeval tv_start, tv_end; 1549 u_int64_t start, end, diff; 1550 int timo; 1551 1552 microtime(&tv_start); 1553 start = read_machclk(); 1554 timo = hz; /* 1 sec */ 1555 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 1556 microtime(&tv_end); 1557 end = read_machclk(); 1558 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 1559 + tv_end.tv_usec - tv_start.tv_usec; 1560 if (diff != 0) 1561 machclk_freq = (u_int)((end - start) * 1000000 / diff); 1562 } 1563 1564 machclk_per_tick = machclk_freq / hz; 1565 1566 #ifdef ALTQ_DEBUG 1567 printf("altq: CPU clock: %uHz\n", machclk_freq); 1568 #endif 1569 } 1570 1571 #ifdef __alpha__ 1572 /* 1573 * make a 64bit counter value out of the 32bit alpha processor cycle counter. 1574 * read_machclk must be called within a half of its wrap-around cycle 1575 * (about 5 sec for 400MHz cpu) to properly detect a counter wrap-around. 1576 * tbr_timeout calls read_machclk once a second. 1577 */ 1578 u_int64_t 1579 read_machclk(void) 1580 { 1581 static u_int32_t last_pcc, upper; 1582 u_int32_t pcc; 1583 1584 pcc = (u_int32_t)alpha_rpcc(); 1585 if (pcc <= last_pcc) 1586 upper++; 1587 last_pcc = pcc; 1588 return (((u_int64_t)upper << 32) + pcc); 1589 } 1590 #endif /* __alpha__ */ 1591 #else /* !i386 && !alpha */ 1592 /* use microtime() for now */ 1593 void 1594 init_machclk(void) 1595 { 1596 machclk_freq = 1000000 << MACHCLK_SHIFT; 1597 machclk_per_tick = machclk_freq / hz; 1598 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 1599 } 1600 #endif /* !i386 && !alpha */ 1601