1 /* $FreeBSD$ */ 2 /* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 /* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63 #if defined(__FreeBSD__) || defined(__NetBSD__) 64 #include "opt_altq.h" 65 #include "opt_inet.h" 66 #ifdef __FreeBSD__ 67 #include "opt_inet6.h" 68 #endif 69 #endif /* __FreeBSD__ || __NetBSD__ */ 70 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 71 72 #include <sys/param.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/socket.h> 76 #include <sys/systm.h> 77 #include <sys/errno.h> 78 #if 1 /* ALTQ3_COMPAT */ 79 #include <sys/sockio.h> 80 #include <sys/proc.h> 81 #include <sys/kernel.h> 82 #ifdef ALTQ_FLOWVALVE 83 #include <sys/queue.h> 84 #include <sys/time.h> 85 #endif 86 #endif /* ALTQ3_COMPAT */ 87 88 #include <net/if.h> 89 #include <net/if_var.h> 90 91 #include <netinet/in.h> 92 #include <netinet/in_systm.h> 93 #include <netinet/ip.h> 94 #ifdef INET6 95 #include <netinet/ip6.h> 96 #endif 97 98 #include <netpfil/pf/pf.h> 99 #include <netpfil/pf/pf_altq.h> 100 #include <netpfil/pf/pf_mtag.h> 101 #include <net/altq/altq.h> 102 #include <net/altq/altq_red.h> 103 #ifdef ALTQ3_COMPAT 104 #include <net/altq/altq_conf.h> 105 #ifdef ALTQ_FLOWVALVE 106 #include <net/altq/altq_flowvalve.h> 107 #endif 108 #endif 109 110 /* 111 * ALTQ/RED (Random Early Detection) implementation using 32-bit 112 * fixed-point calculation. 113 * 114 * written by kjc using the ns code as a reference. 115 * you can learn more about red and ns from Sally's home page at 116 * http://www-nrg.ee.lbl.gov/floyd/ 117 * 118 * most of the red parameter values are fixed in this implementation 119 * to prevent fixed-point overflow/underflow. 120 * if you change the parameters, watch out for overflow/underflow! 121 * 122 * the parameters used are recommended values by Sally. 
123 * the corresponding ns config looks: 124 * q_weight=0.00195 125 * minthresh=5 maxthresh=15 queue-size=60 126 * linterm=30 127 * dropmech=drop-tail 128 * bytes=false (can't be handled by 32-bit fixed-point) 129 * doubleq=false dqthresh=false 130 * wait=true 131 */ 132 /* 133 * alternative red parameters for a slow link. 134 * 135 * assume the queue length becomes from zero to L and keeps L, it takes 136 * N packets for q_avg to reach 63% of L. 137 * when q_weight is 0.002, N is about 500 packets. 138 * for a slow link like dial-up, 500 packets takes more than 1 minute! 139 * when q_weight is 0.008, N is about 127 packets. 140 * when q_weight is 0.016, N is about 63 packets. 141 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 142 * are allowed for 0.016. 143 * see Sally's paper for more details. 144 */ 145 /* normal red parameters */ 146 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 147 /* q_weight = 0.00195 */ 148 149 /* red parameters for a slow link */ 150 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 151 /* q_weight = 0.0078125 */ 152 153 /* red parameters for a very slow link (e.g., dialup) */ 154 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 155 /* q_weight = 0.015625 */ 156 157 /* fixed-point uses 12-bit decimal places */ 158 #define FP_SHIFT 12 /* fixed-point shift */ 159 160 /* red parameters for drop probability */ 161 #define INV_P_MAX 10 /* inverse of max drop probability */ 162 #define TH_MIN 5 /* min threshold */ 163 #define TH_MAX 15 /* max threshold */ 164 165 #define RED_LIMIT 60 /* default max queue lenght */ 166 #define RED_STATS /* collect statistics */ 167 168 /* 169 * our default policy for forced-drop is drop-tail. 170 * (in altq-1.1.2 or earlier, the default was random-drop. 171 * but it makes more sense to punish the cause of the surge.) 172 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 
 */

#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

#endif /* ALTQ3_COMPAT */

/* default red parameter values (overridable via RED_SETDEFAULTS) */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

#ifdef ALTQ3_COMPAT
/* internal function prototypes */
static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
static struct mbuf *red_dequeue(struct ifaltq *, int);
static int red_request(struct ifaltq *, int, void *);
static void red_purgeq(red_queue_t *);
static int red_detach(red_queue_t *);
#ifdef ALTQ_FLOWVALVE
static __inline struct fve *flowlist_lookup(struct flowvalve *,
    struct altq_pktattr *, struct timeval *);
static __inline struct fve *flowlist_reclaim(struct flowvalve *,
    struct altq_pktattr *);
static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
static __inline int fv_p2f(struct flowvalve *, int);
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
static struct flowvalve *fv_alloc(struct red *);
#endif
static void fv_destroy(struct flowvalve *);
static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    struct fve **);
static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    struct fve *);
#endif
#endif /* ALTQ3_COMPAT */

/*
 * red support routines
 */

/*
 * red_alloc: allocate and initialize a RED state block.
 *
 * any parameter passed as 0 selects the built-in default; the weight
 * is additionally auto-tuned for slow links (derived from pkttime).
 * returns the new red_t, or NULL if memory allocation fails.
 * note: weight must end up a power of 2 for the shift-based EWMA;
 * a non-power-of-2 value is rounded down (with a console warning).
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (rp == NULL)
		return (NULL);

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);
	if (rp->red_wtab == NULL) {
		free(rp, M_DEVBUF);
		return (NULL);
	}

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	microtime(&rp->red_last);
	return (rp);
}

/*
 * red_destroy: release a RED state block and its (refcounted) weight
 * table.  the caller owns rp; rp must not be used afterwards.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}

/*
 * red_getstats: copy the current counters into *sp.
 * q_avg is unscaled back from the fixed-point EWMA representation.
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}

/*
 * red_addq: RED enqueue decision for one incoming packet.
 *
 * updates the average queue length estimator, then either enqueues m
 * onto q, ECN-marks it, or drops it (early or forced).
 * returns 0 when the packet was queued, -1 when it was dropped
 * (the mbuf is freed here on drop).
 * NOTE(review): presumably called with the ifq lock held by the
 * enqueue path (see IFQ_LOCK_ASSERT in red_enqueue) — verify for
 * other callers.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max:  forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * returns 1 to drop (or ECN-mark) the packet, 0 to let it pass.
 * fp_len and fp_probd are fixed-point values.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int	d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((arc4random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to mark CE bit to the packet.
 *    returns 1 if successfully marked, 0 otherwise.
517 */ 518 int 519 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 520 { 521 struct mbuf *m0; 522 struct pf_mtag *at; 523 void *hdr; 524 525 at = pf_find_mtag(m); 526 if (at != NULL) { 527 hdr = at->hdr; 528 #ifdef ALTQ3_COMPAT 529 } else if (pktattr != NULL) { 530 af = pktattr->pattr_af; 531 hdr = pktattr->pattr_hdr; 532 #endif /* ALTQ3_COMPAT */ 533 } else 534 return (0); 535 536 /* verify that pattr_hdr is within the mbuf data */ 537 for (m0 = m; m0 != NULL; m0 = m0->m_next) 538 if (((caddr_t)hdr >= m0->m_data) && 539 ((caddr_t)hdr < m0->m_data + m0->m_len)) 540 break; 541 if (m0 == NULL) { 542 /* ick, tag info is stale */ 543 return (0); 544 } 545 546 switch (((struct ip *)hdr)->ip_v) { 547 case IPVERSION: 548 if (flags & REDF_ECN4) { 549 struct ip *ip = hdr; 550 u_int8_t otos; 551 int sum; 552 553 if (ip->ip_v != 4) 554 return (0); /* version mismatch! */ 555 556 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 557 return (0); /* not-ECT */ 558 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 559 return (1); /* already marked */ 560 561 /* 562 * ecn-capable but not marked, 563 * mark CE and update checksum 564 */ 565 otos = ip->ip_tos; 566 ip->ip_tos |= IPTOS_ECN_CE; 567 /* 568 * update checksum (from RFC1624) 569 * HC' = ~(~HC + ~m + m') 570 */ 571 sum = ~ntohs(ip->ip_sum) & 0xffff; 572 sum += (~otos & 0xffff) + ip->ip_tos; 573 sum = (sum >> 16) + (sum & 0xffff); 574 sum += (sum >> 16); /* add carry */ 575 ip->ip_sum = htons(~sum & 0xffff); 576 return (1); 577 } 578 break; 579 #ifdef INET6 580 case (IPV6_VERSION >> 4): 581 if (flags & REDF_ECN6) { 582 struct ip6_hdr *ip6 = hdr; 583 u_int32_t flowlabel; 584 585 flowlabel = ntohl(ip6->ip6_flow); 586 if ((flowlabel >> 28) != 6) 587 return (0); /* version mismatch! 
*/ 588 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 589 (IPTOS_ECN_NOTECT << 20)) 590 return (0); /* not-ECT */ 591 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 592 (IPTOS_ECN_CE << 20)) 593 return (1); /* already marked */ 594 /* 595 * ecn-capable but not marked, mark CE 596 */ 597 flowlabel |= (IPTOS_ECN_CE << 20); 598 ip6->ip6_flow = htonl(flowlabel); 599 return (1); 600 } 601 break; 602 #endif /* INET6 */ 603 } 604 605 /* not marked */ 606 return (0); 607 } 608 609 struct mbuf * 610 red_getq(rp, q) 611 red_t *rp; 612 class_queue_t *q; 613 { 614 struct mbuf *m; 615 616 if ((m = _getq(q)) == NULL) { 617 if (rp->red_idle == 0) { 618 rp->red_idle = 1; 619 microtime(&rp->red_last); 620 } 621 return NULL; 622 } 623 624 rp->red_idle = 0; 625 return (m); 626 } 627 628 /* 629 * helper routine to calibrate avg during idle. 630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 631 * here Wq = 1/weight and the code assumes Wq is close to zero. 632 * 633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 
634 */ 635 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 636 637 struct wtab * 638 wtab_alloc(int weight) 639 { 640 struct wtab *w; 641 int i; 642 643 for (w = wtab_list; w != NULL; w = w->w_next) 644 if (w->w_weight == weight) { 645 w->w_refcount++; 646 return (w); 647 } 648 649 w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); 650 if (w == NULL) 651 return (NULL); 652 w->w_weight = weight; 653 w->w_refcount = 1; 654 w->w_next = wtab_list; 655 wtab_list = w; 656 657 /* initialize the weight table */ 658 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 659 for (i = 1; i < 32; i++) { 660 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 661 if (w->w_tab[i] == 0 && w->w_param_max == 0) 662 w->w_param_max = 1 << i; 663 } 664 665 return (w); 666 } 667 668 int 669 wtab_destroy(struct wtab *w) 670 { 671 struct wtab *prev; 672 673 if (--w->w_refcount > 0) 674 return (0); 675 676 if (wtab_list == w) 677 wtab_list = w->w_next; 678 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 679 if (prev->w_next == w) { 680 prev->w_next = w->w_next; 681 break; 682 } 683 684 free(w, M_DEVBUF); 685 return (0); 686 } 687 688 int32_t 689 pow_w(struct wtab *w, int n) 690 { 691 int i, bit; 692 int32_t val; 693 694 if (n >= w->w_param_max) 695 return (0); 696 697 val = 1 << FP_SHIFT; 698 if (n <= 0) 699 return (val); 700 701 bit = 1; 702 i = 0; 703 while (n) { 704 if (n & bit) { 705 val = (val * w->w_tab[i]) >> FP_SHIFT; 706 n &= ~bit; 707 } 708 i++; 709 bit <<= 1; 710 } 711 return (val); 712 } 713 714 #ifdef ALTQ3_COMPAT 715 /* 716 * red device interface 717 */ 718 altqdev_decl(red); 719 720 int 721 redopen(dev, flag, fmt, p) 722 dev_t dev; 723 int flag, fmt; 724 #if (__FreeBSD_version > 500000) 725 struct thread *p; 726 #else 727 struct proc *p; 728 #endif 729 { 730 /* everything will be done when the queueing scheme is attached. 
*/ 731 return 0; 732 } 733 734 int 735 redclose(dev, flag, fmt, p) 736 dev_t dev; 737 int flag, fmt; 738 #if (__FreeBSD_version > 500000) 739 struct thread *p; 740 #else 741 struct proc *p; 742 #endif 743 { 744 red_queue_t *rqp; 745 int err, error = 0; 746 747 while ((rqp = red_list) != NULL) { 748 /* destroy all */ 749 err = red_detach(rqp); 750 if (err != 0 && error == 0) 751 error = err; 752 } 753 754 return error; 755 } 756 757 int 758 redioctl(dev, cmd, addr, flag, p) 759 dev_t dev; 760 ioctlcmd_t cmd; 761 caddr_t addr; 762 int flag; 763 #if (__FreeBSD_version > 500000) 764 struct thread *p; 765 #else 766 struct proc *p; 767 #endif 768 { 769 red_queue_t *rqp; 770 struct red_interface *ifacep; 771 struct ifnet *ifp; 772 int error = 0; 773 774 /* check super-user privilege */ 775 switch (cmd) { 776 case RED_GETSTATS: 777 break; 778 default: 779 #if (__FreeBSD_version > 700000) 780 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 781 #elsif (__FreeBSD_version > 400000) 782 if ((error = suser(p)) != 0) 783 #else 784 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 785 #endif 786 return (error); 787 break; 788 } 789 790 switch (cmd) { 791 792 case RED_ENABLE: 793 ifacep = (struct red_interface *)addr; 794 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 795 error = EBADF; 796 break; 797 } 798 error = altq_enable(rqp->rq_ifq); 799 break; 800 801 case RED_DISABLE: 802 ifacep = (struct red_interface *)addr; 803 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 804 error = EBADF; 805 break; 806 } 807 error = altq_disable(rqp->rq_ifq); 808 break; 809 810 case RED_IF_ATTACH: 811 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 812 if (ifp == NULL) { 813 error = ENXIO; 814 break; 815 } 816 817 /* allocate and initialize red_queue_t */ 818 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 819 if (rqp == NULL) { 820 error = ENOMEM; 821 break; 822 } 823 bzero(rqp, sizeof(red_queue_t)); 824 825 rqp->rq_q = 
malloc(sizeof(class_queue_t), 826 M_DEVBUF, M_WAITOK); 827 if (rqp->rq_q == NULL) { 828 free(rqp, M_DEVBUF); 829 error = ENOMEM; 830 break; 831 } 832 bzero(rqp->rq_q, sizeof(class_queue_t)); 833 834 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 835 if (rqp->rq_red == NULL) { 836 free(rqp->rq_q, M_DEVBUF); 837 free(rqp, M_DEVBUF); 838 error = ENOMEM; 839 break; 840 } 841 842 rqp->rq_ifq = &ifp->if_snd; 843 qtail(rqp->rq_q) = NULL; 844 qlen(rqp->rq_q) = 0; 845 qlimit(rqp->rq_q) = RED_LIMIT; 846 qtype(rqp->rq_q) = Q_RED; 847 848 /* 849 * set RED to this ifnet structure. 850 */ 851 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 852 red_enqueue, red_dequeue, red_request, 853 NULL, NULL); 854 if (error) { 855 red_destroy(rqp->rq_red); 856 free(rqp->rq_q, M_DEVBUF); 857 free(rqp, M_DEVBUF); 858 break; 859 } 860 861 /* add this state to the red list */ 862 rqp->rq_next = red_list; 863 red_list = rqp; 864 break; 865 866 case RED_IF_DETACH: 867 ifacep = (struct red_interface *)addr; 868 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 869 error = EBADF; 870 break; 871 } 872 error = red_detach(rqp); 873 break; 874 875 case RED_GETSTATS: 876 do { 877 struct red_stats *q_stats; 878 red_t *rp; 879 880 q_stats = (struct red_stats *)addr; 881 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 882 ALTQT_RED)) == NULL) { 883 error = EBADF; 884 break; 885 } 886 887 q_stats->q_len = qlen(rqp->rq_q); 888 q_stats->q_limit = qlimit(rqp->rq_q); 889 890 rp = rqp->rq_red; 891 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 892 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 893 q_stats->drop_cnt = rp->red_stats.drop_cnt; 894 q_stats->drop_forced = rp->red_stats.drop_forced; 895 q_stats->drop_unforced = rp->red_stats.drop_unforced; 896 q_stats->marked_packets = rp->red_stats.marked_packets; 897 898 q_stats->weight = rp->red_weight; 899 q_stats->inv_pmax = rp->red_inv_pmax; 900 q_stats->th_min = rp->red_thmin; 901 q_stats->th_max = rp->red_thmax; 902 903 #ifdef ALTQ_FLOWVALVE 904 
if (rp->red_flowvalve != NULL) { 905 struct flowvalve *fv = rp->red_flowvalve; 906 q_stats->fv_flows = fv->fv_flows; 907 q_stats->fv_pass = fv->fv_stats.pass; 908 q_stats->fv_predrop = fv->fv_stats.predrop; 909 q_stats->fv_alloc = fv->fv_stats.alloc; 910 q_stats->fv_escape = fv->fv_stats.escape; 911 } else { 912 #endif /* ALTQ_FLOWVALVE */ 913 q_stats->fv_flows = 0; 914 q_stats->fv_pass = 0; 915 q_stats->fv_predrop = 0; 916 q_stats->fv_alloc = 0; 917 q_stats->fv_escape = 0; 918 #ifdef ALTQ_FLOWVALVE 919 } 920 #endif /* ALTQ_FLOWVALVE */ 921 } while (/*CONSTCOND*/ 0); 922 break; 923 924 case RED_CONFIG: 925 do { 926 struct red_conf *fc; 927 red_t *new; 928 int s, limit; 929 930 fc = (struct red_conf *)addr; 931 if ((rqp = altq_lookup(fc->iface.red_ifname, 932 ALTQT_RED)) == NULL) { 933 error = EBADF; 934 break; 935 } 936 new = red_alloc(fc->red_weight, 937 fc->red_inv_pmax, 938 fc->red_thmin, 939 fc->red_thmax, 940 fc->red_flags, 941 fc->red_pkttime); 942 if (new == NULL) { 943 error = ENOMEM; 944 break; 945 } 946 947 #ifdef __NetBSD__ 948 s = splnet(); 949 #else 950 s = splimp(); 951 #endif 952 red_purgeq(rqp); 953 limit = fc->red_limit; 954 if (limit < fc->red_thmax) 955 limit = fc->red_thmax; 956 qlimit(rqp->rq_q) = limit; 957 fc->red_limit = limit; /* write back the new value */ 958 959 red_destroy(rqp->rq_red); 960 rqp->rq_red = new; 961 962 splx(s); 963 964 /* write back new values */ 965 fc->red_limit = limit; 966 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 967 fc->red_thmin = rqp->rq_red->red_thmin; 968 fc->red_thmax = rqp->rq_red->red_thmax; 969 970 } while (/*CONSTCOND*/ 0); 971 break; 972 973 case RED_SETDEFAULTS: 974 do { 975 struct redparams *rp; 976 977 rp = (struct redparams *)addr; 978 979 default_th_min = rp->th_min; 980 default_th_max = rp->th_max; 981 default_inv_pmax = rp->inv_pmax; 982 } while (/*CONSTCOND*/ 0); 983 break; 984 985 default: 986 error = EINVAL; 987 break; 988 } 989 return error; 990 } 991 992 static int 993 red_detach(rqp) 994 
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	/* stop the discipline before tearing it down */
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink rqp from the global red_list */
	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	IFQ_LOCK_ASSERT(ifq);

	/* red_addq frees the mbuf itself on drop */
	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

/*
 * dequeue routine:
 *	must be called in splimp.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	IFQ_LOCK_ASSERT(ifq);

	/* ALTDQ_POLL peeks at the head without removing it */
	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

/*
 * request routine: only ALTRQ_PURGE (flush all queued packets) is
 * handled; other requests are silently accepted.
 */
static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	IFQ_LOCK_ASSERT(ifq);

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

/* discard every packet queued on this RED state */
static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}

#ifdef ALTQ_FLOWVALVE

/* fixed-point scaling for the per-flow average drop rate (p) */
#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
/* fixed-point scaling for the per-flow average fraction (f) */
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE	64	/* how many flows in flowlist */

#define	FV_N		10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH 1	/* backoff threshold interval in second */
#define	FV_TTHRESH	3	/* time threshold to delete fve */
#define	FV_ALPHA	5	/* extra packet count */

#define	FV_STATS

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following
 * equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

/*
 * flowlist_lookup: find the flow-valve entry matching the packet's
 * src/dst address pair (IPv4 or IPv6).
 *
 * the LRU list is scanned from the head; entries whose last drop is
 * older than FV_TTHRESH are expired on the way (their tv_sec is set
 * to 0 as a "free" marker) and the scan stops there.  as a side
 * effect, fv->fv_flows is refreshed with the number of live entries
 * seen when no match is found.
 * returns the matching fve, or NULL if none.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

/*
 * flowlist_reclaim: recycle the least-recently-used entry for a new
 * flow taken from the packet's addresses.
 */
static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
1242 */ 1243 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1244 1245 switch (pktattr->pattr_af) { 1246 case AF_INET: 1247 ip = (struct ip *)pktattr->pattr_hdr; 1248 fve->fve_flow.flow_af = AF_INET; 1249 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1250 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1251 break; 1252 #ifdef INET6 1253 case AF_INET6: 1254 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1255 fve->fve_flow.flow_af = AF_INET6; 1256 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1257 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1258 break; 1259 #endif 1260 } 1261 1262 fve->fve_state = Green; 1263 fve->fve_p = 0.0; 1264 fve->fve_f = 0.0; 1265 fve->fve_ifseq = fv->fv_ifseq - 1; 1266 fve->fve_count = 0; 1267 1268 fv->fv_flows++; 1269 #ifdef FV_STATS 1270 fv->fv_stats.alloc++; 1271 #endif 1272 return (fve); 1273 } 1274 1275 static __inline void 1276 flowlist_move_to_head(fv, fve) 1277 struct flowvalve *fv; 1278 struct fve *fve; 1279 { 1280 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1281 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1282 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1283 } 1284 } 1285 1286 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 1287 /* 1288 * allocate flowvalve structure 1289 */ 1290 static struct flowvalve * 1291 fv_alloc(rp) 1292 struct red *rp; 1293 { 1294 struct flowvalve *fv; 1295 struct fve *fve; 1296 int i, num; 1297 1298 num = FV_FLOWLISTSIZE; 1299 fv = malloc(sizeof(struct flowvalve), 1300 M_DEVBUF, M_WAITOK); 1301 if (fv == NULL) 1302 return (NULL); 1303 bzero(fv, sizeof(struct flowvalve)); 1304 1305 fv->fv_fves = malloc(sizeof(struct fve) * num, 1306 M_DEVBUF, M_WAITOK); 1307 if (fv->fv_fves == NULL) { 1308 free(fv, M_DEVBUF); 1309 return (NULL); 1310 } 1311 bzero(fv->fv_fves, sizeof(struct fve) * num); 1312 1313 fv->fv_flows = 0; 1314 TAILQ_INIT(&fv->fv_flowlist); 1315 for (i = 0; i < num; i++) { 1316 fve = &fv->fv_fves[i]; 1317 fve->fve_lastdrop.tv_sec = 0; 1318 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, 
fve_lru); 1319 } 1320 1321 /* initialize drop rate threshold in scaled fixed-point */ 1322 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1323 1324 /* initialize drop rate to fraction table */ 1325 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, 1326 M_DEVBUF, M_WAITOK); 1327 if (fv->fv_p2ftab == NULL) { 1328 free(fv->fv_fves, M_DEVBUF); 1329 free(fv, M_DEVBUF); 1330 return (NULL); 1331 } 1332 /* 1333 * create the p2f table. 1334 * (shift is used to keep the precision) 1335 */ 1336 for (i = 1; i < BRTT_SIZE; i++) { 1337 int f; 1338 1339 f = brtt_tab[i] << 8; 1340 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1341 } 1342 1343 return (fv); 1344 } 1345 #endif 1346 1347 static void fv_destroy(fv) 1348 struct flowvalve *fv; 1349 { 1350 free(fv->fv_p2ftab, M_DEVBUF); 1351 free(fv->fv_fves, M_DEVBUF); 1352 free(fv, M_DEVBUF); 1353 } 1354 1355 static __inline int 1356 fv_p2f(fv, p) 1357 struct flowvalve *fv; 1358 int p; 1359 { 1360 int val, f; 1361 1362 if (p >= BRTT_PMAX) 1363 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1364 else if ((val = (p & BRTT_MASK))) 1365 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1366 else 1367 f = fv->fv_p2ftab[1]; 1368 return (f); 1369 } 1370 1371 /* 1372 * check if an arriving packet should be pre-dropped. 1373 * called from red_addq() when a packet arrives. 1374 * returns 1 when the packet should be pre-dropped. 1375 * should be called in splimp. 
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	/* count every arrival so fve_f can be computed as a packet ratio */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* hand the entry back so fv_dropbyred() can skip a second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (all terms kept in the FV_FSCALE'd fixed-point format)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose average drop rate exceeds
	 * the threshold is compared against the bandwidth fraction the
	 * Brtt model allows for that drop rate; exceeding it turns the
	 * flow Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow: pre-drop and record the drop time */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the drop rate on every accepted packet; fv_dropbyred()
	 * compensates for this when the packet is dropped after all)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splimp.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* unknown flow: steal the LRU entry for it */
		fve = flowlist_reclaim(fv, pktattr);

	/* this flow just took a drop; make it the most recent on the LRU */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	      p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* ALTQ3-compat device switch entry for loading RED as a kernel module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */