14d723e5aSJoerg Sonnenberger /* $KAME: altq_red.c,v 1.19 2004/04/17 10:54:49 kjc Exp $ */ 2*4b1cf444SSascha Wildner /* $DragonFly: src/sys/net/altq/altq_red.c,v 1.4 2006/12/22 23:44:55 swildner Exp $ */ 34d723e5aSJoerg Sonnenberger 44d723e5aSJoerg Sonnenberger /* 54d723e5aSJoerg Sonnenberger * Copyright (C) 1997-2003 64d723e5aSJoerg Sonnenberger * Sony Computer Science Laboratories Inc. All rights reserved. 74d723e5aSJoerg Sonnenberger * 84d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without 94d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions 104d723e5aSJoerg Sonnenberger * are met: 114d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright 124d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer. 134d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright 144d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the 154d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution. 164d723e5aSJoerg Sonnenberger * 174d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 184d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 194d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 204d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 214d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 224d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 234d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 244d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 254d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 264d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 274d723e5aSJoerg Sonnenberger * SUCH DAMAGE. 284d723e5aSJoerg Sonnenberger * 294d723e5aSJoerg Sonnenberger */ 304d723e5aSJoerg Sonnenberger /* 314d723e5aSJoerg Sonnenberger * Copyright (c) 1990-1994 Regents of the University of California. 324d723e5aSJoerg Sonnenberger * All rights reserved. 334d723e5aSJoerg Sonnenberger * 344d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without 354d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions 364d723e5aSJoerg Sonnenberger * are met: 374d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright 384d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer. 394d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright 404d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the 414d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution. 424d723e5aSJoerg Sonnenberger * 3. All advertising materials mentioning features or use of this software 434d723e5aSJoerg Sonnenberger * must display the following acknowledgement: 444d723e5aSJoerg Sonnenberger * This product includes software developed by the Computer Systems 454d723e5aSJoerg Sonnenberger * Engineering Group at Lawrence Berkeley Laboratory. 464d723e5aSJoerg Sonnenberger * 4. Neither the name of the University nor of the Laboratory may be used 474d723e5aSJoerg Sonnenberger * to endorse or promote products derived from this software without 484d723e5aSJoerg Sonnenberger * specific prior written permission. 494d723e5aSJoerg Sonnenberger * 504d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 514d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 524d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 534d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 544d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 554d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 564d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 574d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 584d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 594d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 604d723e5aSJoerg Sonnenberger * SUCH DAMAGE. 614d723e5aSJoerg Sonnenberger */ 624d723e5aSJoerg Sonnenberger 634d723e5aSJoerg Sonnenberger #include "opt_altq.h" 644d723e5aSJoerg Sonnenberger #include "opt_inet.h" 654d723e5aSJoerg Sonnenberger #include "opt_inet6.h" 664d723e5aSJoerg Sonnenberger 674d723e5aSJoerg Sonnenberger #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 684d723e5aSJoerg Sonnenberger 694d723e5aSJoerg Sonnenberger #include <sys/param.h> 704d723e5aSJoerg Sonnenberger #include <sys/malloc.h> 714d723e5aSJoerg Sonnenberger #include <sys/mbuf.h> 724d723e5aSJoerg Sonnenberger #include <sys/socket.h> 734d723e5aSJoerg Sonnenberger #include <sys/systm.h> 744d723e5aSJoerg Sonnenberger #include <sys/errno.h> 754d723e5aSJoerg Sonnenberger 764d723e5aSJoerg Sonnenberger #include <net/if.h> 774d723e5aSJoerg Sonnenberger 784d723e5aSJoerg Sonnenberger #include <netinet/in.h> 794d723e5aSJoerg Sonnenberger #include <netinet/in_systm.h> 804d723e5aSJoerg Sonnenberger #include <netinet/ip.h> 814d723e5aSJoerg Sonnenberger #ifdef INET6 824d723e5aSJoerg Sonnenberger #include <netinet/ip6.h> 834d723e5aSJoerg Sonnenberger #endif 844d723e5aSJoerg Sonnenberger 854d723e5aSJoerg Sonnenberger #include <net/pf/pfvar.h> 864d723e5aSJoerg Sonnenberger #include <net/altq/altq.h> 874d723e5aSJoerg Sonnenberger #include <net/altq/altq_red.h> 884d723e5aSJoerg Sonnenberger 894d723e5aSJoerg Sonnenberger /* 904d723e5aSJoerg Sonnenberger * ALTQ/RED (Random Early Detection) implementation using 32-bit 914d723e5aSJoerg Sonnenberger * fixed-point calculation. 924d723e5aSJoerg Sonnenberger * 934d723e5aSJoerg Sonnenberger * written by kjc using the ns code as a reference. 944d723e5aSJoerg Sonnenberger * you can learn more about red and ns from Sally's home page at 954d723e5aSJoerg Sonnenberger * http://www-nrg.ee.lbl.gov/floyd/ 964d723e5aSJoerg Sonnenberger * 974d723e5aSJoerg Sonnenberger * most of the red parameter values are fixed in this implementation 984d723e5aSJoerg Sonnenberger * to prevent fixed-point overflow/underflow. 994d723e5aSJoerg Sonnenberger * if you change the parameters, watch out for overflow/underflow! 1004d723e5aSJoerg Sonnenberger * 1014d723e5aSJoerg Sonnenberger * the parameters used are recommended values by Sally. 1024d723e5aSJoerg Sonnenberger * the corresponding ns config looks: 1034d723e5aSJoerg Sonnenberger * q_weight=0.00195 1044d723e5aSJoerg Sonnenberger * minthresh=5 maxthresh=15 queue-size=60 1054d723e5aSJoerg Sonnenberger * linterm=30 1064d723e5aSJoerg Sonnenberger * dropmech=drop-tail 1074d723e5aSJoerg Sonnenberger * bytes=false (can't be handled by 32-bit fixed-point) 1084d723e5aSJoerg Sonnenberger * doubleq=false dqthresh=false 1094d723e5aSJoerg Sonnenberger * wait=true 1104d723e5aSJoerg Sonnenberger */ 1114d723e5aSJoerg Sonnenberger /* 1124d723e5aSJoerg Sonnenberger * alternative red parameters for a slow link. 1134d723e5aSJoerg Sonnenberger * 1144d723e5aSJoerg Sonnenberger * assume the queue length becomes from zero to L and keeps L, it takes 1154d723e5aSJoerg Sonnenberger * N packets for q_avg to reach 63% of L. 1164d723e5aSJoerg Sonnenberger * when q_weight is 0.002, N is about 500 packets. 1174d723e5aSJoerg Sonnenberger * for a slow link like dial-up, 500 packets takes more than 1 minute! 1184d723e5aSJoerg Sonnenberger * when q_weight is 0.008, N is about 127 packets. 1194d723e5aSJoerg Sonnenberger * when q_weight is 0.016, N is about 63 packets. 1204d723e5aSJoerg Sonnenberger * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 1214d723e5aSJoerg Sonnenberger * are allowed for 0.016. 1224d723e5aSJoerg Sonnenberger * see Sally's paper for more details. 1234d723e5aSJoerg Sonnenberger */ 1244d723e5aSJoerg Sonnenberger /* normal red parameters */ 1254d723e5aSJoerg Sonnenberger #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 1264d723e5aSJoerg Sonnenberger /* q_weight = 0.00195 */ 1274d723e5aSJoerg Sonnenberger 1284d723e5aSJoerg Sonnenberger /* red parameters for a slow link */ 1294d723e5aSJoerg Sonnenberger #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 1304d723e5aSJoerg Sonnenberger /* q_weight = 0.0078125 */ 1314d723e5aSJoerg Sonnenberger 1324d723e5aSJoerg Sonnenberger /* red parameters for a very slow link (e.g., dialup) */ 1334d723e5aSJoerg Sonnenberger #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 1344d723e5aSJoerg Sonnenberger /* q_weight = 0.015625 */ 1354d723e5aSJoerg Sonnenberger 1364d723e5aSJoerg Sonnenberger /* fixed-point uses 12-bit decimal places */ 1374d723e5aSJoerg Sonnenberger #define FP_SHIFT 12 /* fixed-point shift */ 1384d723e5aSJoerg Sonnenberger 1394d723e5aSJoerg Sonnenberger /* red parameters for drop probability */ 1404d723e5aSJoerg Sonnenberger #define INV_P_MAX 10 /* inverse of max drop probability */ 1414d723e5aSJoerg Sonnenberger #define TH_MIN 5 /* min threshold */ 1424d723e5aSJoerg Sonnenberger #define TH_MAX 15 /* max threshold */ 1434d723e5aSJoerg Sonnenberger 1444d723e5aSJoerg Sonnenberger #define RED_LIMIT 60 /* default max queue lenght */ 1454d723e5aSJoerg Sonnenberger #define RED_STATS /* collect statistics */ 1464d723e5aSJoerg Sonnenberger 1474d723e5aSJoerg Sonnenberger /* 1484d723e5aSJoerg Sonnenberger * our default policy for forced-drop is drop-tail. 1494d723e5aSJoerg Sonnenberger * (in altq-1.1.2 or earlier, the default was random-drop. 1504d723e5aSJoerg Sonnenberger * but it makes more sense to punish the cause of the surge.) 1514d723e5aSJoerg Sonnenberger * to switch to the random-drop policy, define "RED_RANDOM_DROP". 1524d723e5aSJoerg Sonnenberger */ 1534d723e5aSJoerg Sonnenberger 1544d723e5aSJoerg Sonnenberger /* default red parameter values */ 1554d723e5aSJoerg Sonnenberger static int default_th_min = TH_MIN; 1564d723e5aSJoerg Sonnenberger static int default_th_max = TH_MAX; 1574d723e5aSJoerg Sonnenberger static int default_inv_pmax = INV_P_MAX; 1584d723e5aSJoerg Sonnenberger 1594d723e5aSJoerg Sonnenberger /* 1604d723e5aSJoerg Sonnenberger * red support routines 1614d723e5aSJoerg Sonnenberger */ 1624d723e5aSJoerg Sonnenberger red_t * 1634d723e5aSJoerg Sonnenberger red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime) 1644d723e5aSJoerg Sonnenberger { 1654d723e5aSJoerg Sonnenberger red_t *rp; 1664d723e5aSJoerg Sonnenberger int w, i; 1674d723e5aSJoerg Sonnenberger int npkts_per_sec; 1684d723e5aSJoerg Sonnenberger 169efda3bd0SMatthew Dillon rp = kmalloc(sizeof(*rp), M_ALTQ, M_WAITOK | M_ZERO); 1704d723e5aSJoerg Sonnenberger rp->red_avg = 0; 1714d723e5aSJoerg Sonnenberger rp->red_idle = 1; 1724d723e5aSJoerg Sonnenberger 1734d723e5aSJoerg Sonnenberger if (weight == 0) 1744d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT; 1754d723e5aSJoerg Sonnenberger else 1764d723e5aSJoerg Sonnenberger rp->red_weight = weight; 1774d723e5aSJoerg Sonnenberger if (inv_pmax == 0) 1784d723e5aSJoerg Sonnenberger rp->red_inv_pmax = default_inv_pmax; 1794d723e5aSJoerg Sonnenberger else 1804d723e5aSJoerg Sonnenberger rp->red_inv_pmax = inv_pmax; 1814d723e5aSJoerg Sonnenberger if (th_min == 0) 1824d723e5aSJoerg Sonnenberger rp->red_thmin = default_th_min; 1834d723e5aSJoerg Sonnenberger else 1844d723e5aSJoerg Sonnenberger rp->red_thmin = th_min; 1854d723e5aSJoerg Sonnenberger if (th_max == 0) 1864d723e5aSJoerg Sonnenberger rp->red_thmax = default_th_max; 1874d723e5aSJoerg Sonnenberger else 1884d723e5aSJoerg Sonnenberger rp->red_thmax = th_max; 1894d723e5aSJoerg Sonnenberger 1904d723e5aSJoerg Sonnenberger rp->red_flags = flags; 1914d723e5aSJoerg Sonnenberger 1924d723e5aSJoerg Sonnenberger if (pkttime == 0) 1934d723e5aSJoerg Sonnenberger /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 1944d723e5aSJoerg Sonnenberger rp->red_pkttime = 800; 1954d723e5aSJoerg Sonnenberger else 1964d723e5aSJoerg Sonnenberger rp->red_pkttime = pkttime; 1974d723e5aSJoerg Sonnenberger 1984d723e5aSJoerg Sonnenberger if (weight == 0) { 1994d723e5aSJoerg Sonnenberger /* when the link is very slow, adjust red parameters */ 2004d723e5aSJoerg Sonnenberger npkts_per_sec = 1000000 / rp->red_pkttime; 2014d723e5aSJoerg Sonnenberger if (npkts_per_sec < 50) { 2024d723e5aSJoerg Sonnenberger /* up to about 400Kbps */ 2034d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_2; 2044d723e5aSJoerg Sonnenberger } else if (npkts_per_sec < 300) { 2054d723e5aSJoerg Sonnenberger /* up to about 2.4Mbps */ 2064d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_1; 2074d723e5aSJoerg Sonnenberger } 2084d723e5aSJoerg Sonnenberger } 2094d723e5aSJoerg Sonnenberger 2104d723e5aSJoerg Sonnenberger /* calculate wshift. weight must be power of 2 */ 2114d723e5aSJoerg Sonnenberger w = rp->red_weight; 2124d723e5aSJoerg Sonnenberger for (i = 0; w > 1; i++) 2134d723e5aSJoerg Sonnenberger w = w >> 1; 2144d723e5aSJoerg Sonnenberger rp->red_wshift = i; 2154d723e5aSJoerg Sonnenberger w = 1 << rp->red_wshift; 2164d723e5aSJoerg Sonnenberger if (w != rp->red_weight) { 217*4b1cf444SSascha Wildner kprintf("invalid weight value %d for red! use %d\n", 2184d723e5aSJoerg Sonnenberger rp->red_weight, w); 2194d723e5aSJoerg Sonnenberger rp->red_weight = w; 2204d723e5aSJoerg Sonnenberger } 2214d723e5aSJoerg Sonnenberger 2224d723e5aSJoerg Sonnenberger /* 2234d723e5aSJoerg Sonnenberger * thmin_s and thmax_s are scaled versions of th_min and th_max 2244d723e5aSJoerg Sonnenberger * to be compared with avg. 2254d723e5aSJoerg Sonnenberger */ 2264d723e5aSJoerg Sonnenberger rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 2274d723e5aSJoerg Sonnenberger rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 2284d723e5aSJoerg Sonnenberger 2294d723e5aSJoerg Sonnenberger /* 2304d723e5aSJoerg Sonnenberger * precompute probability denominator 2314d723e5aSJoerg Sonnenberger * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 2324d723e5aSJoerg Sonnenberger */ 2334d723e5aSJoerg Sonnenberger rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 2344d723e5aSJoerg Sonnenberger * rp->red_inv_pmax) << FP_SHIFT; 2354d723e5aSJoerg Sonnenberger 2364d723e5aSJoerg Sonnenberger /* allocate weight table */ 2374d723e5aSJoerg Sonnenberger rp->red_wtab = wtab_alloc(rp->red_weight); 2384d723e5aSJoerg Sonnenberger 2394d723e5aSJoerg Sonnenberger microtime(&rp->red_last); 2404d723e5aSJoerg Sonnenberger return (rp); 2414d723e5aSJoerg Sonnenberger } 2424d723e5aSJoerg Sonnenberger 2434d723e5aSJoerg Sonnenberger void 2444d723e5aSJoerg Sonnenberger red_destroy(red_t *rp) 2454d723e5aSJoerg Sonnenberger { 2464d723e5aSJoerg Sonnenberger wtab_destroy(rp->red_wtab); 247efda3bd0SMatthew Dillon kfree(rp, M_ALTQ); 2484d723e5aSJoerg Sonnenberger } 2494d723e5aSJoerg Sonnenberger 2504d723e5aSJoerg Sonnenberger void 2514d723e5aSJoerg Sonnenberger red_getstats(red_t *rp, struct redstats *sp) 2524d723e5aSJoerg Sonnenberger { 2534d723e5aSJoerg Sonnenberger sp->q_avg = rp->red_avg >> rp->red_wshift; 2544d723e5aSJoerg Sonnenberger sp->xmit_cnt = rp->red_stats.xmit_cnt; 2554d723e5aSJoerg Sonnenberger sp->drop_cnt = rp->red_stats.drop_cnt; 2564d723e5aSJoerg Sonnenberger sp->drop_forced = rp->red_stats.drop_forced; 2574d723e5aSJoerg Sonnenberger sp->drop_unforced = rp->red_stats.drop_unforced; 2584d723e5aSJoerg Sonnenberger sp->marked_packets = rp->red_stats.marked_packets; 2594d723e5aSJoerg Sonnenberger } 2604d723e5aSJoerg Sonnenberger 2614d723e5aSJoerg Sonnenberger int 2624d723e5aSJoerg Sonnenberger red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) 2634d723e5aSJoerg Sonnenberger { 2644d723e5aSJoerg Sonnenberger int avg, droptype; 2654d723e5aSJoerg Sonnenberger int n; 2664d723e5aSJoerg Sonnenberger 2674d723e5aSJoerg Sonnenberger avg = rp->red_avg; 2684d723e5aSJoerg Sonnenberger 2694d723e5aSJoerg Sonnenberger /* 2704d723e5aSJoerg Sonnenberger * if we were idle, we pretend that n packets arrived during 2714d723e5aSJoerg Sonnenberger * the idle period. 2724d723e5aSJoerg Sonnenberger */ 2734d723e5aSJoerg Sonnenberger if (rp->red_idle) { 2744d723e5aSJoerg Sonnenberger struct timeval now; 2754d723e5aSJoerg Sonnenberger int t; 2764d723e5aSJoerg Sonnenberger 2774d723e5aSJoerg Sonnenberger rp->red_idle = 0; 2784d723e5aSJoerg Sonnenberger microtime(&now); 2794d723e5aSJoerg Sonnenberger t = (now.tv_sec - rp->red_last.tv_sec); 2804d723e5aSJoerg Sonnenberger if (t > 60) { 2814d723e5aSJoerg Sonnenberger /* 2824d723e5aSJoerg Sonnenberger * being idle for more than 1 minute, set avg to zero. 2834d723e5aSJoerg Sonnenberger * this prevents t from overflow. 2844d723e5aSJoerg Sonnenberger */ 2854d723e5aSJoerg Sonnenberger avg = 0; 2864d723e5aSJoerg Sonnenberger } else { 2874d723e5aSJoerg Sonnenberger t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 2884d723e5aSJoerg Sonnenberger n = t / rp->red_pkttime - 1; 2894d723e5aSJoerg Sonnenberger 2904d723e5aSJoerg Sonnenberger /* the following line does (avg = (1 - Wq)^n * avg) */ 2914d723e5aSJoerg Sonnenberger if (n > 0) 2924d723e5aSJoerg Sonnenberger avg = (avg >> FP_SHIFT) * 2934d723e5aSJoerg Sonnenberger pow_w(rp->red_wtab, n); 2944d723e5aSJoerg Sonnenberger } 2954d723e5aSJoerg Sonnenberger } 2964d723e5aSJoerg Sonnenberger 2974d723e5aSJoerg Sonnenberger /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 2984d723e5aSJoerg Sonnenberger avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 2994d723e5aSJoerg Sonnenberger rp->red_avg = avg; /* save the new value */ 3004d723e5aSJoerg Sonnenberger 3014d723e5aSJoerg Sonnenberger /* 3024d723e5aSJoerg Sonnenberger * red_count keeps a tally of arriving traffic that has not 3034d723e5aSJoerg Sonnenberger * been dropped. 3044d723e5aSJoerg Sonnenberger */ 3054d723e5aSJoerg Sonnenberger rp->red_count++; 3064d723e5aSJoerg Sonnenberger 3074d723e5aSJoerg Sonnenberger /* see if we drop early */ 3084d723e5aSJoerg Sonnenberger droptype = DTYPE_NODROP; 3094d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmin_s && qlen(q) > 1) { 3104d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmax_s) { 3114d723e5aSJoerg Sonnenberger /* avg >= th_max: forced drop */ 3124d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED; 3134d723e5aSJoerg Sonnenberger } else if (rp->red_old == 0) { 3144d723e5aSJoerg Sonnenberger /* first exceeds th_min */ 3154d723e5aSJoerg Sonnenberger rp->red_count = 1; 3164d723e5aSJoerg Sonnenberger rp->red_old = 1; 3174d723e5aSJoerg Sonnenberger } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 3184d723e5aSJoerg Sonnenberger rp->red_probd, rp->red_count)) { 3194d723e5aSJoerg Sonnenberger /* mark or drop by red */ 3204d723e5aSJoerg Sonnenberger if ((rp->red_flags & REDF_ECN) && 3214d723e5aSJoerg Sonnenberger mark_ecn(m, pktattr, rp->red_flags)) { 3224d723e5aSJoerg Sonnenberger /* successfully marked. do not drop. */ 3234d723e5aSJoerg Sonnenberger rp->red_count = 0; 3244d723e5aSJoerg Sonnenberger #ifdef RED_STATS 3254d723e5aSJoerg Sonnenberger rp->red_stats.marked_packets++; 3264d723e5aSJoerg Sonnenberger #endif 3274d723e5aSJoerg Sonnenberger } else { 3284d723e5aSJoerg Sonnenberger /* unforced drop by red */ 3294d723e5aSJoerg Sonnenberger droptype = DTYPE_EARLY; 3304d723e5aSJoerg Sonnenberger } 3314d723e5aSJoerg Sonnenberger } 3324d723e5aSJoerg Sonnenberger } else { 3334d723e5aSJoerg Sonnenberger /* avg < th_min */ 3344d723e5aSJoerg Sonnenberger rp->red_old = 0; 3354d723e5aSJoerg Sonnenberger } 3364d723e5aSJoerg Sonnenberger 3374d723e5aSJoerg Sonnenberger /* 3384d723e5aSJoerg Sonnenberger * if the queue length hits the hard limit, it's a forced drop. 3394d723e5aSJoerg Sonnenberger */ 3404d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 3414d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED; 3424d723e5aSJoerg Sonnenberger 3434d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP 3444d723e5aSJoerg Sonnenberger /* if successful or forced drop, enqueue this packet. */ 3454d723e5aSJoerg Sonnenberger if (droptype != DTYPE_EARLY) 3464d723e5aSJoerg Sonnenberger _addq(q, m); 3474d723e5aSJoerg Sonnenberger #else 3484d723e5aSJoerg Sonnenberger /* if successful, enqueue this packet. */ 3494d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP) 3504d723e5aSJoerg Sonnenberger _addq(q, m); 3514d723e5aSJoerg Sonnenberger #endif 3524d723e5aSJoerg Sonnenberger if (droptype != DTYPE_NODROP) { 3534d723e5aSJoerg Sonnenberger if (droptype == DTYPE_EARLY) { 3544d723e5aSJoerg Sonnenberger /* drop the incoming packet */ 3554d723e5aSJoerg Sonnenberger #ifdef RED_STATS 3564d723e5aSJoerg Sonnenberger rp->red_stats.drop_unforced++; 3574d723e5aSJoerg Sonnenberger #endif 3584d723e5aSJoerg Sonnenberger } else { 3594d723e5aSJoerg Sonnenberger /* forced drop, select a victim packet in the queue. */ 3604d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP 3614d723e5aSJoerg Sonnenberger m = _getq_random(q); 3624d723e5aSJoerg Sonnenberger #endif 3634d723e5aSJoerg Sonnenberger #ifdef RED_STATS 3644d723e5aSJoerg Sonnenberger rp->red_stats.drop_forced++; 3654d723e5aSJoerg Sonnenberger #endif 3664d723e5aSJoerg Sonnenberger } 3674d723e5aSJoerg Sonnenberger #ifdef RED_STATS 3684d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 3694d723e5aSJoerg Sonnenberger #endif 3704d723e5aSJoerg Sonnenberger rp->red_count = 0; 3714d723e5aSJoerg Sonnenberger m_freem(m); 3724d723e5aSJoerg Sonnenberger return (-1); 3734d723e5aSJoerg Sonnenberger } 3744d723e5aSJoerg Sonnenberger /* successfully queued */ 3754d723e5aSJoerg Sonnenberger #ifdef RED_STATS 3764d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 3774d723e5aSJoerg Sonnenberger #endif 3784d723e5aSJoerg Sonnenberger return (0); 3794d723e5aSJoerg Sonnenberger } 3804d723e5aSJoerg Sonnenberger 3814d723e5aSJoerg Sonnenberger /* 3824d723e5aSJoerg Sonnenberger * early-drop probability is calculated as follows: 3834d723e5aSJoerg Sonnenberger * prob = p_max * (avg - th_min) / (th_max - th_min) 3844d723e5aSJoerg Sonnenberger * prob_a = prob / (2 - count*prob) 3854d723e5aSJoerg Sonnenberger * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 3864d723e5aSJoerg Sonnenberger * here prob_a increases as successive undrop count increases. 3874d723e5aSJoerg Sonnenberger * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 3884d723e5aSJoerg Sonnenberger * becomes 1 when (count >= (2 / prob))). 3894d723e5aSJoerg Sonnenberger */ 3904d723e5aSJoerg Sonnenberger int 3914d723e5aSJoerg Sonnenberger drop_early(int fp_len, int fp_probd, int count) 3924d723e5aSJoerg Sonnenberger { 3934d723e5aSJoerg Sonnenberger int d; /* denominator of drop-probability */ 3944d723e5aSJoerg Sonnenberger 3954d723e5aSJoerg Sonnenberger d = fp_probd - count * fp_len; 3964d723e5aSJoerg Sonnenberger if (d <= 0) { 3974d723e5aSJoerg Sonnenberger /* count exceeds the hard limit: drop or mark */ 3984d723e5aSJoerg Sonnenberger return (1); 3994d723e5aSJoerg Sonnenberger } 4004d723e5aSJoerg Sonnenberger 4014d723e5aSJoerg Sonnenberger /* 4024d723e5aSJoerg Sonnenberger * now the range of d is [1..600] in fixed-point. (when 4034d723e5aSJoerg Sonnenberger * th_max-th_min=10 and p_max=1/30) 4044d723e5aSJoerg Sonnenberger * drop probability = (avg - TH_MIN) / d 4054d723e5aSJoerg Sonnenberger */ 4064d723e5aSJoerg Sonnenberger 4070ced1954SMatthew Dillon if ((karc4random() % d) < fp_len) { 4084d723e5aSJoerg Sonnenberger /* drop or mark */ 4094d723e5aSJoerg Sonnenberger return (1); 4104d723e5aSJoerg Sonnenberger } 4114d723e5aSJoerg Sonnenberger /* no drop/mark */ 4124d723e5aSJoerg Sonnenberger return (0); 4134d723e5aSJoerg Sonnenberger } 4144d723e5aSJoerg Sonnenberger 4154d723e5aSJoerg Sonnenberger /* 4164d723e5aSJoerg Sonnenberger * try to mark CE bit to the packet. 4174d723e5aSJoerg Sonnenberger * returns 1 if successfully marked, 0 otherwise. 4184d723e5aSJoerg Sonnenberger */ 4194d723e5aSJoerg Sonnenberger int 4204d723e5aSJoerg Sonnenberger mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 4214d723e5aSJoerg Sonnenberger { 4224d723e5aSJoerg Sonnenberger struct mbuf *m0; 4234d723e5aSJoerg Sonnenberger void *hdr; 4244d723e5aSJoerg Sonnenberger int af; 4254d723e5aSJoerg Sonnenberger 426dd5c35afSMatthew Dillon if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) { 427315a7da3SJan Lentfer af = m->m_pkthdr.pf.ecn_af; 428315a7da3SJan Lentfer hdr = m->m_pkthdr.pf.hdr; 429dd5c35afSMatthew Dillon } else if (pktattr) { 430dd5c35afSMatthew Dillon af = pktattr->pattr_af; 431dd5c35afSMatthew Dillon hdr = pktattr->pattr_hdr; 432dd5c35afSMatthew Dillon } else { 433dd5c35afSMatthew Dillon return (0); 434dd5c35afSMatthew Dillon } 4354d723e5aSJoerg Sonnenberger 4364d723e5aSJoerg Sonnenberger if (af != AF_INET && af != AF_INET6) 4374d723e5aSJoerg Sonnenberger return (0); 4384d723e5aSJoerg Sonnenberger 4394d723e5aSJoerg Sonnenberger /* verify that pattr_hdr is within the mbuf data */ 4404d723e5aSJoerg Sonnenberger for (m0 = m; m0 != NULL; m0 = m0->m_next) { 4414d723e5aSJoerg Sonnenberger if (((caddr_t)hdr >= m0->m_data) && 4424d723e5aSJoerg Sonnenberger ((caddr_t)hdr < m0->m_data + m0->m_len)) 4434d723e5aSJoerg Sonnenberger break; 4444d723e5aSJoerg Sonnenberger } 4454d723e5aSJoerg Sonnenberger if (m0 == NULL) { 4464d723e5aSJoerg Sonnenberger /* ick, tag info is stale */ 4474d723e5aSJoerg Sonnenberger return (0); 4484d723e5aSJoerg Sonnenberger } 4494d723e5aSJoerg Sonnenberger 4504d723e5aSJoerg Sonnenberger switch (af) { 4514d723e5aSJoerg Sonnenberger case AF_INET: 4524d723e5aSJoerg Sonnenberger if (flags & REDF_ECN4) { 4534d723e5aSJoerg Sonnenberger struct ip *ip = hdr; 4544d723e5aSJoerg Sonnenberger uint8_t otos; 4554d723e5aSJoerg Sonnenberger int sum; 4564d723e5aSJoerg Sonnenberger 4574d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 4584d723e5aSJoerg Sonnenberger return (0); /* not-ECT */ 4594d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 4604d723e5aSJoerg Sonnenberger return (1); /* already marked */ 4614d723e5aSJoerg Sonnenberger 4624d723e5aSJoerg Sonnenberger /* 4634d723e5aSJoerg Sonnenberger * ecn-capable but not marked, 4644d723e5aSJoerg Sonnenberger * mark CE and update checksum 4654d723e5aSJoerg Sonnenberger */ 4664d723e5aSJoerg Sonnenberger otos = ip->ip_tos; 4674d723e5aSJoerg Sonnenberger ip->ip_tos |= IPTOS_ECN_CE; 4684d723e5aSJoerg Sonnenberger /* 4694d723e5aSJoerg Sonnenberger * update checksum (from RFC1624) 4704d723e5aSJoerg Sonnenberger * HC' = ~(~HC + ~m + m') 4714d723e5aSJoerg Sonnenberger */ 4724d723e5aSJoerg Sonnenberger sum = ~ntohs(ip->ip_sum) & 0xffff; 4734d723e5aSJoerg Sonnenberger sum += (~otos & 0xffff) + ip->ip_tos; 4744d723e5aSJoerg Sonnenberger sum = (sum >> 16) + (sum & 0xffff); 4754d723e5aSJoerg Sonnenberger sum += (sum >> 16); /* add carry */ 4764d723e5aSJoerg Sonnenberger ip->ip_sum = htons(~sum & 0xffff); 4774d723e5aSJoerg Sonnenberger return (1); 4784d723e5aSJoerg Sonnenberger } 4794d723e5aSJoerg Sonnenberger break; 4804d723e5aSJoerg Sonnenberger #ifdef INET6 4814d723e5aSJoerg Sonnenberger case AF_INET6: 4824d723e5aSJoerg Sonnenberger if (flags & REDF_ECN6) { 4834d723e5aSJoerg Sonnenberger struct ip6_hdr *ip6 = hdr; 4844d723e5aSJoerg Sonnenberger uint32_t flowlabel; 4854d723e5aSJoerg Sonnenberger 4864d723e5aSJoerg Sonnenberger flowlabel = ntohl(ip6->ip6_flow); 4874d723e5aSJoerg Sonnenberger if ((flowlabel >> 28) != 6) 4884d723e5aSJoerg Sonnenberger return (0); /* version mismatch! */ 4894d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 4904d723e5aSJoerg Sonnenberger (IPTOS_ECN_NOTECT << 20)) 4914d723e5aSJoerg Sonnenberger return (0); /* not-ECT */ 4924d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 4934d723e5aSJoerg Sonnenberger (IPTOS_ECN_CE << 20)) 4944d723e5aSJoerg Sonnenberger return (1); /* already marked */ 4954d723e5aSJoerg Sonnenberger /* 4964d723e5aSJoerg Sonnenberger * ecn-capable but not marked, mark CE 4974d723e5aSJoerg Sonnenberger */ 4984d723e5aSJoerg Sonnenberger flowlabel |= (IPTOS_ECN_CE << 20); 4994d723e5aSJoerg Sonnenberger ip6->ip6_flow = htonl(flowlabel); 5004d723e5aSJoerg Sonnenberger return (1); 5014d723e5aSJoerg Sonnenberger } 5024d723e5aSJoerg Sonnenberger break; 5034d723e5aSJoerg Sonnenberger #endif /* INET6 */ 5044d723e5aSJoerg Sonnenberger } 5054d723e5aSJoerg Sonnenberger 5064d723e5aSJoerg Sonnenberger /* not marked */ 5074d723e5aSJoerg Sonnenberger return (0); 5084d723e5aSJoerg Sonnenberger } 5094d723e5aSJoerg Sonnenberger 5104d723e5aSJoerg Sonnenberger struct mbuf * 5114d723e5aSJoerg Sonnenberger red_getq(red_t *rp, class_queue_t *q) 5124d723e5aSJoerg Sonnenberger { 5134d723e5aSJoerg Sonnenberger struct mbuf *m; 5144d723e5aSJoerg Sonnenberger 5154d723e5aSJoerg Sonnenberger if ((m = _getq(q)) == NULL) { 5164d723e5aSJoerg Sonnenberger if (rp->red_idle == 0) { 5174d723e5aSJoerg Sonnenberger rp->red_idle = 1; 5184d723e5aSJoerg Sonnenberger microtime(&rp->red_last); 5194d723e5aSJoerg Sonnenberger } 5204d723e5aSJoerg Sonnenberger return NULL; 5214d723e5aSJoerg Sonnenberger } 5224d723e5aSJoerg Sonnenberger 5234d723e5aSJoerg Sonnenberger rp->red_idle = 0; 5244d723e5aSJoerg Sonnenberger return (m); 5254d723e5aSJoerg Sonnenberger } 5264d723e5aSJoerg Sonnenberger 5274d723e5aSJoerg Sonnenberger /* 5284d723e5aSJoerg Sonnenberger * helper routine to calibrate avg during idle. 5294d723e5aSJoerg Sonnenberger * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 5304d723e5aSJoerg Sonnenberger * here Wq = 1/weight and the code assumes Wq is close to zero. 5314d723e5aSJoerg Sonnenberger * 5324d723e5aSJoerg Sonnenberger * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 5334d723e5aSJoerg Sonnenberger */ 5344d723e5aSJoerg Sonnenberger static SLIST_HEAD(, wtab) wtab_list = SLIST_HEAD_INITIALIZER(&wtab_list); 5354d723e5aSJoerg Sonnenberger 5364d723e5aSJoerg Sonnenberger struct wtab * 5374d723e5aSJoerg Sonnenberger wtab_alloc(int weight) 5384d723e5aSJoerg Sonnenberger { 5394d723e5aSJoerg Sonnenberger struct wtab *w; 5404d723e5aSJoerg Sonnenberger int i; 5414d723e5aSJoerg Sonnenberger 5424d723e5aSJoerg Sonnenberger SLIST_FOREACH(w, &wtab_list, w_link) { 5434d723e5aSJoerg Sonnenberger if (w->w_weight == weight) { 5444d723e5aSJoerg Sonnenberger w->w_refcount++; 5454d723e5aSJoerg Sonnenberger return (w); 5464d723e5aSJoerg Sonnenberger } 5474d723e5aSJoerg Sonnenberger } 5484d723e5aSJoerg Sonnenberger 549efda3bd0SMatthew Dillon w = kmalloc(sizeof(*w), M_ALTQ, M_WAITOK | M_ZERO); 5504d723e5aSJoerg Sonnenberger w->w_weight = weight; 5514d723e5aSJoerg Sonnenberger w->w_refcount = 1; 5524d723e5aSJoerg Sonnenberger SLIST_INSERT_HEAD(&wtab_list, w, w_link); 5534d723e5aSJoerg Sonnenberger 5544d723e5aSJoerg Sonnenberger /* initialize the weight table */ 5554d723e5aSJoerg Sonnenberger w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 5564d723e5aSJoerg Sonnenberger for (i = 1; i < 32; i++) { 5574d723e5aSJoerg Sonnenberger w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 5584d723e5aSJoerg Sonnenberger if (w->w_tab[i] == 0 && w->w_param_max == 0) 5594d723e5aSJoerg Sonnenberger w->w_param_max = 1 << i; 5604d723e5aSJoerg Sonnenberger } 5614d723e5aSJoerg Sonnenberger 5624d723e5aSJoerg Sonnenberger return (w); 5634d723e5aSJoerg Sonnenberger } 5644d723e5aSJoerg Sonnenberger 5654d723e5aSJoerg Sonnenberger int 5664d723e5aSJoerg Sonnenberger wtab_destroy(struct wtab *w) 5674d723e5aSJoerg Sonnenberger { 5684d723e5aSJoerg Sonnenberger if (--w->w_refcount > 0) 5694d723e5aSJoerg Sonnenberger return (0); 5704d723e5aSJoerg Sonnenberger 5714d723e5aSJoerg Sonnenberger SLIST_REMOVE(&wtab_list, w, wtab, w_link); 572efda3bd0SMatthew Dillon kfree(w, M_ALTQ); 5734d723e5aSJoerg Sonnenberger 5744d723e5aSJoerg Sonnenberger return (0); 5754d723e5aSJoerg Sonnenberger } 5764d723e5aSJoerg Sonnenberger 5774d723e5aSJoerg Sonnenberger int32_t 5784d723e5aSJoerg Sonnenberger pow_w(struct wtab *w, int n) 5794d723e5aSJoerg Sonnenberger { 5804d723e5aSJoerg Sonnenberger int i, bit; 5814d723e5aSJoerg Sonnenberger int32_t val; 5824d723e5aSJoerg Sonnenberger 5834d723e5aSJoerg Sonnenberger if (n >= w->w_param_max) 5844d723e5aSJoerg Sonnenberger return (0); 5854d723e5aSJoerg Sonnenberger 5864d723e5aSJoerg Sonnenberger val = 1 << FP_SHIFT; 5874d723e5aSJoerg Sonnenberger if (n <= 0) 5884d723e5aSJoerg Sonnenberger return (val); 5894d723e5aSJoerg Sonnenberger 5904d723e5aSJoerg Sonnenberger bit = 1; 5914d723e5aSJoerg Sonnenberger i = 0; 5924d723e5aSJoerg Sonnenberger while (n) { 5934d723e5aSJoerg Sonnenberger if (n & bit) { 5944d723e5aSJoerg Sonnenberger val = (val * w->w_tab[i]) >> FP_SHIFT; 5954d723e5aSJoerg Sonnenberger n &= ~bit; 5964d723e5aSJoerg Sonnenberger } 5974d723e5aSJoerg Sonnenberger i++; 5984d723e5aSJoerg Sonnenberger bit <<= 1; 5994d723e5aSJoerg Sonnenberger } 6004d723e5aSJoerg Sonnenberger return (val); 6014d723e5aSJoerg Sonnenberger } 6024d723e5aSJoerg Sonnenberger 6034d723e5aSJoerg Sonnenberger #endif /* ALTQ_RED */ 604