1*4d723e5aSJoerg Sonnenberger /* $KAME: altq_red.c,v 1.19 2004/04/17 10:54:49 kjc Exp $ */ 2*4d723e5aSJoerg Sonnenberger /* $DragonFly: src/sys/net/altq/altq_red.c,v 1.1 2005/02/11 22:25:57 joerg Exp $ */ 3*4d723e5aSJoerg Sonnenberger 4*4d723e5aSJoerg Sonnenberger /* 5*4d723e5aSJoerg Sonnenberger * Copyright (C) 1997-2003 6*4d723e5aSJoerg Sonnenberger * Sony Computer Science Laboratories Inc. All rights reserved. 7*4d723e5aSJoerg Sonnenberger * 8*4d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without 9*4d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions 10*4d723e5aSJoerg Sonnenberger * are met: 11*4d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright 12*4d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer. 13*4d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright 14*4d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the 15*4d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution. 16*4d723e5aSJoerg Sonnenberger * 17*4d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18*4d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19*4d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20*4d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21*4d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22*4d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23*4d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24*4d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25*4d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26*4d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27*4d723e5aSJoerg Sonnenberger * SUCH DAMAGE. 28*4d723e5aSJoerg Sonnenberger * 29*4d723e5aSJoerg Sonnenberger */ 30*4d723e5aSJoerg Sonnenberger /* 31*4d723e5aSJoerg Sonnenberger * Copyright (c) 1990-1994 Regents of the University of California. 32*4d723e5aSJoerg Sonnenberger * All rights reserved. 33*4d723e5aSJoerg Sonnenberger * 34*4d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without 35*4d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions 36*4d723e5aSJoerg Sonnenberger * are met: 37*4d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright 38*4d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer. 39*4d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright 40*4d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the 41*4d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution. 42*4d723e5aSJoerg Sonnenberger * 3. All advertising materials mentioning features or use of this software 43*4d723e5aSJoerg Sonnenberger * must display the following acknowledgement: 44*4d723e5aSJoerg Sonnenberger * This product includes software developed by the Computer Systems 45*4d723e5aSJoerg Sonnenberger * Engineering Group at Lawrence Berkeley Laboratory. 46*4d723e5aSJoerg Sonnenberger * 4. Neither the name of the University nor of the Laboratory may be used 47*4d723e5aSJoerg Sonnenberger * to endorse or promote products derived from this software without 48*4d723e5aSJoerg Sonnenberger * specific prior written permission. 49*4d723e5aSJoerg Sonnenberger * 50*4d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51*4d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52*4d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53*4d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54*4d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55*4d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56*4d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57*4d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58*4d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59*4d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60*4d723e5aSJoerg Sonnenberger * SUCH DAMAGE. 61*4d723e5aSJoerg Sonnenberger */ 62*4d723e5aSJoerg Sonnenberger 63*4d723e5aSJoerg Sonnenberger #include "opt_altq.h" 64*4d723e5aSJoerg Sonnenberger #include "opt_inet.h" 65*4d723e5aSJoerg Sonnenberger #include "opt_inet6.h" 66*4d723e5aSJoerg Sonnenberger 67*4d723e5aSJoerg Sonnenberger #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 68*4d723e5aSJoerg Sonnenberger 69*4d723e5aSJoerg Sonnenberger #include <sys/param.h> 70*4d723e5aSJoerg Sonnenberger #include <sys/malloc.h> 71*4d723e5aSJoerg Sonnenberger #include <sys/mbuf.h> 72*4d723e5aSJoerg Sonnenberger #include <sys/socket.h> 73*4d723e5aSJoerg Sonnenberger #include <sys/systm.h> 74*4d723e5aSJoerg Sonnenberger #include <sys/errno.h> 75*4d723e5aSJoerg Sonnenberger 76*4d723e5aSJoerg Sonnenberger #include <net/if.h> 77*4d723e5aSJoerg Sonnenberger 78*4d723e5aSJoerg Sonnenberger #include <netinet/in.h> 79*4d723e5aSJoerg Sonnenberger #include <netinet/in_systm.h> 80*4d723e5aSJoerg Sonnenberger #include <netinet/ip.h> 81*4d723e5aSJoerg Sonnenberger #ifdef INET6 82*4d723e5aSJoerg Sonnenberger #include <netinet/ip6.h> 83*4d723e5aSJoerg Sonnenberger #endif 84*4d723e5aSJoerg Sonnenberger 85*4d723e5aSJoerg Sonnenberger #include <net/pf/pfvar.h> 86*4d723e5aSJoerg Sonnenberger #include <net/altq/altq.h> 87*4d723e5aSJoerg Sonnenberger #include <net/altq/altq_red.h> 88*4d723e5aSJoerg Sonnenberger 89*4d723e5aSJoerg Sonnenberger /* 90*4d723e5aSJoerg Sonnenberger * ALTQ/RED (Random Early Detection) implementation using 32-bit 91*4d723e5aSJoerg Sonnenberger * fixed-point calculation. 92*4d723e5aSJoerg Sonnenberger * 93*4d723e5aSJoerg Sonnenberger * written by kjc using the ns code as a reference. 94*4d723e5aSJoerg Sonnenberger * you can learn more about red and ns from Sally's home page at 95*4d723e5aSJoerg Sonnenberger * http://www-nrg.ee.lbl.gov/floyd/ 96*4d723e5aSJoerg Sonnenberger * 97*4d723e5aSJoerg Sonnenberger * most of the red parameter values are fixed in this implementation 98*4d723e5aSJoerg Sonnenberger * to prevent fixed-point overflow/underflow. 99*4d723e5aSJoerg Sonnenberger * if you change the parameters, watch out for overflow/underflow! 100*4d723e5aSJoerg Sonnenberger * 101*4d723e5aSJoerg Sonnenberger * the parameters used are recommended values by Sally. 102*4d723e5aSJoerg Sonnenberger * the corresponding ns config looks: 103*4d723e5aSJoerg Sonnenberger * q_weight=0.00195 104*4d723e5aSJoerg Sonnenberger * minthresh=5 maxthresh=15 queue-size=60 105*4d723e5aSJoerg Sonnenberger * linterm=30 106*4d723e5aSJoerg Sonnenberger * dropmech=drop-tail 107*4d723e5aSJoerg Sonnenberger * bytes=false (can't be handled by 32-bit fixed-point) 108*4d723e5aSJoerg Sonnenberger * doubleq=false dqthresh=false 109*4d723e5aSJoerg Sonnenberger * wait=true 110*4d723e5aSJoerg Sonnenberger */ 111*4d723e5aSJoerg Sonnenberger /* 112*4d723e5aSJoerg Sonnenberger * alternative red parameters for a slow link. 113*4d723e5aSJoerg Sonnenberger * 114*4d723e5aSJoerg Sonnenberger * assume the queue length becomes from zero to L and keeps L, it takes 115*4d723e5aSJoerg Sonnenberger * N packets for q_avg to reach 63% of L. 116*4d723e5aSJoerg Sonnenberger * when q_weight is 0.002, N is about 500 packets. 117*4d723e5aSJoerg Sonnenberger * for a slow link like dial-up, 500 packets takes more than 1 minute! 118*4d723e5aSJoerg Sonnenberger * when q_weight is 0.008, N is about 127 packets. 119*4d723e5aSJoerg Sonnenberger * when q_weight is 0.016, N is about 63 packets. 120*4d723e5aSJoerg Sonnenberger * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 121*4d723e5aSJoerg Sonnenberger * are allowed for 0.016. 122*4d723e5aSJoerg Sonnenberger * see Sally's paper for more details. 123*4d723e5aSJoerg Sonnenberger */ 124*4d723e5aSJoerg Sonnenberger /* normal red parameters */ 125*4d723e5aSJoerg Sonnenberger #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 126*4d723e5aSJoerg Sonnenberger /* q_weight = 0.00195 */ 127*4d723e5aSJoerg Sonnenberger 128*4d723e5aSJoerg Sonnenberger /* red parameters for a slow link */ 129*4d723e5aSJoerg Sonnenberger #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 130*4d723e5aSJoerg Sonnenberger /* q_weight = 0.0078125 */ 131*4d723e5aSJoerg Sonnenberger 132*4d723e5aSJoerg Sonnenberger /* red parameters for a very slow link (e.g., dialup) */ 133*4d723e5aSJoerg Sonnenberger #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 134*4d723e5aSJoerg Sonnenberger /* q_weight = 0.015625 */ 135*4d723e5aSJoerg Sonnenberger 136*4d723e5aSJoerg Sonnenberger /* fixed-point uses 12-bit decimal places */ 137*4d723e5aSJoerg Sonnenberger #define FP_SHIFT 12 /* fixed-point shift */ 138*4d723e5aSJoerg Sonnenberger 139*4d723e5aSJoerg Sonnenberger /* red parameters for drop probability */ 140*4d723e5aSJoerg Sonnenberger #define INV_P_MAX 10 /* inverse of max drop probability */ 141*4d723e5aSJoerg Sonnenberger #define TH_MIN 5 /* min threshold */ 142*4d723e5aSJoerg Sonnenberger #define TH_MAX 15 /* max threshold */ 143*4d723e5aSJoerg Sonnenberger 144*4d723e5aSJoerg Sonnenberger #define RED_LIMIT 60 /* default max queue lenght */ 145*4d723e5aSJoerg Sonnenberger #define RED_STATS /* collect statistics */ 146*4d723e5aSJoerg Sonnenberger 147*4d723e5aSJoerg Sonnenberger /* 148*4d723e5aSJoerg Sonnenberger * our default policy for forced-drop is drop-tail. 149*4d723e5aSJoerg Sonnenberger * (in altq-1.1.2 or earlier, the default was random-drop. 150*4d723e5aSJoerg Sonnenberger * but it makes more sense to punish the cause of the surge.) 151*4d723e5aSJoerg Sonnenberger * to switch to the random-drop policy, define "RED_RANDOM_DROP". 152*4d723e5aSJoerg Sonnenberger */ 153*4d723e5aSJoerg Sonnenberger 154*4d723e5aSJoerg Sonnenberger /* default red parameter values */ 155*4d723e5aSJoerg Sonnenberger static int default_th_min = TH_MIN; 156*4d723e5aSJoerg Sonnenberger static int default_th_max = TH_MAX; 157*4d723e5aSJoerg Sonnenberger static int default_inv_pmax = INV_P_MAX; 158*4d723e5aSJoerg Sonnenberger 159*4d723e5aSJoerg Sonnenberger /* 160*4d723e5aSJoerg Sonnenberger * red support routines 161*4d723e5aSJoerg Sonnenberger */ 162*4d723e5aSJoerg Sonnenberger red_t * 163*4d723e5aSJoerg Sonnenberger red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime) 164*4d723e5aSJoerg Sonnenberger { 165*4d723e5aSJoerg Sonnenberger red_t *rp; 166*4d723e5aSJoerg Sonnenberger int w, i; 167*4d723e5aSJoerg Sonnenberger int npkts_per_sec; 168*4d723e5aSJoerg Sonnenberger 169*4d723e5aSJoerg Sonnenberger rp = malloc(sizeof(*rp), M_ALTQ, M_WAITOK | M_ZERO); 170*4d723e5aSJoerg Sonnenberger rp->red_avg = 0; 171*4d723e5aSJoerg Sonnenberger rp->red_idle = 1; 172*4d723e5aSJoerg Sonnenberger 173*4d723e5aSJoerg Sonnenberger if (weight == 0) 174*4d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT; 175*4d723e5aSJoerg Sonnenberger else 176*4d723e5aSJoerg Sonnenberger rp->red_weight = weight; 177*4d723e5aSJoerg Sonnenberger if (inv_pmax == 0) 178*4d723e5aSJoerg Sonnenberger rp->red_inv_pmax = default_inv_pmax; 179*4d723e5aSJoerg Sonnenberger else 180*4d723e5aSJoerg Sonnenberger rp->red_inv_pmax = inv_pmax; 181*4d723e5aSJoerg Sonnenberger if (th_min == 0) 182*4d723e5aSJoerg Sonnenberger rp->red_thmin = default_th_min; 183*4d723e5aSJoerg Sonnenberger else 184*4d723e5aSJoerg Sonnenberger rp->red_thmin = th_min; 185*4d723e5aSJoerg Sonnenberger if (th_max == 0) 186*4d723e5aSJoerg Sonnenberger rp->red_thmax = default_th_max; 187*4d723e5aSJoerg Sonnenberger else 188*4d723e5aSJoerg Sonnenberger rp->red_thmax = th_max; 189*4d723e5aSJoerg Sonnenberger 190*4d723e5aSJoerg Sonnenberger rp->red_flags = flags; 191*4d723e5aSJoerg Sonnenberger 192*4d723e5aSJoerg Sonnenberger if (pkttime == 0) 193*4d723e5aSJoerg Sonnenberger /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 194*4d723e5aSJoerg Sonnenberger rp->red_pkttime = 800; 195*4d723e5aSJoerg Sonnenberger else 196*4d723e5aSJoerg Sonnenberger rp->red_pkttime = pkttime; 197*4d723e5aSJoerg Sonnenberger 198*4d723e5aSJoerg Sonnenberger if (weight == 0) { 199*4d723e5aSJoerg Sonnenberger /* when the link is very slow, adjust red parameters */ 200*4d723e5aSJoerg Sonnenberger npkts_per_sec = 1000000 / rp->red_pkttime; 201*4d723e5aSJoerg Sonnenberger if (npkts_per_sec < 50) { 202*4d723e5aSJoerg Sonnenberger /* up to about 400Kbps */ 203*4d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_2; 204*4d723e5aSJoerg Sonnenberger } else if (npkts_per_sec < 300) { 205*4d723e5aSJoerg Sonnenberger /* up to about 2.4Mbps */ 206*4d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_1; 207*4d723e5aSJoerg Sonnenberger } 208*4d723e5aSJoerg Sonnenberger } 209*4d723e5aSJoerg Sonnenberger 210*4d723e5aSJoerg Sonnenberger /* calculate wshift. weight must be power of 2 */ 211*4d723e5aSJoerg Sonnenberger w = rp->red_weight; 212*4d723e5aSJoerg Sonnenberger for (i = 0; w > 1; i++) 213*4d723e5aSJoerg Sonnenberger w = w >> 1; 214*4d723e5aSJoerg Sonnenberger rp->red_wshift = i; 215*4d723e5aSJoerg Sonnenberger w = 1 << rp->red_wshift; 216*4d723e5aSJoerg Sonnenberger if (w != rp->red_weight) { 217*4d723e5aSJoerg Sonnenberger printf("invalid weight value %d for red! use %d\n", 218*4d723e5aSJoerg Sonnenberger rp->red_weight, w); 219*4d723e5aSJoerg Sonnenberger rp->red_weight = w; 220*4d723e5aSJoerg Sonnenberger } 221*4d723e5aSJoerg Sonnenberger 222*4d723e5aSJoerg Sonnenberger /* 223*4d723e5aSJoerg Sonnenberger * thmin_s and thmax_s are scaled versions of th_min and th_max 224*4d723e5aSJoerg Sonnenberger * to be compared with avg. 225*4d723e5aSJoerg Sonnenberger */ 226*4d723e5aSJoerg Sonnenberger rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 227*4d723e5aSJoerg Sonnenberger rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 228*4d723e5aSJoerg Sonnenberger 229*4d723e5aSJoerg Sonnenberger /* 230*4d723e5aSJoerg Sonnenberger * precompute probability denominator 231*4d723e5aSJoerg Sonnenberger * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 232*4d723e5aSJoerg Sonnenberger */ 233*4d723e5aSJoerg Sonnenberger rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 234*4d723e5aSJoerg Sonnenberger * rp->red_inv_pmax) << FP_SHIFT; 235*4d723e5aSJoerg Sonnenberger 236*4d723e5aSJoerg Sonnenberger /* allocate weight table */ 237*4d723e5aSJoerg Sonnenberger rp->red_wtab = wtab_alloc(rp->red_weight); 238*4d723e5aSJoerg Sonnenberger 239*4d723e5aSJoerg Sonnenberger microtime(&rp->red_last); 240*4d723e5aSJoerg Sonnenberger return (rp); 241*4d723e5aSJoerg Sonnenberger } 242*4d723e5aSJoerg Sonnenberger 243*4d723e5aSJoerg Sonnenberger void 244*4d723e5aSJoerg Sonnenberger red_destroy(red_t *rp) 245*4d723e5aSJoerg Sonnenberger { 246*4d723e5aSJoerg Sonnenberger wtab_destroy(rp->red_wtab); 247*4d723e5aSJoerg Sonnenberger free(rp, M_ALTQ); 248*4d723e5aSJoerg Sonnenberger } 249*4d723e5aSJoerg Sonnenberger 250*4d723e5aSJoerg Sonnenberger void 251*4d723e5aSJoerg Sonnenberger red_getstats(red_t *rp, struct redstats *sp) 252*4d723e5aSJoerg Sonnenberger { 253*4d723e5aSJoerg Sonnenberger sp->q_avg = rp->red_avg >> rp->red_wshift; 254*4d723e5aSJoerg Sonnenberger sp->xmit_cnt = rp->red_stats.xmit_cnt; 255*4d723e5aSJoerg Sonnenberger sp->drop_cnt = rp->red_stats.drop_cnt; 256*4d723e5aSJoerg Sonnenberger sp->drop_forced = rp->red_stats.drop_forced; 257*4d723e5aSJoerg Sonnenberger sp->drop_unforced = rp->red_stats.drop_unforced; 258*4d723e5aSJoerg Sonnenberger sp->marked_packets = rp->red_stats.marked_packets; 259*4d723e5aSJoerg Sonnenberger } 260*4d723e5aSJoerg Sonnenberger 261*4d723e5aSJoerg Sonnenberger int 262*4d723e5aSJoerg Sonnenberger red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr) 263*4d723e5aSJoerg Sonnenberger { 264*4d723e5aSJoerg Sonnenberger int avg, droptype; 265*4d723e5aSJoerg Sonnenberger int n; 266*4d723e5aSJoerg Sonnenberger 267*4d723e5aSJoerg Sonnenberger avg = rp->red_avg; 268*4d723e5aSJoerg Sonnenberger 269*4d723e5aSJoerg Sonnenberger /* 270*4d723e5aSJoerg Sonnenberger * if we were idle, we pretend that n packets arrived during 271*4d723e5aSJoerg Sonnenberger * the idle period. 272*4d723e5aSJoerg Sonnenberger */ 273*4d723e5aSJoerg Sonnenberger if (rp->red_idle) { 274*4d723e5aSJoerg Sonnenberger struct timeval now; 275*4d723e5aSJoerg Sonnenberger int t; 276*4d723e5aSJoerg Sonnenberger 277*4d723e5aSJoerg Sonnenberger rp->red_idle = 0; 278*4d723e5aSJoerg Sonnenberger microtime(&now); 279*4d723e5aSJoerg Sonnenberger t = (now.tv_sec - rp->red_last.tv_sec); 280*4d723e5aSJoerg Sonnenberger if (t > 60) { 281*4d723e5aSJoerg Sonnenberger /* 282*4d723e5aSJoerg Sonnenberger * being idle for more than 1 minute, set avg to zero. 283*4d723e5aSJoerg Sonnenberger * this prevents t from overflow. 284*4d723e5aSJoerg Sonnenberger */ 285*4d723e5aSJoerg Sonnenberger avg = 0; 286*4d723e5aSJoerg Sonnenberger } else { 287*4d723e5aSJoerg Sonnenberger t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 288*4d723e5aSJoerg Sonnenberger n = t / rp->red_pkttime - 1; 289*4d723e5aSJoerg Sonnenberger 290*4d723e5aSJoerg Sonnenberger /* the following line does (avg = (1 - Wq)^n * avg) */ 291*4d723e5aSJoerg Sonnenberger if (n > 0) 292*4d723e5aSJoerg Sonnenberger avg = (avg >> FP_SHIFT) * 293*4d723e5aSJoerg Sonnenberger pow_w(rp->red_wtab, n); 294*4d723e5aSJoerg Sonnenberger } 295*4d723e5aSJoerg Sonnenberger } 296*4d723e5aSJoerg Sonnenberger 297*4d723e5aSJoerg Sonnenberger /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 298*4d723e5aSJoerg Sonnenberger avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 299*4d723e5aSJoerg Sonnenberger rp->red_avg = avg; /* save the new value */ 300*4d723e5aSJoerg Sonnenberger 301*4d723e5aSJoerg Sonnenberger /* 302*4d723e5aSJoerg Sonnenberger * red_count keeps a tally of arriving traffic that has not 303*4d723e5aSJoerg Sonnenberger * been dropped. 304*4d723e5aSJoerg Sonnenberger */ 305*4d723e5aSJoerg Sonnenberger rp->red_count++; 306*4d723e5aSJoerg Sonnenberger 307*4d723e5aSJoerg Sonnenberger /* see if we drop early */ 308*4d723e5aSJoerg Sonnenberger droptype = DTYPE_NODROP; 309*4d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmin_s && qlen(q) > 1) { 310*4d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmax_s) { 311*4d723e5aSJoerg Sonnenberger /* avg >= th_max: forced drop */ 312*4d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED; 313*4d723e5aSJoerg Sonnenberger } else if (rp->red_old == 0) { 314*4d723e5aSJoerg Sonnenberger /* first exceeds th_min */ 315*4d723e5aSJoerg Sonnenberger rp->red_count = 1; 316*4d723e5aSJoerg Sonnenberger rp->red_old = 1; 317*4d723e5aSJoerg Sonnenberger } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 318*4d723e5aSJoerg Sonnenberger rp->red_probd, rp->red_count)) { 319*4d723e5aSJoerg Sonnenberger /* mark or drop by red */ 320*4d723e5aSJoerg Sonnenberger if ((rp->red_flags & REDF_ECN) && 321*4d723e5aSJoerg Sonnenberger mark_ecn(m, pktattr, rp->red_flags)) { 322*4d723e5aSJoerg Sonnenberger /* successfully marked. do not drop. */ 323*4d723e5aSJoerg Sonnenberger rp->red_count = 0; 324*4d723e5aSJoerg Sonnenberger #ifdef RED_STATS 325*4d723e5aSJoerg Sonnenberger rp->red_stats.marked_packets++; 326*4d723e5aSJoerg Sonnenberger #endif 327*4d723e5aSJoerg Sonnenberger } else { 328*4d723e5aSJoerg Sonnenberger /* unforced drop by red */ 329*4d723e5aSJoerg Sonnenberger droptype = DTYPE_EARLY; 330*4d723e5aSJoerg Sonnenberger } 331*4d723e5aSJoerg Sonnenberger } 332*4d723e5aSJoerg Sonnenberger } else { 333*4d723e5aSJoerg Sonnenberger /* avg < th_min */ 334*4d723e5aSJoerg Sonnenberger rp->red_old = 0; 335*4d723e5aSJoerg Sonnenberger } 336*4d723e5aSJoerg Sonnenberger 337*4d723e5aSJoerg Sonnenberger /* 338*4d723e5aSJoerg Sonnenberger * if the queue length hits the hard limit, it's a forced drop. 339*4d723e5aSJoerg Sonnenberger */ 340*4d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 341*4d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED; 342*4d723e5aSJoerg Sonnenberger 343*4d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP 344*4d723e5aSJoerg Sonnenberger /* if successful or forced drop, enqueue this packet. */ 345*4d723e5aSJoerg Sonnenberger if (droptype != DTYPE_EARLY) 346*4d723e5aSJoerg Sonnenberger _addq(q, m); 347*4d723e5aSJoerg Sonnenberger #else 348*4d723e5aSJoerg Sonnenberger /* if successful, enqueue this packet. */ 349*4d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP) 350*4d723e5aSJoerg Sonnenberger _addq(q, m); 351*4d723e5aSJoerg Sonnenberger #endif 352*4d723e5aSJoerg Sonnenberger if (droptype != DTYPE_NODROP) { 353*4d723e5aSJoerg Sonnenberger if (droptype == DTYPE_EARLY) { 354*4d723e5aSJoerg Sonnenberger /* drop the incoming packet */ 355*4d723e5aSJoerg Sonnenberger #ifdef RED_STATS 356*4d723e5aSJoerg Sonnenberger rp->red_stats.drop_unforced++; 357*4d723e5aSJoerg Sonnenberger #endif 358*4d723e5aSJoerg Sonnenberger } else { 359*4d723e5aSJoerg Sonnenberger /* forced drop, select a victim packet in the queue. */ 360*4d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP 361*4d723e5aSJoerg Sonnenberger m = _getq_random(q); 362*4d723e5aSJoerg Sonnenberger #endif 363*4d723e5aSJoerg Sonnenberger #ifdef RED_STATS 364*4d723e5aSJoerg Sonnenberger rp->red_stats.drop_forced++; 365*4d723e5aSJoerg Sonnenberger #endif 366*4d723e5aSJoerg Sonnenberger } 367*4d723e5aSJoerg Sonnenberger #ifdef RED_STATS 368*4d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 369*4d723e5aSJoerg Sonnenberger #endif 370*4d723e5aSJoerg Sonnenberger rp->red_count = 0; 371*4d723e5aSJoerg Sonnenberger m_freem(m); 372*4d723e5aSJoerg Sonnenberger return (-1); 373*4d723e5aSJoerg Sonnenberger } 374*4d723e5aSJoerg Sonnenberger /* successfully queued */ 375*4d723e5aSJoerg Sonnenberger #ifdef RED_STATS 376*4d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 377*4d723e5aSJoerg Sonnenberger #endif 378*4d723e5aSJoerg Sonnenberger return (0); 379*4d723e5aSJoerg Sonnenberger } 380*4d723e5aSJoerg Sonnenberger 381*4d723e5aSJoerg Sonnenberger /* 382*4d723e5aSJoerg Sonnenberger * early-drop probability is calculated as follows: 383*4d723e5aSJoerg Sonnenberger * prob = p_max * (avg - th_min) / (th_max - th_min) 384*4d723e5aSJoerg Sonnenberger * prob_a = prob / (2 - count*prob) 385*4d723e5aSJoerg Sonnenberger * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 386*4d723e5aSJoerg Sonnenberger * here prob_a increases as successive undrop count increases. 387*4d723e5aSJoerg Sonnenberger * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 388*4d723e5aSJoerg Sonnenberger * becomes 1 when (count >= (2 / prob))). 389*4d723e5aSJoerg Sonnenberger */ 390*4d723e5aSJoerg Sonnenberger int 391*4d723e5aSJoerg Sonnenberger drop_early(int fp_len, int fp_probd, int count) 392*4d723e5aSJoerg Sonnenberger { 393*4d723e5aSJoerg Sonnenberger int d; /* denominator of drop-probability */ 394*4d723e5aSJoerg Sonnenberger 395*4d723e5aSJoerg Sonnenberger d = fp_probd - count * fp_len; 396*4d723e5aSJoerg Sonnenberger if (d <= 0) { 397*4d723e5aSJoerg Sonnenberger /* count exceeds the hard limit: drop or mark */ 398*4d723e5aSJoerg Sonnenberger return (1); 399*4d723e5aSJoerg Sonnenberger } 400*4d723e5aSJoerg Sonnenberger 401*4d723e5aSJoerg Sonnenberger /* 402*4d723e5aSJoerg Sonnenberger * now the range of d is [1..600] in fixed-point. (when 403*4d723e5aSJoerg Sonnenberger * th_max-th_min=10 and p_max=1/30) 404*4d723e5aSJoerg Sonnenberger * drop probability = (avg - TH_MIN) / d 405*4d723e5aSJoerg Sonnenberger */ 406*4d723e5aSJoerg Sonnenberger 407*4d723e5aSJoerg Sonnenberger if ((arc4random() % d) < fp_len) { 408*4d723e5aSJoerg Sonnenberger /* drop or mark */ 409*4d723e5aSJoerg Sonnenberger return (1); 410*4d723e5aSJoerg Sonnenberger } 411*4d723e5aSJoerg Sonnenberger /* no drop/mark */ 412*4d723e5aSJoerg Sonnenberger return (0); 413*4d723e5aSJoerg Sonnenberger } 414*4d723e5aSJoerg Sonnenberger 415*4d723e5aSJoerg Sonnenberger /* 416*4d723e5aSJoerg Sonnenberger * try to mark CE bit to the packet. 417*4d723e5aSJoerg Sonnenberger * returns 1 if successfully marked, 0 otherwise. 418*4d723e5aSJoerg Sonnenberger */ 419*4d723e5aSJoerg Sonnenberger int 420*4d723e5aSJoerg Sonnenberger mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 421*4d723e5aSJoerg Sonnenberger { 422*4d723e5aSJoerg Sonnenberger struct mbuf *m0; 423*4d723e5aSJoerg Sonnenberger void *hdr; 424*4d723e5aSJoerg Sonnenberger int af; 425*4d723e5aSJoerg Sonnenberger 426*4d723e5aSJoerg Sonnenberger if ((m->m_pkthdr.fw_flags & ALTQ_MBUF_TAGGED) == 0) 427*4d723e5aSJoerg Sonnenberger return (0); 428*4d723e5aSJoerg Sonnenberger af = m->m_pkthdr.ecn_af; 429*4d723e5aSJoerg Sonnenberger hdr = m->m_pkthdr.header; 430*4d723e5aSJoerg Sonnenberger 431*4d723e5aSJoerg Sonnenberger if (af != AF_INET && af != AF_INET6) 432*4d723e5aSJoerg Sonnenberger return (0); 433*4d723e5aSJoerg Sonnenberger 434*4d723e5aSJoerg Sonnenberger /* verify that pattr_hdr is within the mbuf data */ 435*4d723e5aSJoerg Sonnenberger for (m0 = m; m0 != NULL; m0 = m0->m_next) { 436*4d723e5aSJoerg Sonnenberger if (((caddr_t)hdr >= m0->m_data) && 437*4d723e5aSJoerg Sonnenberger ((caddr_t)hdr < m0->m_data + m0->m_len)) 438*4d723e5aSJoerg Sonnenberger break; 439*4d723e5aSJoerg Sonnenberger } 440*4d723e5aSJoerg Sonnenberger if (m0 == NULL) { 441*4d723e5aSJoerg Sonnenberger /* ick, tag info is stale */ 442*4d723e5aSJoerg Sonnenberger return (0); 443*4d723e5aSJoerg Sonnenberger } 444*4d723e5aSJoerg Sonnenberger 445*4d723e5aSJoerg Sonnenberger switch (af) { 446*4d723e5aSJoerg Sonnenberger case AF_INET: 447*4d723e5aSJoerg Sonnenberger if (flags & REDF_ECN4) { 448*4d723e5aSJoerg Sonnenberger struct ip *ip = hdr; 449*4d723e5aSJoerg Sonnenberger uint8_t otos; 450*4d723e5aSJoerg Sonnenberger int sum; 451*4d723e5aSJoerg Sonnenberger 452*4d723e5aSJoerg Sonnenberger if (ip->ip_v != 4) 453*4d723e5aSJoerg Sonnenberger return (0); /* version mismatch! */ 454*4d723e5aSJoerg Sonnenberger 455*4d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 456*4d723e5aSJoerg Sonnenberger return (0); /* not-ECT */ 457*4d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 458*4d723e5aSJoerg Sonnenberger return (1); /* already marked */ 459*4d723e5aSJoerg Sonnenberger 460*4d723e5aSJoerg Sonnenberger /* 461*4d723e5aSJoerg Sonnenberger * ecn-capable but not marked, 462*4d723e5aSJoerg Sonnenberger * mark CE and update checksum 463*4d723e5aSJoerg Sonnenberger */ 464*4d723e5aSJoerg Sonnenberger otos = ip->ip_tos; 465*4d723e5aSJoerg Sonnenberger ip->ip_tos |= IPTOS_ECN_CE; 466*4d723e5aSJoerg Sonnenberger /* 467*4d723e5aSJoerg Sonnenberger * update checksum (from RFC1624) 468*4d723e5aSJoerg Sonnenberger * HC' = ~(~HC + ~m + m') 469*4d723e5aSJoerg Sonnenberger */ 470*4d723e5aSJoerg Sonnenberger sum = ~ntohs(ip->ip_sum) & 0xffff; 471*4d723e5aSJoerg Sonnenberger sum += (~otos & 0xffff) + ip->ip_tos; 472*4d723e5aSJoerg Sonnenberger sum = (sum >> 16) + (sum & 0xffff); 473*4d723e5aSJoerg Sonnenberger sum += (sum >> 16); /* add carry */ 474*4d723e5aSJoerg Sonnenberger ip->ip_sum = htons(~sum & 0xffff); 475*4d723e5aSJoerg Sonnenberger return (1); 476*4d723e5aSJoerg Sonnenberger } 477*4d723e5aSJoerg Sonnenberger break; 478*4d723e5aSJoerg Sonnenberger #ifdef INET6 479*4d723e5aSJoerg Sonnenberger case AF_INET6: 480*4d723e5aSJoerg Sonnenberger if (flags & REDF_ECN6) { 481*4d723e5aSJoerg Sonnenberger struct ip6_hdr *ip6 = hdr; 482*4d723e5aSJoerg Sonnenberger uint32_t flowlabel; 483*4d723e5aSJoerg Sonnenberger 484*4d723e5aSJoerg Sonnenberger flowlabel = ntohl(ip6->ip6_flow); 485*4d723e5aSJoerg Sonnenberger if ((flowlabel >> 28) != 6) 486*4d723e5aSJoerg Sonnenberger return (0); /* version mismatch! */ 487*4d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 488*4d723e5aSJoerg Sonnenberger (IPTOS_ECN_NOTECT << 20)) 489*4d723e5aSJoerg Sonnenberger return (0); /* not-ECT */ 490*4d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 491*4d723e5aSJoerg Sonnenberger (IPTOS_ECN_CE << 20)) 492*4d723e5aSJoerg Sonnenberger return (1); /* already marked */ 493*4d723e5aSJoerg Sonnenberger /* 494*4d723e5aSJoerg Sonnenberger * ecn-capable but not marked, mark CE 495*4d723e5aSJoerg Sonnenberger */ 496*4d723e5aSJoerg Sonnenberger flowlabel |= (IPTOS_ECN_CE << 20); 497*4d723e5aSJoerg Sonnenberger ip6->ip6_flow = htonl(flowlabel); 498*4d723e5aSJoerg Sonnenberger return (1); 499*4d723e5aSJoerg Sonnenberger } 500*4d723e5aSJoerg Sonnenberger break; 501*4d723e5aSJoerg Sonnenberger #endif /* INET6 */ 502*4d723e5aSJoerg Sonnenberger } 503*4d723e5aSJoerg Sonnenberger 504*4d723e5aSJoerg Sonnenberger /* not marked */ 505*4d723e5aSJoerg Sonnenberger return (0); 506*4d723e5aSJoerg Sonnenberger } 507*4d723e5aSJoerg Sonnenberger 508*4d723e5aSJoerg Sonnenberger struct mbuf * 509*4d723e5aSJoerg Sonnenberger red_getq(red_t *rp, class_queue_t *q) 510*4d723e5aSJoerg Sonnenberger { 511*4d723e5aSJoerg Sonnenberger struct mbuf *m; 512*4d723e5aSJoerg Sonnenberger 513*4d723e5aSJoerg Sonnenberger if ((m = _getq(q)) == NULL) { 514*4d723e5aSJoerg Sonnenberger if (rp->red_idle == 0) { 515*4d723e5aSJoerg Sonnenberger rp->red_idle = 1; 516*4d723e5aSJoerg Sonnenberger microtime(&rp->red_last); 517*4d723e5aSJoerg Sonnenberger } 518*4d723e5aSJoerg Sonnenberger return NULL; 519*4d723e5aSJoerg Sonnenberger } 520*4d723e5aSJoerg Sonnenberger 521*4d723e5aSJoerg Sonnenberger rp->red_idle = 0; 522*4d723e5aSJoerg Sonnenberger return (m); 523*4d723e5aSJoerg Sonnenberger } 524*4d723e5aSJoerg Sonnenberger 525*4d723e5aSJoerg Sonnenberger /* 526*4d723e5aSJoerg Sonnenberger * helper routine to calibrate avg during idle. 527*4d723e5aSJoerg Sonnenberger * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 528*4d723e5aSJoerg Sonnenberger * here Wq = 1/weight and the code assumes Wq is close to zero. 529*4d723e5aSJoerg Sonnenberger * 530*4d723e5aSJoerg Sonnenberger * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 531*4d723e5aSJoerg Sonnenberger */ 532*4d723e5aSJoerg Sonnenberger static SLIST_HEAD(, wtab) wtab_list = SLIST_HEAD_INITIALIZER(&wtab_list); 533*4d723e5aSJoerg Sonnenberger 534*4d723e5aSJoerg Sonnenberger struct wtab * 535*4d723e5aSJoerg Sonnenberger wtab_alloc(int weight) 536*4d723e5aSJoerg Sonnenberger { 537*4d723e5aSJoerg Sonnenberger struct wtab *w; 538*4d723e5aSJoerg Sonnenberger int i; 539*4d723e5aSJoerg Sonnenberger 540*4d723e5aSJoerg Sonnenberger SLIST_FOREACH(w, &wtab_list, w_link) { 541*4d723e5aSJoerg Sonnenberger if (w->w_weight == weight) { 542*4d723e5aSJoerg Sonnenberger w->w_refcount++; 543*4d723e5aSJoerg Sonnenberger return (w); 544*4d723e5aSJoerg Sonnenberger } 545*4d723e5aSJoerg Sonnenberger } 546*4d723e5aSJoerg Sonnenberger 547*4d723e5aSJoerg Sonnenberger w = malloc(sizeof(*w), M_ALTQ, M_WAITOK | M_ZERO); 548*4d723e5aSJoerg Sonnenberger w->w_weight = weight; 549*4d723e5aSJoerg Sonnenberger w->w_refcount = 1; 550*4d723e5aSJoerg Sonnenberger SLIST_INSERT_HEAD(&wtab_list, w, w_link); 551*4d723e5aSJoerg Sonnenberger 552*4d723e5aSJoerg Sonnenberger /* initialize the weight table */ 553*4d723e5aSJoerg Sonnenberger w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 554*4d723e5aSJoerg Sonnenberger for (i = 1; i < 32; i++) { 555*4d723e5aSJoerg Sonnenberger w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 556*4d723e5aSJoerg Sonnenberger if (w->w_tab[i] == 0 && w->w_param_max == 0) 557*4d723e5aSJoerg Sonnenberger w->w_param_max = 1 << i; 558*4d723e5aSJoerg Sonnenberger } 559*4d723e5aSJoerg Sonnenberger 560*4d723e5aSJoerg Sonnenberger return (w); 561*4d723e5aSJoerg Sonnenberger } 562*4d723e5aSJoerg Sonnenberger 563*4d723e5aSJoerg Sonnenberger int 564*4d723e5aSJoerg Sonnenberger wtab_destroy(struct wtab *w) 565*4d723e5aSJoerg Sonnenberger { 566*4d723e5aSJoerg Sonnenberger if (--w->w_refcount > 0) 567*4d723e5aSJoerg Sonnenberger return (0); 568*4d723e5aSJoerg Sonnenberger 569*4d723e5aSJoerg Sonnenberger SLIST_REMOVE(&wtab_list, w, wtab, w_link); 570*4d723e5aSJoerg Sonnenberger free(w, M_ALTQ); 571*4d723e5aSJoerg Sonnenberger 572*4d723e5aSJoerg Sonnenberger return (0); 573*4d723e5aSJoerg Sonnenberger } 574*4d723e5aSJoerg Sonnenberger 575*4d723e5aSJoerg Sonnenberger int32_t 576*4d723e5aSJoerg Sonnenberger pow_w(struct wtab *w, int n) 577*4d723e5aSJoerg Sonnenberger { 578*4d723e5aSJoerg Sonnenberger int i, bit; 579*4d723e5aSJoerg Sonnenberger int32_t val; 580*4d723e5aSJoerg Sonnenberger 581*4d723e5aSJoerg Sonnenberger if (n >= w->w_param_max) 582*4d723e5aSJoerg Sonnenberger return (0); 583*4d723e5aSJoerg Sonnenberger 584*4d723e5aSJoerg Sonnenberger val = 1 << FP_SHIFT; 585*4d723e5aSJoerg Sonnenberger if (n <= 0) 586*4d723e5aSJoerg Sonnenberger return (val); 587*4d723e5aSJoerg Sonnenberger 588*4d723e5aSJoerg Sonnenberger bit = 1; 589*4d723e5aSJoerg Sonnenberger i = 0; 590*4d723e5aSJoerg Sonnenberger while (n) { 591*4d723e5aSJoerg Sonnenberger if (n & bit) { 592*4d723e5aSJoerg Sonnenberger val = (val * w->w_tab[i]) >> FP_SHIFT; 593*4d723e5aSJoerg Sonnenberger n &= ~bit; 594*4d723e5aSJoerg Sonnenberger } 595*4d723e5aSJoerg Sonnenberger i++; 596*4d723e5aSJoerg Sonnenberger bit <<= 1; 597*4d723e5aSJoerg Sonnenberger } 598*4d723e5aSJoerg Sonnenberger return (val); 599*4d723e5aSJoerg Sonnenberger } 600*4d723e5aSJoerg Sonnenberger 601*4d723e5aSJoerg Sonnenberger #endif /* ALTQ_RED */ 602