14d723e5aSJoerg Sonnenberger /* $KAME: altq_red.c,v 1.19 2004/04/17 10:54:49 kjc Exp $ */
24b1cf444SSascha Wildner /* $DragonFly: src/sys/net/altq/altq_red.c,v 1.4 2006/12/22 23:44:55 swildner Exp $ */
34d723e5aSJoerg Sonnenberger
44d723e5aSJoerg Sonnenberger /*
54d723e5aSJoerg Sonnenberger * Copyright (C) 1997-2003
64d723e5aSJoerg Sonnenberger * Sony Computer Science Laboratories Inc. All rights reserved.
74d723e5aSJoerg Sonnenberger *
84d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without
94d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions
104d723e5aSJoerg Sonnenberger * are met:
114d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright
124d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer.
134d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright
144d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the
154d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution.
164d723e5aSJoerg Sonnenberger *
174d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
184d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
194d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
204d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
214d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
224d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
234d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
244d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
254d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
264d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
274d723e5aSJoerg Sonnenberger * SUCH DAMAGE.
284d723e5aSJoerg Sonnenberger *
294d723e5aSJoerg Sonnenberger */
304d723e5aSJoerg Sonnenberger /*
314d723e5aSJoerg Sonnenberger * Copyright (c) 1990-1994 Regents of the University of California.
324d723e5aSJoerg Sonnenberger * All rights reserved.
334d723e5aSJoerg Sonnenberger *
344d723e5aSJoerg Sonnenberger * Redistribution and use in source and binary forms, with or without
354d723e5aSJoerg Sonnenberger * modification, are permitted provided that the following conditions
364d723e5aSJoerg Sonnenberger * are met:
374d723e5aSJoerg Sonnenberger * 1. Redistributions of source code must retain the above copyright
384d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer.
394d723e5aSJoerg Sonnenberger * 2. Redistributions in binary form must reproduce the above copyright
404d723e5aSJoerg Sonnenberger * notice, this list of conditions and the following disclaimer in the
414d723e5aSJoerg Sonnenberger * documentation and/or other materials provided with the distribution.
424d723e5aSJoerg Sonnenberger * 3. All advertising materials mentioning features or use of this software
434d723e5aSJoerg Sonnenberger * must display the following acknowledgement:
444d723e5aSJoerg Sonnenberger * This product includes software developed by the Computer Systems
454d723e5aSJoerg Sonnenberger * Engineering Group at Lawrence Berkeley Laboratory.
464d723e5aSJoerg Sonnenberger * 4. Neither the name of the University nor of the Laboratory may be used
474d723e5aSJoerg Sonnenberger * to endorse or promote products derived from this software without
484d723e5aSJoerg Sonnenberger * specific prior written permission.
494d723e5aSJoerg Sonnenberger *
504d723e5aSJoerg Sonnenberger * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
514d723e5aSJoerg Sonnenberger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
524d723e5aSJoerg Sonnenberger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
534d723e5aSJoerg Sonnenberger * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
544d723e5aSJoerg Sonnenberger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
554d723e5aSJoerg Sonnenberger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
564d723e5aSJoerg Sonnenberger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
574d723e5aSJoerg Sonnenberger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
584d723e5aSJoerg Sonnenberger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
594d723e5aSJoerg Sonnenberger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
604d723e5aSJoerg Sonnenberger * SUCH DAMAGE.
614d723e5aSJoerg Sonnenberger */
624d723e5aSJoerg Sonnenberger
634d723e5aSJoerg Sonnenberger #include "opt_altq.h"
644d723e5aSJoerg Sonnenberger #include "opt_inet.h"
654d723e5aSJoerg Sonnenberger #include "opt_inet6.h"
664d723e5aSJoerg Sonnenberger
674d723e5aSJoerg Sonnenberger #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
684d723e5aSJoerg Sonnenberger
694d723e5aSJoerg Sonnenberger #include <sys/param.h>
704d723e5aSJoerg Sonnenberger #include <sys/malloc.h>
714d723e5aSJoerg Sonnenberger #include <sys/mbuf.h>
724d723e5aSJoerg Sonnenberger #include <sys/socket.h>
734d723e5aSJoerg Sonnenberger #include <sys/systm.h>
744d723e5aSJoerg Sonnenberger #include <sys/errno.h>
754d723e5aSJoerg Sonnenberger
764d723e5aSJoerg Sonnenberger #include <net/if.h>
77*bff82488SAaron LI #include <net/ifq_var.h>
784d723e5aSJoerg Sonnenberger
794d723e5aSJoerg Sonnenberger #include <netinet/in.h>
804d723e5aSJoerg Sonnenberger #include <netinet/in_systm.h>
814d723e5aSJoerg Sonnenberger #include <netinet/ip.h>
824d723e5aSJoerg Sonnenberger #ifdef INET6
834d723e5aSJoerg Sonnenberger #include <netinet/ip6.h>
844d723e5aSJoerg Sonnenberger #endif
854d723e5aSJoerg Sonnenberger
864d723e5aSJoerg Sonnenberger #include <net/pf/pfvar.h>
874d723e5aSJoerg Sonnenberger #include <net/altq/altq.h>
884d723e5aSJoerg Sonnenberger #include <net/altq/altq_red.h>
894d723e5aSJoerg Sonnenberger
904d723e5aSJoerg Sonnenberger /*
914d723e5aSJoerg Sonnenberger * ALTQ/RED (Random Early Detection) implementation using 32-bit
924d723e5aSJoerg Sonnenberger * fixed-point calculation.
934d723e5aSJoerg Sonnenberger *
944d723e5aSJoerg Sonnenberger * written by kjc using the ns code as a reference.
954d723e5aSJoerg Sonnenberger * you can learn more about red and ns from Sally's home page at
964d723e5aSJoerg Sonnenberger * http://www-nrg.ee.lbl.gov/floyd/
974d723e5aSJoerg Sonnenberger *
984d723e5aSJoerg Sonnenberger * most of the red parameter values are fixed in this implementation
994d723e5aSJoerg Sonnenberger * to prevent fixed-point overflow/underflow.
1004d723e5aSJoerg Sonnenberger * if you change the parameters, watch out for overflow/underflow!
1014d723e5aSJoerg Sonnenberger *
1024d723e5aSJoerg Sonnenberger * the parameters used are recommended values by Sally.
1034d723e5aSJoerg Sonnenberger * the corresponding ns config looks:
1044d723e5aSJoerg Sonnenberger * q_weight=0.00195
1054d723e5aSJoerg Sonnenberger * minthresh=5 maxthresh=15 queue-size=60
1064d723e5aSJoerg Sonnenberger * linterm=30
1074d723e5aSJoerg Sonnenberger * dropmech=drop-tail
1084d723e5aSJoerg Sonnenberger * bytes=false (can't be handled by 32-bit fixed-point)
1094d723e5aSJoerg Sonnenberger * doubleq=false dqthresh=false
1104d723e5aSJoerg Sonnenberger * wait=true
1114d723e5aSJoerg Sonnenberger */
1124d723e5aSJoerg Sonnenberger /*
1134d723e5aSJoerg Sonnenberger * alternative red parameters for a slow link.
1144d723e5aSJoerg Sonnenberger *
1154d723e5aSJoerg Sonnenberger * assume the queue length becomes from zero to L and keeps L, it takes
1164d723e5aSJoerg Sonnenberger * N packets for q_avg to reach 63% of L.
1174d723e5aSJoerg Sonnenberger * when q_weight is 0.002, N is about 500 packets.
1184d723e5aSJoerg Sonnenberger * for a slow link like dial-up, 500 packets takes more than 1 minute!
1194d723e5aSJoerg Sonnenberger * when q_weight is 0.008, N is about 127 packets.
1204d723e5aSJoerg Sonnenberger * when q_weight is 0.016, N is about 63 packets.
1214d723e5aSJoerg Sonnenberger * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
1224d723e5aSJoerg Sonnenberger * are allowed for 0.016.
1234d723e5aSJoerg Sonnenberger * see Sally's paper for more details.
1244d723e5aSJoerg Sonnenberger */
1254d723e5aSJoerg Sonnenberger /* normal red parameters */
1264d723e5aSJoerg Sonnenberger #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
1274d723e5aSJoerg Sonnenberger /* q_weight = 0.00195 */
1284d723e5aSJoerg Sonnenberger
1294d723e5aSJoerg Sonnenberger /* red parameters for a slow link */
1304d723e5aSJoerg Sonnenberger #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
1314d723e5aSJoerg Sonnenberger /* q_weight = 0.0078125 */
1324d723e5aSJoerg Sonnenberger
1334d723e5aSJoerg Sonnenberger /* red parameters for a very slow link (e.g., dialup) */
1344d723e5aSJoerg Sonnenberger #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
1354d723e5aSJoerg Sonnenberger /* q_weight = 0.015625 */
1364d723e5aSJoerg Sonnenberger
1374d723e5aSJoerg Sonnenberger /* fixed-point uses 12-bit decimal places */
1384d723e5aSJoerg Sonnenberger #define FP_SHIFT 12 /* fixed-point shift */
1394d723e5aSJoerg Sonnenberger
1404d723e5aSJoerg Sonnenberger /* red parameters for drop probability */
1414d723e5aSJoerg Sonnenberger #define INV_P_MAX 10 /* inverse of max drop probability */
1424d723e5aSJoerg Sonnenberger #define TH_MIN 5 /* min threshold */
1434d723e5aSJoerg Sonnenberger #define TH_MAX 15 /* max threshold */
1444d723e5aSJoerg Sonnenberger
1454d723e5aSJoerg Sonnenberger #define RED_LIMIT 60 /* default max queue lenght */
1464d723e5aSJoerg Sonnenberger #define RED_STATS /* collect statistics */
1474d723e5aSJoerg Sonnenberger
1484d723e5aSJoerg Sonnenberger /*
1494d723e5aSJoerg Sonnenberger * our default policy for forced-drop is drop-tail.
1504d723e5aSJoerg Sonnenberger * (in altq-1.1.2 or earlier, the default was random-drop.
1514d723e5aSJoerg Sonnenberger * but it makes more sense to punish the cause of the surge.)
1524d723e5aSJoerg Sonnenberger * to switch to the random-drop policy, define "RED_RANDOM_DROP".
1534d723e5aSJoerg Sonnenberger */
1544d723e5aSJoerg Sonnenberger
1554d723e5aSJoerg Sonnenberger /* default red parameter values */
1564d723e5aSJoerg Sonnenberger static int default_th_min = TH_MIN;
1574d723e5aSJoerg Sonnenberger static int default_th_max = TH_MAX;
1584d723e5aSJoerg Sonnenberger static int default_inv_pmax = INV_P_MAX;
1594d723e5aSJoerg Sonnenberger
1604d723e5aSJoerg Sonnenberger /*
1614d723e5aSJoerg Sonnenberger * red support routines
1624d723e5aSJoerg Sonnenberger */
1634d723e5aSJoerg Sonnenberger red_t *
red_alloc(int weight,int inv_pmax,int th_min,int th_max,int flags,int pkttime)1644d723e5aSJoerg Sonnenberger red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, int pkttime)
1654d723e5aSJoerg Sonnenberger {
1664d723e5aSJoerg Sonnenberger red_t *rp;
1674d723e5aSJoerg Sonnenberger int w, i;
1684d723e5aSJoerg Sonnenberger int npkts_per_sec;
1694d723e5aSJoerg Sonnenberger
170efda3bd0SMatthew Dillon rp = kmalloc(sizeof(*rp), M_ALTQ, M_WAITOK | M_ZERO);
1714d723e5aSJoerg Sonnenberger rp->red_avg = 0;
1724d723e5aSJoerg Sonnenberger rp->red_idle = 1;
1734d723e5aSJoerg Sonnenberger
1744d723e5aSJoerg Sonnenberger if (weight == 0)
1754d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT;
1764d723e5aSJoerg Sonnenberger else
1774d723e5aSJoerg Sonnenberger rp->red_weight = weight;
1784d723e5aSJoerg Sonnenberger if (inv_pmax == 0)
1794d723e5aSJoerg Sonnenberger rp->red_inv_pmax = default_inv_pmax;
1804d723e5aSJoerg Sonnenberger else
1814d723e5aSJoerg Sonnenberger rp->red_inv_pmax = inv_pmax;
1824d723e5aSJoerg Sonnenberger if (th_min == 0)
1834d723e5aSJoerg Sonnenberger rp->red_thmin = default_th_min;
1844d723e5aSJoerg Sonnenberger else
1854d723e5aSJoerg Sonnenberger rp->red_thmin = th_min;
1864d723e5aSJoerg Sonnenberger if (th_max == 0)
1874d723e5aSJoerg Sonnenberger rp->red_thmax = default_th_max;
1884d723e5aSJoerg Sonnenberger else
1894d723e5aSJoerg Sonnenberger rp->red_thmax = th_max;
1904d723e5aSJoerg Sonnenberger
1914d723e5aSJoerg Sonnenberger rp->red_flags = flags;
1924d723e5aSJoerg Sonnenberger
1934d723e5aSJoerg Sonnenberger if (pkttime == 0)
1944d723e5aSJoerg Sonnenberger /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
1954d723e5aSJoerg Sonnenberger rp->red_pkttime = 800;
1964d723e5aSJoerg Sonnenberger else
1974d723e5aSJoerg Sonnenberger rp->red_pkttime = pkttime;
1984d723e5aSJoerg Sonnenberger
1994d723e5aSJoerg Sonnenberger if (weight == 0) {
2004d723e5aSJoerg Sonnenberger /* when the link is very slow, adjust red parameters */
2014d723e5aSJoerg Sonnenberger npkts_per_sec = 1000000 / rp->red_pkttime;
2024d723e5aSJoerg Sonnenberger if (npkts_per_sec < 50) {
2034d723e5aSJoerg Sonnenberger /* up to about 400Kbps */
2044d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_2;
2054d723e5aSJoerg Sonnenberger } else if (npkts_per_sec < 300) {
2064d723e5aSJoerg Sonnenberger /* up to about 2.4Mbps */
2074d723e5aSJoerg Sonnenberger rp->red_weight = W_WEIGHT_1;
2084d723e5aSJoerg Sonnenberger }
2094d723e5aSJoerg Sonnenberger }
2104d723e5aSJoerg Sonnenberger
2114d723e5aSJoerg Sonnenberger /* calculate wshift. weight must be power of 2 */
2124d723e5aSJoerg Sonnenberger w = rp->red_weight;
2134d723e5aSJoerg Sonnenberger for (i = 0; w > 1; i++)
2144d723e5aSJoerg Sonnenberger w = w >> 1;
2154d723e5aSJoerg Sonnenberger rp->red_wshift = i;
2164d723e5aSJoerg Sonnenberger w = 1 << rp->red_wshift;
2174d723e5aSJoerg Sonnenberger if (w != rp->red_weight) {
2184b1cf444SSascha Wildner kprintf("invalid weight value %d for red! use %d\n",
2194d723e5aSJoerg Sonnenberger rp->red_weight, w);
2204d723e5aSJoerg Sonnenberger rp->red_weight = w;
2214d723e5aSJoerg Sonnenberger }
2224d723e5aSJoerg Sonnenberger
2234d723e5aSJoerg Sonnenberger /*
2244d723e5aSJoerg Sonnenberger * thmin_s and thmax_s are scaled versions of th_min and th_max
2254d723e5aSJoerg Sonnenberger * to be compared with avg.
2264d723e5aSJoerg Sonnenberger */
2274d723e5aSJoerg Sonnenberger rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
2284d723e5aSJoerg Sonnenberger rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
2294d723e5aSJoerg Sonnenberger
2304d723e5aSJoerg Sonnenberger /*
2314d723e5aSJoerg Sonnenberger * precompute probability denominator
2324d723e5aSJoerg Sonnenberger * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
2334d723e5aSJoerg Sonnenberger */
2344d723e5aSJoerg Sonnenberger rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
2354d723e5aSJoerg Sonnenberger * rp->red_inv_pmax) << FP_SHIFT;
2364d723e5aSJoerg Sonnenberger
2374d723e5aSJoerg Sonnenberger /* allocate weight table */
2384d723e5aSJoerg Sonnenberger rp->red_wtab = wtab_alloc(rp->red_weight);
2394d723e5aSJoerg Sonnenberger
2404d723e5aSJoerg Sonnenberger microtime(&rp->red_last);
2414d723e5aSJoerg Sonnenberger return (rp);
2424d723e5aSJoerg Sonnenberger }
2434d723e5aSJoerg Sonnenberger
2444d723e5aSJoerg Sonnenberger void
red_destroy(red_t * rp)2454d723e5aSJoerg Sonnenberger red_destroy(red_t *rp)
2464d723e5aSJoerg Sonnenberger {
2474d723e5aSJoerg Sonnenberger wtab_destroy(rp->red_wtab);
248efda3bd0SMatthew Dillon kfree(rp, M_ALTQ);
2494d723e5aSJoerg Sonnenberger }
2504d723e5aSJoerg Sonnenberger
2514d723e5aSJoerg Sonnenberger void
red_getstats(red_t * rp,struct redstats * sp)2524d723e5aSJoerg Sonnenberger red_getstats(red_t *rp, struct redstats *sp)
2534d723e5aSJoerg Sonnenberger {
2544d723e5aSJoerg Sonnenberger sp->q_avg = rp->red_avg >> rp->red_wshift;
2554d723e5aSJoerg Sonnenberger sp->xmit_cnt = rp->red_stats.xmit_cnt;
2564d723e5aSJoerg Sonnenberger sp->drop_cnt = rp->red_stats.drop_cnt;
2574d723e5aSJoerg Sonnenberger sp->drop_forced = rp->red_stats.drop_forced;
2584d723e5aSJoerg Sonnenberger sp->drop_unforced = rp->red_stats.drop_unforced;
2594d723e5aSJoerg Sonnenberger sp->marked_packets = rp->red_stats.marked_packets;
2604d723e5aSJoerg Sonnenberger }
2614d723e5aSJoerg Sonnenberger
2624d723e5aSJoerg Sonnenberger int
red_addq(red_t * rp,class_queue_t * q,struct mbuf * m,struct altq_pktattr * pktattr)2634d723e5aSJoerg Sonnenberger red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct altq_pktattr *pktattr)
2644d723e5aSJoerg Sonnenberger {
2654d723e5aSJoerg Sonnenberger int avg, droptype;
2664d723e5aSJoerg Sonnenberger int n;
2674d723e5aSJoerg Sonnenberger
2684d723e5aSJoerg Sonnenberger avg = rp->red_avg;
2694d723e5aSJoerg Sonnenberger
2704d723e5aSJoerg Sonnenberger /*
2714d723e5aSJoerg Sonnenberger * if we were idle, we pretend that n packets arrived during
2724d723e5aSJoerg Sonnenberger * the idle period.
2734d723e5aSJoerg Sonnenberger */
2744d723e5aSJoerg Sonnenberger if (rp->red_idle) {
2754d723e5aSJoerg Sonnenberger struct timeval now;
2764d723e5aSJoerg Sonnenberger int t;
2774d723e5aSJoerg Sonnenberger
2784d723e5aSJoerg Sonnenberger rp->red_idle = 0;
2794d723e5aSJoerg Sonnenberger microtime(&now);
2804d723e5aSJoerg Sonnenberger t = (now.tv_sec - rp->red_last.tv_sec);
2814d723e5aSJoerg Sonnenberger if (t > 60) {
2824d723e5aSJoerg Sonnenberger /*
2834d723e5aSJoerg Sonnenberger * being idle for more than 1 minute, set avg to zero.
2844d723e5aSJoerg Sonnenberger * this prevents t from overflow.
2854d723e5aSJoerg Sonnenberger */
2864d723e5aSJoerg Sonnenberger avg = 0;
2874d723e5aSJoerg Sonnenberger } else {
2884d723e5aSJoerg Sonnenberger t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
2894d723e5aSJoerg Sonnenberger n = t / rp->red_pkttime - 1;
2904d723e5aSJoerg Sonnenberger
2914d723e5aSJoerg Sonnenberger /* the following line does (avg = (1 - Wq)^n * avg) */
2924d723e5aSJoerg Sonnenberger if (n > 0)
2934d723e5aSJoerg Sonnenberger avg = (avg >> FP_SHIFT) *
2944d723e5aSJoerg Sonnenberger pow_w(rp->red_wtab, n);
2954d723e5aSJoerg Sonnenberger }
2964d723e5aSJoerg Sonnenberger }
2974d723e5aSJoerg Sonnenberger
2984d723e5aSJoerg Sonnenberger /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
2994d723e5aSJoerg Sonnenberger avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
3004d723e5aSJoerg Sonnenberger rp->red_avg = avg; /* save the new value */
3014d723e5aSJoerg Sonnenberger
3024d723e5aSJoerg Sonnenberger /*
3034d723e5aSJoerg Sonnenberger * red_count keeps a tally of arriving traffic that has not
3044d723e5aSJoerg Sonnenberger * been dropped.
3054d723e5aSJoerg Sonnenberger */
3064d723e5aSJoerg Sonnenberger rp->red_count++;
3074d723e5aSJoerg Sonnenberger
3084d723e5aSJoerg Sonnenberger /* see if we drop early */
3094d723e5aSJoerg Sonnenberger droptype = DTYPE_NODROP;
3104d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmin_s && qlen(q) > 1) {
3114d723e5aSJoerg Sonnenberger if (avg >= rp->red_thmax_s) {
3124d723e5aSJoerg Sonnenberger /* avg >= th_max: forced drop */
3134d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED;
3144d723e5aSJoerg Sonnenberger } else if (rp->red_old == 0) {
3154d723e5aSJoerg Sonnenberger /* first exceeds th_min */
3164d723e5aSJoerg Sonnenberger rp->red_count = 1;
3174d723e5aSJoerg Sonnenberger rp->red_old = 1;
3184d723e5aSJoerg Sonnenberger } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
3194d723e5aSJoerg Sonnenberger rp->red_probd, rp->red_count)) {
3204d723e5aSJoerg Sonnenberger /* mark or drop by red */
3214d723e5aSJoerg Sonnenberger if ((rp->red_flags & REDF_ECN) &&
3224d723e5aSJoerg Sonnenberger mark_ecn(m, pktattr, rp->red_flags)) {
3234d723e5aSJoerg Sonnenberger /* successfully marked. do not drop. */
3244d723e5aSJoerg Sonnenberger rp->red_count = 0;
3254d723e5aSJoerg Sonnenberger #ifdef RED_STATS
3264d723e5aSJoerg Sonnenberger rp->red_stats.marked_packets++;
3274d723e5aSJoerg Sonnenberger #endif
3284d723e5aSJoerg Sonnenberger } else {
3294d723e5aSJoerg Sonnenberger /* unforced drop by red */
3304d723e5aSJoerg Sonnenberger droptype = DTYPE_EARLY;
3314d723e5aSJoerg Sonnenberger }
3324d723e5aSJoerg Sonnenberger }
3334d723e5aSJoerg Sonnenberger } else {
3344d723e5aSJoerg Sonnenberger /* avg < th_min */
3354d723e5aSJoerg Sonnenberger rp->red_old = 0;
3364d723e5aSJoerg Sonnenberger }
3374d723e5aSJoerg Sonnenberger
3384d723e5aSJoerg Sonnenberger /*
3394d723e5aSJoerg Sonnenberger * if the queue length hits the hard limit, it's a forced drop.
3404d723e5aSJoerg Sonnenberger */
3414d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
3424d723e5aSJoerg Sonnenberger droptype = DTYPE_FORCED;
3434d723e5aSJoerg Sonnenberger
3444d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP
3454d723e5aSJoerg Sonnenberger /* if successful or forced drop, enqueue this packet. */
3464d723e5aSJoerg Sonnenberger if (droptype != DTYPE_EARLY)
3474d723e5aSJoerg Sonnenberger _addq(q, m);
3484d723e5aSJoerg Sonnenberger #else
3494d723e5aSJoerg Sonnenberger /* if successful, enqueue this packet. */
3504d723e5aSJoerg Sonnenberger if (droptype == DTYPE_NODROP)
3514d723e5aSJoerg Sonnenberger _addq(q, m);
3524d723e5aSJoerg Sonnenberger #endif
3534d723e5aSJoerg Sonnenberger if (droptype != DTYPE_NODROP) {
3544d723e5aSJoerg Sonnenberger if (droptype == DTYPE_EARLY) {
3554d723e5aSJoerg Sonnenberger /* drop the incoming packet */
3564d723e5aSJoerg Sonnenberger #ifdef RED_STATS
3574d723e5aSJoerg Sonnenberger rp->red_stats.drop_unforced++;
3584d723e5aSJoerg Sonnenberger #endif
3594d723e5aSJoerg Sonnenberger } else {
3604d723e5aSJoerg Sonnenberger /* forced drop, select a victim packet in the queue. */
3614d723e5aSJoerg Sonnenberger #ifdef RED_RANDOM_DROP
3624d723e5aSJoerg Sonnenberger m = _getq_random(q);
3634d723e5aSJoerg Sonnenberger #endif
3644d723e5aSJoerg Sonnenberger #ifdef RED_STATS
3654d723e5aSJoerg Sonnenberger rp->red_stats.drop_forced++;
3664d723e5aSJoerg Sonnenberger #endif
3674d723e5aSJoerg Sonnenberger }
3684d723e5aSJoerg Sonnenberger #ifdef RED_STATS
3694d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
3704d723e5aSJoerg Sonnenberger #endif
3714d723e5aSJoerg Sonnenberger rp->red_count = 0;
3724d723e5aSJoerg Sonnenberger m_freem(m);
3734d723e5aSJoerg Sonnenberger return (-1);
3744d723e5aSJoerg Sonnenberger }
3754d723e5aSJoerg Sonnenberger /* successfully queued */
3764d723e5aSJoerg Sonnenberger #ifdef RED_STATS
3774d723e5aSJoerg Sonnenberger PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
3784d723e5aSJoerg Sonnenberger #endif
3794d723e5aSJoerg Sonnenberger return (0);
3804d723e5aSJoerg Sonnenberger }
3814d723e5aSJoerg Sonnenberger
3824d723e5aSJoerg Sonnenberger /*
3834d723e5aSJoerg Sonnenberger * early-drop probability is calculated as follows:
3844d723e5aSJoerg Sonnenberger * prob = p_max * (avg - th_min) / (th_max - th_min)
3854d723e5aSJoerg Sonnenberger * prob_a = prob / (2 - count*prob)
3864d723e5aSJoerg Sonnenberger * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
3874d723e5aSJoerg Sonnenberger * here prob_a increases as successive undrop count increases.
3884d723e5aSJoerg Sonnenberger * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
3894d723e5aSJoerg Sonnenberger * becomes 1 when (count >= (2 / prob))).
3904d723e5aSJoerg Sonnenberger */
3914d723e5aSJoerg Sonnenberger int
drop_early(int fp_len,int fp_probd,int count)3924d723e5aSJoerg Sonnenberger drop_early(int fp_len, int fp_probd, int count)
3934d723e5aSJoerg Sonnenberger {
3944d723e5aSJoerg Sonnenberger int d; /* denominator of drop-probability */
3954d723e5aSJoerg Sonnenberger
3964d723e5aSJoerg Sonnenberger d = fp_probd - count * fp_len;
3974d723e5aSJoerg Sonnenberger if (d <= 0) {
3984d723e5aSJoerg Sonnenberger /* count exceeds the hard limit: drop or mark */
3994d723e5aSJoerg Sonnenberger return (1);
4004d723e5aSJoerg Sonnenberger }
4014d723e5aSJoerg Sonnenberger
4024d723e5aSJoerg Sonnenberger /*
4034d723e5aSJoerg Sonnenberger * now the range of d is [1..600] in fixed-point. (when
4044d723e5aSJoerg Sonnenberger * th_max-th_min=10 and p_max=1/30)
4054d723e5aSJoerg Sonnenberger * drop probability = (avg - TH_MIN) / d
4064d723e5aSJoerg Sonnenberger */
4074d723e5aSJoerg Sonnenberger
4080ced1954SMatthew Dillon if ((karc4random() % d) < fp_len) {
4094d723e5aSJoerg Sonnenberger /* drop or mark */
4104d723e5aSJoerg Sonnenberger return (1);
4114d723e5aSJoerg Sonnenberger }
4124d723e5aSJoerg Sonnenberger /* no drop/mark */
4134d723e5aSJoerg Sonnenberger return (0);
4144d723e5aSJoerg Sonnenberger }
4154d723e5aSJoerg Sonnenberger
4164d723e5aSJoerg Sonnenberger /*
4174d723e5aSJoerg Sonnenberger * try to mark CE bit to the packet.
4184d723e5aSJoerg Sonnenberger * returns 1 if successfully marked, 0 otherwise.
4194d723e5aSJoerg Sonnenberger */
4204d723e5aSJoerg Sonnenberger int
mark_ecn(struct mbuf * m,struct altq_pktattr * pktattr,int flags)4214d723e5aSJoerg Sonnenberger mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
4224d723e5aSJoerg Sonnenberger {
4234d723e5aSJoerg Sonnenberger struct mbuf *m0;
4244d723e5aSJoerg Sonnenberger void *hdr;
4254d723e5aSJoerg Sonnenberger int af;
4264d723e5aSJoerg Sonnenberger
427dd5c35afSMatthew Dillon if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
428315a7da3SJan Lentfer af = m->m_pkthdr.pf.ecn_af;
429315a7da3SJan Lentfer hdr = m->m_pkthdr.pf.hdr;
430dd5c35afSMatthew Dillon } else if (pktattr) {
431dd5c35afSMatthew Dillon af = pktattr->pattr_af;
432dd5c35afSMatthew Dillon hdr = pktattr->pattr_hdr;
433dd5c35afSMatthew Dillon } else {
434dd5c35afSMatthew Dillon return (0);
435dd5c35afSMatthew Dillon }
4364d723e5aSJoerg Sonnenberger
4374d723e5aSJoerg Sonnenberger if (af != AF_INET && af != AF_INET6)
4384d723e5aSJoerg Sonnenberger return (0);
4394d723e5aSJoerg Sonnenberger
4404d723e5aSJoerg Sonnenberger /* verify that pattr_hdr is within the mbuf data */
4414d723e5aSJoerg Sonnenberger for (m0 = m; m0 != NULL; m0 = m0->m_next) {
4424d723e5aSJoerg Sonnenberger if (((caddr_t)hdr >= m0->m_data) &&
4434d723e5aSJoerg Sonnenberger ((caddr_t)hdr < m0->m_data + m0->m_len))
4444d723e5aSJoerg Sonnenberger break;
4454d723e5aSJoerg Sonnenberger }
4464d723e5aSJoerg Sonnenberger if (m0 == NULL) {
4474d723e5aSJoerg Sonnenberger /* ick, tag info is stale */
4484d723e5aSJoerg Sonnenberger return (0);
4494d723e5aSJoerg Sonnenberger }
4504d723e5aSJoerg Sonnenberger
4514d723e5aSJoerg Sonnenberger switch (af) {
4524d723e5aSJoerg Sonnenberger case AF_INET:
4534d723e5aSJoerg Sonnenberger if (flags & REDF_ECN4) {
4544d723e5aSJoerg Sonnenberger struct ip *ip = hdr;
4554d723e5aSJoerg Sonnenberger uint8_t otos;
4564d723e5aSJoerg Sonnenberger int sum;
4574d723e5aSJoerg Sonnenberger
4584d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
4594d723e5aSJoerg Sonnenberger return (0); /* not-ECT */
4604d723e5aSJoerg Sonnenberger if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
4614d723e5aSJoerg Sonnenberger return (1); /* already marked */
4624d723e5aSJoerg Sonnenberger
4634d723e5aSJoerg Sonnenberger /*
4644d723e5aSJoerg Sonnenberger * ecn-capable but not marked,
4654d723e5aSJoerg Sonnenberger * mark CE and update checksum
4664d723e5aSJoerg Sonnenberger */
4674d723e5aSJoerg Sonnenberger otos = ip->ip_tos;
4684d723e5aSJoerg Sonnenberger ip->ip_tos |= IPTOS_ECN_CE;
4694d723e5aSJoerg Sonnenberger /*
4704d723e5aSJoerg Sonnenberger * update checksum (from RFC1624)
4714d723e5aSJoerg Sonnenberger * HC' = ~(~HC + ~m + m')
4724d723e5aSJoerg Sonnenberger */
4734d723e5aSJoerg Sonnenberger sum = ~ntohs(ip->ip_sum) & 0xffff;
4744d723e5aSJoerg Sonnenberger sum += (~otos & 0xffff) + ip->ip_tos;
4754d723e5aSJoerg Sonnenberger sum = (sum >> 16) + (sum & 0xffff);
4764d723e5aSJoerg Sonnenberger sum += (sum >> 16); /* add carry */
4774d723e5aSJoerg Sonnenberger ip->ip_sum = htons(~sum & 0xffff);
4784d723e5aSJoerg Sonnenberger return (1);
4794d723e5aSJoerg Sonnenberger }
4804d723e5aSJoerg Sonnenberger break;
4814d723e5aSJoerg Sonnenberger #ifdef INET6
4824d723e5aSJoerg Sonnenberger case AF_INET6:
4834d723e5aSJoerg Sonnenberger if (flags & REDF_ECN6) {
4844d723e5aSJoerg Sonnenberger struct ip6_hdr *ip6 = hdr;
4854d723e5aSJoerg Sonnenberger uint32_t flowlabel;
4864d723e5aSJoerg Sonnenberger
4874d723e5aSJoerg Sonnenberger flowlabel = ntohl(ip6->ip6_flow);
4884d723e5aSJoerg Sonnenberger if ((flowlabel >> 28) != 6)
4894d723e5aSJoerg Sonnenberger return (0); /* version mismatch! */
4904d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
4914d723e5aSJoerg Sonnenberger (IPTOS_ECN_NOTECT << 20))
4924d723e5aSJoerg Sonnenberger return (0); /* not-ECT */
4934d723e5aSJoerg Sonnenberger if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
4944d723e5aSJoerg Sonnenberger (IPTOS_ECN_CE << 20))
4954d723e5aSJoerg Sonnenberger return (1); /* already marked */
4964d723e5aSJoerg Sonnenberger /*
4974d723e5aSJoerg Sonnenberger * ecn-capable but not marked, mark CE
4984d723e5aSJoerg Sonnenberger */
4994d723e5aSJoerg Sonnenberger flowlabel |= (IPTOS_ECN_CE << 20);
5004d723e5aSJoerg Sonnenberger ip6->ip6_flow = htonl(flowlabel);
5014d723e5aSJoerg Sonnenberger return (1);
5024d723e5aSJoerg Sonnenberger }
5034d723e5aSJoerg Sonnenberger break;
5044d723e5aSJoerg Sonnenberger #endif /* INET6 */
5054d723e5aSJoerg Sonnenberger }
5064d723e5aSJoerg Sonnenberger
5074d723e5aSJoerg Sonnenberger /* not marked */
5084d723e5aSJoerg Sonnenberger return (0);
5094d723e5aSJoerg Sonnenberger }
5104d723e5aSJoerg Sonnenberger
5114d723e5aSJoerg Sonnenberger struct mbuf *
red_getq(red_t * rp,class_queue_t * q)5124d723e5aSJoerg Sonnenberger red_getq(red_t *rp, class_queue_t *q)
5134d723e5aSJoerg Sonnenberger {
5144d723e5aSJoerg Sonnenberger struct mbuf *m;
5154d723e5aSJoerg Sonnenberger
5164d723e5aSJoerg Sonnenberger if ((m = _getq(q)) == NULL) {
5174d723e5aSJoerg Sonnenberger if (rp->red_idle == 0) {
5184d723e5aSJoerg Sonnenberger rp->red_idle = 1;
5194d723e5aSJoerg Sonnenberger microtime(&rp->red_last);
5204d723e5aSJoerg Sonnenberger }
5214d723e5aSJoerg Sonnenberger return NULL;
5224d723e5aSJoerg Sonnenberger }
5234d723e5aSJoerg Sonnenberger
5244d723e5aSJoerg Sonnenberger rp->red_idle = 0;
5254d723e5aSJoerg Sonnenberger return (m);
5264d723e5aSJoerg Sonnenberger }
5274d723e5aSJoerg Sonnenberger
5284d723e5aSJoerg Sonnenberger /*
5294d723e5aSJoerg Sonnenberger * helper routine to calibrate avg during idle.
5304d723e5aSJoerg Sonnenberger * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
5314d723e5aSJoerg Sonnenberger * here Wq = 1/weight and the code assumes Wq is close to zero.
5324d723e5aSJoerg Sonnenberger *
5334d723e5aSJoerg Sonnenberger * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
5344d723e5aSJoerg Sonnenberger */
5354d723e5aSJoerg Sonnenberger static SLIST_HEAD(, wtab) wtab_list = SLIST_HEAD_INITIALIZER(&wtab_list);
5364d723e5aSJoerg Sonnenberger
5374d723e5aSJoerg Sonnenberger struct wtab *
wtab_alloc(int weight)5384d723e5aSJoerg Sonnenberger wtab_alloc(int weight)
5394d723e5aSJoerg Sonnenberger {
5404d723e5aSJoerg Sonnenberger struct wtab *w;
5414d723e5aSJoerg Sonnenberger int i;
5424d723e5aSJoerg Sonnenberger
5434d723e5aSJoerg Sonnenberger SLIST_FOREACH(w, &wtab_list, w_link) {
5444d723e5aSJoerg Sonnenberger if (w->w_weight == weight) {
5454d723e5aSJoerg Sonnenberger w->w_refcount++;
5464d723e5aSJoerg Sonnenberger return (w);
5474d723e5aSJoerg Sonnenberger }
5484d723e5aSJoerg Sonnenberger }
5494d723e5aSJoerg Sonnenberger
550efda3bd0SMatthew Dillon w = kmalloc(sizeof(*w), M_ALTQ, M_WAITOK | M_ZERO);
5514d723e5aSJoerg Sonnenberger w->w_weight = weight;
5524d723e5aSJoerg Sonnenberger w->w_refcount = 1;
5534d723e5aSJoerg Sonnenberger SLIST_INSERT_HEAD(&wtab_list, w, w_link);
5544d723e5aSJoerg Sonnenberger
5554d723e5aSJoerg Sonnenberger /* initialize the weight table */
5564d723e5aSJoerg Sonnenberger w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
5574d723e5aSJoerg Sonnenberger for (i = 1; i < 32; i++) {
5584d723e5aSJoerg Sonnenberger w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
5594d723e5aSJoerg Sonnenberger if (w->w_tab[i] == 0 && w->w_param_max == 0)
5604d723e5aSJoerg Sonnenberger w->w_param_max = 1 << i;
5614d723e5aSJoerg Sonnenberger }
5624d723e5aSJoerg Sonnenberger
5634d723e5aSJoerg Sonnenberger return (w);
5644d723e5aSJoerg Sonnenberger }
5654d723e5aSJoerg Sonnenberger
5664d723e5aSJoerg Sonnenberger int
wtab_destroy(struct wtab * w)5674d723e5aSJoerg Sonnenberger wtab_destroy(struct wtab *w)
5684d723e5aSJoerg Sonnenberger {
5694d723e5aSJoerg Sonnenberger if (--w->w_refcount > 0)
5704d723e5aSJoerg Sonnenberger return (0);
5714d723e5aSJoerg Sonnenberger
5724d723e5aSJoerg Sonnenberger SLIST_REMOVE(&wtab_list, w, wtab, w_link);
573efda3bd0SMatthew Dillon kfree(w, M_ALTQ);
5744d723e5aSJoerg Sonnenberger
5754d723e5aSJoerg Sonnenberger return (0);
5764d723e5aSJoerg Sonnenberger }
5774d723e5aSJoerg Sonnenberger
5784d723e5aSJoerg Sonnenberger int32_t
pow_w(struct wtab * w,int n)5794d723e5aSJoerg Sonnenberger pow_w(struct wtab *w, int n)
5804d723e5aSJoerg Sonnenberger {
5814d723e5aSJoerg Sonnenberger int i, bit;
5824d723e5aSJoerg Sonnenberger int32_t val;
5834d723e5aSJoerg Sonnenberger
5844d723e5aSJoerg Sonnenberger if (n >= w->w_param_max)
5854d723e5aSJoerg Sonnenberger return (0);
5864d723e5aSJoerg Sonnenberger
5874d723e5aSJoerg Sonnenberger val = 1 << FP_SHIFT;
5884d723e5aSJoerg Sonnenberger if (n <= 0)
5894d723e5aSJoerg Sonnenberger return (val);
5904d723e5aSJoerg Sonnenberger
5914d723e5aSJoerg Sonnenberger bit = 1;
5924d723e5aSJoerg Sonnenberger i = 0;
5934d723e5aSJoerg Sonnenberger while (n) {
5944d723e5aSJoerg Sonnenberger if (n & bit) {
5954d723e5aSJoerg Sonnenberger val = (val * w->w_tab[i]) >> FP_SHIFT;
5964d723e5aSJoerg Sonnenberger n &= ~bit;
5974d723e5aSJoerg Sonnenberger }
5984d723e5aSJoerg Sonnenberger i++;
5994d723e5aSJoerg Sonnenberger bit <<= 1;
6004d723e5aSJoerg Sonnenberger }
6014d723e5aSJoerg Sonnenberger return (val);
6024d723e5aSJoerg Sonnenberger }
6034d723e5aSJoerg Sonnenberger
6044d723e5aSJoerg Sonnenberger #endif /* ALTQ_RED */
605