xref: /freebsd/sys/net/altq/altq_red.c (revision 772e66a6)
1 /*	$FreeBSD$	*/
2 /*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 1997-2003
6  *	Sony Computer Science Laboratories Inc.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30 /*
31  * Copyright (c) 1990-1994 Regents of the University of California.
32  * All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  * 3. All advertising materials mentioning features or use of this software
43  *    must display the following acknowledgement:
44  *	This product includes software developed by the Computer Systems
45  *	Engineering Group at Lawrence Berkeley Laboratory.
46  * 4. Neither the name of the University nor of the Laboratory may be used
47  *    to endorse or promote products derived from this software without
48  *    specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  */
62 
63 #if defined(__FreeBSD__) || defined(__NetBSD__)
64 #include "opt_altq.h"
65 #include "opt_inet.h"
66 #ifdef __FreeBSD__
67 #include "opt_inet6.h"
68 #endif
69 #endif /* __FreeBSD__ || __NetBSD__ */
70 #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
71 
72 #include <sys/param.h>
73 #include <sys/malloc.h>
74 #include <sys/mbuf.h>
75 #include <sys/socket.h>
76 #include <sys/systm.h>
77 #include <sys/errno.h>
78 #if 1 /* ALTQ3_COMPAT */
79 #include <sys/sockio.h>
80 #include <sys/proc.h>
81 #include <sys/kernel.h>
82 #ifdef ALTQ_FLOWVALVE
83 #include <sys/queue.h>
84 #include <sys/time.h>
85 #endif
86 #endif /* ALTQ3_COMPAT */
87 
88 #include <net/if.h>
89 #include <net/if_var.h>
90 
91 #include <netinet/in.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/ip.h>
94 #ifdef INET6
95 #include <netinet/ip6.h>
96 #endif
97 
98 #include <netpfil/pf/pf.h>
99 #include <netpfil/pf/pf_altq.h>
100 #include <netpfil/pf/pf_mtag.h>
101 #include <net/altq/altq.h>
102 #include <net/altq/altq_red.h>
103 #ifdef ALTQ3_COMPAT
104 #include <net/altq/altq_conf.h>
105 #ifdef ALTQ_FLOWVALVE
106 #include <net/altq/altq_flowvalve.h>
107 #endif
108 #endif
109 
110 /*
111  * ALTQ/RED (Random Early Detection) implementation using 32-bit
112  * fixed-point calculation.
113  *
114  * written by kjc using the ns code as a reference.
115  * you can learn more about red and ns from Sally's home page at
116  * http://www-nrg.ee.lbl.gov/floyd/
117  *
118  * most of the red parameter values are fixed in this implementation
119  * to prevent fixed-point overflow/underflow.
120  * if you change the parameters, watch out for overflow/underflow!
121  *
122  * the parameters used are recommended values by Sally.
123  * the corresponding ns config looks:
124  *	q_weight=0.00195
125  *	minthresh=5 maxthresh=15 queue-size=60
126  *	linterm=30
127  *	dropmech=drop-tail
128  *	bytes=false (can't be handled by 32-bit fixed-point)
129  *	doubleq=false dqthresh=false
130  *	wait=true
131  */
132 /*
133  * alternative red parameters for a slow link.
134  *
135  * assume the queue length becomes from zero to L and keeps L, it takes
136  * N packets for q_avg to reach 63% of L.
137  * when q_weight is 0.002, N is about 500 packets.
138  * for a slow link like dial-up, 500 packets takes more than 1 minute!
139  * when q_weight is 0.008, N is about 127 packets.
140  * when q_weight is 0.016, N is about 63 packets.
141  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
142  * are allowed for 0.016.
143  * see Sally's paper for more details.
144  */
145 /* normal red parameters */
146 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
147 				/* q_weight = 0.00195 */
148 
149 /* red parameters for a slow link */
150 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
151 				/* q_weight = 0.0078125 */
152 
153 /* red parameters for a very slow link (e.g., dialup) */
154 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
155 				/* q_weight = 0.015625 */
156 
157 /* fixed-point uses 12-bit decimal places */
158 #define	FP_SHIFT	12	/* fixed-point shift */
159 
160 /* red parameters for drop probability */
161 #define	INV_P_MAX	10	/* inverse of max drop probability */
162 #define	TH_MIN		5	/* min threshold */
163 #define	TH_MAX		15	/* max threshold */
164 
#define	RED_LIMIT	60	/* default max queue length */
166 #define	RED_STATS		/* collect statistics */
167 
168 /*
169  * our default policy for forced-drop is drop-tail.
170  * (in altq-1.1.2 or earlier, the default was random-drop.
171  * but it makes more sense to punish the cause of the surge.)
172  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
173  */
174 
175 #ifdef ALTQ3_COMPAT
176 #ifdef ALTQ_FLOWVALVE
177 /*
 * flow-valve is an extension to protect red from unresponsive flows
179  * and to promote end-to-end congestion control.
180  * flow-valve observes the average drop rates of the flows that have
181  * experienced packet drops in the recent past.
182  * when the average drop rate exceeds the threshold, the flow is
183  * blocked by the flow-valve.  the trapped flow should back off
184  * exponentially to escape from the flow-valve.
185  */
186 #ifdef RED_RANDOM_DROP
187 #error "random-drop can't be used with flow-valve!"
188 #endif
189 #endif /* ALTQ_FLOWVALVE */
190 
191 /* red_list keeps all red_queue_t's allocated. */
192 static red_queue_t *red_list = NULL;
193 
194 #endif /* ALTQ3_COMPAT */
195 
196 /* default red parameter values */
197 static int default_th_min = TH_MIN;
198 static int default_th_max = TH_MAX;
199 static int default_inv_pmax = INV_P_MAX;
200 
201 #ifdef ALTQ3_COMPAT
202 /* internal function prototypes */
203 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
204 static struct mbuf *red_dequeue(struct ifaltq *, int);
205 static int red_request(struct ifaltq *, int, void *);
206 static void red_purgeq(red_queue_t *);
207 static int red_detach(red_queue_t *);
208 #ifdef ALTQ_FLOWVALVE
209 static __inline struct fve *flowlist_lookup(struct flowvalve *,
210 			 struct altq_pktattr *, struct timeval *);
211 static __inline struct fve *flowlist_reclaim(struct flowvalve *,
212 					     struct altq_pktattr *);
213 static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
214 static __inline int fv_p2f(struct flowvalve *, int);
215 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
216 static struct flowvalve *fv_alloc(struct red *);
217 #endif
218 static void fv_destroy(struct flowvalve *);
219 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
220 			struct fve **);
221 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
222 			 struct fve *);
223 #endif
224 #endif /* ALTQ3_COMPAT */
225 
226 /*
227  * red support routines
228  */
/*
 * red_alloc: allocate and initialize a RED state block.
 *
 * weight:   inverse of the EWMA weight; must end up a power of 2.
 *           0 selects a default, possibly adjusted for slow links.
 * inv_pmax: inverse of the maximum drop probability; 0 = default.
 * th_min:   minimum average-queue threshold (packets); 0 = default.
 * th_max:   maximum average-queue threshold (packets); 0 = default.
 * flags:    REDF_* behavior flags (e.g. ECN marking).
 * pkttime:  average packet transmission time in usec; 0 selects the
 *           default of 800 usec (1000 bytes at 10Mbps).
 *
 * returns the new red_t, or NULL on allocation failure.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
   int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (rp == NULL)
		return (NULL);

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;

	/* allocate weight table (shared, refcounted across red instances) */
	rp->red_wtab = wtab_alloc(rp->red_weight);
	if (rp->red_wtab == NULL) {
		free(rp, M_DEVBUF);
		return (NULL);
	}

	/* start with an empty average and in the idle state */
	rp->red_avg = 0;
	rp->red_idle = 1;

	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round a non-power-of-2 weight down and warn */
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;

	/* remember when we were created; used for the idle-time estimate */
	microtime(&rp->red_last);
	return (rp);
}
318 
/*
 * red_destroy: release a RED state block allocated by red_alloc().
 * drops the reference on the shared weight table and frees rp itself.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	/* tear down the optional flow-valve state first */
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
331 
332 void
333 red_getstats(red_t *rp, struct redstats *sp)
334 {
335 	sp->q_avg		= rp->red_avg >> rp->red_wshift;
336 	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
337 	sp->drop_cnt		= rp->red_stats.drop_cnt;
338 	sp->drop_forced		= rp->red_stats.drop_forced;
339 	sp->drop_unforced	= rp->red_stats.drop_unforced;
340 	sp->marked_packets	= rp->red_stats.marked_packets;
341 }
342 
/*
 * red_addq: the enqueue decision point of RED.
 * updates the EWMA of the queue length and then either enqueues m on q,
 * ECN-marks it, or drops it (early/unforced or forced).
 *
 * returns 0 when the packet was queued, -1 when it was dropped;
 * on drop the mbuf is freed here (caller must not touch it again).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* flows already trapped by the flow-valve are dropped up front */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			/* idle time in usec, converted to a packet count */
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min: reset the "above threshold" latch */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* a drop resets the undropped-arrival tally */
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
480 
481 /*
482  * early-drop probability is calculated as follows:
483  *   prob = p_max * (avg - th_min) / (th_max - th_min)
484  *   prob_a = prob / (2 - count*prob)
485  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
486  * here prob_a increases as successive undrop count increases.
487  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
488  * becomes 1 when (count >= (2 / prob))).
489  */
490 int
491 drop_early(int fp_len, int fp_probd, int count)
492 {
493 	int	d;		/* denominator of drop-probability */
494 
495 	d = fp_probd - count * fp_len;
496 	if (d <= 0)
497 		/* count exceeds the hard limit: drop or mark */
498 		return (1);
499 
500 	/*
501 	 * now the range of d is [1..600] in fixed-point. (when
502 	 * th_max-th_min=10 and p_max=1/30)
503 	 * drop probability = (avg - TH_MIN) / d
504 	 */
505 
506 	if ((arc4random() % d) < fp_len) {
507 		/* drop or mark */
508 		return (1);
509 	}
510 	/* no drop/mark */
511 	return (0);
512 }
513 
514 /*
515  * try to mark CE bit to the packet.
516  *    returns 1 if successfully marked, 0 otherwise.
517  */
518 int
519 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
520 {
521 	struct mbuf	*m0;
522 	struct pf_mtag	*at;
523 	void		*hdr;
524 
525 	at = pf_find_mtag(m);
526 	if (at != NULL) {
527 		hdr = at->hdr;
528 #ifdef ALTQ3_COMPAT
529 	} else if (pktattr != NULL) {
530 		af = pktattr->pattr_af;
531 		hdr = pktattr->pattr_hdr;
532 #endif /* ALTQ3_COMPAT */
533 	} else
534 		return (0);
535 
536 	/* verify that pattr_hdr is within the mbuf data */
537 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
538 		if (((caddr_t)hdr >= m0->m_data) &&
539 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
540 			break;
541 	if (m0 == NULL) {
542 		/* ick, tag info is stale */
543 		return (0);
544 	}
545 
546 	switch (((struct ip *)hdr)->ip_v) {
547 	case IPVERSION:
548 		if (flags & REDF_ECN4) {
549 			struct ip *ip = hdr;
550 			u_int8_t otos;
551 			int sum;
552 
553 			if (ip->ip_v != 4)
554 				return (0);	/* version mismatch! */
555 
556 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
557 				return (0);	/* not-ECT */
558 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
559 				return (1);	/* already marked */
560 
561 			/*
562 			 * ecn-capable but not marked,
563 			 * mark CE and update checksum
564 			 */
565 			otos = ip->ip_tos;
566 			ip->ip_tos |= IPTOS_ECN_CE;
567 			/*
568 			 * update checksum (from RFC1624)
569 			 *	   HC' = ~(~HC + ~m + m')
570 			 */
571 			sum = ~ntohs(ip->ip_sum) & 0xffff;
572 			sum += (~otos & 0xffff) + ip->ip_tos;
573 			sum = (sum >> 16) + (sum & 0xffff);
574 			sum += (sum >> 16);  /* add carry */
575 			ip->ip_sum = htons(~sum & 0xffff);
576 			return (1);
577 		}
578 		break;
579 #ifdef INET6
580 	case (IPV6_VERSION >> 4):
581 		if (flags & REDF_ECN6) {
582 			struct ip6_hdr *ip6 = hdr;
583 			u_int32_t flowlabel;
584 
585 			flowlabel = ntohl(ip6->ip6_flow);
586 			if ((flowlabel >> 28) != 6)
587 				return (0);	/* version mismatch! */
588 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
589 			    (IPTOS_ECN_NOTECT << 20))
590 				return (0);	/* not-ECT */
591 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
592 			    (IPTOS_ECN_CE << 20))
593 				return (1);	/* already marked */
594 			/*
595 			 * ecn-capable but not marked,  mark CE
596 			 */
597 			flowlabel |= (IPTOS_ECN_CE << 20);
598 			ip6->ip6_flow = htonl(flowlabel);
599 			return (1);
600 		}
601 		break;
602 #endif  /* INET6 */
603 	}
604 
605 	/* not marked */
606 	return (0);
607 }
608 
609 struct mbuf *
610 red_getq(rp, q)
611 	red_t *rp;
612 	class_queue_t *q;
613 {
614 	struct mbuf *m;
615 
616 	if ((m = _getq(q)) == NULL) {
617 		if (rp->red_idle == 0) {
618 			rp->red_idle = 1;
619 			microtime(&rp->red_last);
620 		}
621 		return NULL;
622 	}
623 
624 	rp->red_idle = 0;
625 	return (m);
626 }
627 
628 /*
629  * helper routine to calibrate avg during idle.
630  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
631  * here Wq = 1/weight and the code assumes Wq is close to zero.
632  *
633  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
634  */
635 static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
636 
637 struct wtab *
638 wtab_alloc(int weight)
639 {
640 	struct wtab	*w;
641 	int		 i;
642 
643 	for (w = wtab_list; w != NULL; w = w->w_next)
644 		if (w->w_weight == weight) {
645 			w->w_refcount++;
646 			return (w);
647 		}
648 
649 	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
650 	if (w == NULL)
651 		return (NULL);
652 	w->w_weight = weight;
653 	w->w_refcount = 1;
654 	w->w_next = wtab_list;
655 	wtab_list = w;
656 
657 	/* initialize the weight table */
658 	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
659 	for (i = 1; i < 32; i++) {
660 		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
661 		if (w->w_tab[i] == 0 && w->w_param_max == 0)
662 			w->w_param_max = 1 << i;
663 	}
664 
665 	return (w);
666 }
667 
668 int
669 wtab_destroy(struct wtab *w)
670 {
671 	struct wtab	*prev;
672 
673 	if (--w->w_refcount > 0)
674 		return (0);
675 
676 	if (wtab_list == w)
677 		wtab_list = w->w_next;
678 	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
679 		if (prev->w_next == w) {
680 			prev->w_next = w->w_next;
681 			break;
682 		}
683 
684 	free(w, M_DEVBUF);
685 	return (0);
686 }
687 
688 int32_t
689 pow_w(struct wtab *w, int n)
690 {
691 	int	i, bit;
692 	int32_t	val;
693 
694 	if (n >= w->w_param_max)
695 		return (0);
696 
697 	val = 1 << FP_SHIFT;
698 	if (n <= 0)
699 		return (val);
700 
701 	bit = 1;
702 	i = 0;
703 	while (n) {
704 		if (n & bit) {
705 			val = (val * w->w_tab[i]) >> FP_SHIFT;
706 			n &= ~bit;
707 		}
708 		i++;
709 		bit <<=  1;
710 	}
711 	return (val);
712 }
713 
714 #ifdef ALTQ3_COMPAT
715 /*
716  * red device interface
717  */
718 altqdev_decl(red);
719 
720 int
721 redopen(dev, flag, fmt, p)
722 	dev_t dev;
723 	int flag, fmt;
724 #if (__FreeBSD_version > 500000)
725 	struct thread *p;
726 #else
727 	struct proc *p;
728 #endif
729 {
730 	/* everything will be done when the queueing scheme is attached. */
731 	return 0;
732 }
733 
734 int
735 redclose(dev, flag, fmt, p)
736 	dev_t dev;
737 	int flag, fmt;
738 #if (__FreeBSD_version > 500000)
739 	struct thread *p;
740 #else
741 	struct proc *p;
742 #endif
743 {
744 	red_queue_t *rqp;
745 	int err, error = 0;
746 
747 	while ((rqp = red_list) != NULL) {
748 		/* destroy all */
749 		err = red_detach(rqp);
750 		if (err != 0 && error == 0)
751 			error = err;
752 	}
753 
754 	return error;
755 }
756 
757 int
758 redioctl(dev, cmd, addr, flag, p)
759 	dev_t dev;
760 	ioctlcmd_t cmd;
761 	caddr_t addr;
762 	int flag;
763 #if (__FreeBSD_version > 500000)
764 	struct thread *p;
765 #else
766 	struct proc *p;
767 #endif
768 {
769 	red_queue_t *rqp;
770 	struct red_interface *ifacep;
771 	struct ifnet *ifp;
772 	int	error = 0;
773 
774 	/* check super-user privilege */
775 	switch (cmd) {
776 	case RED_GETSTATS:
777 		break;
778 	default:
779 #if (__FreeBSD_version > 700000)
780 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
781 #elsif (__FreeBSD_version > 400000)
782 		if ((error = suser(p)) != 0)
783 #else
784 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
785 #endif
786 			return (error);
787 		break;
788 	}
789 
790 	switch (cmd) {
791 
792 	case RED_ENABLE:
793 		ifacep = (struct red_interface *)addr;
794 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
795 			error = EBADF;
796 			break;
797 		}
798 		error = altq_enable(rqp->rq_ifq);
799 		break;
800 
801 	case RED_DISABLE:
802 		ifacep = (struct red_interface *)addr;
803 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
804 			error = EBADF;
805 			break;
806 		}
807 		error = altq_disable(rqp->rq_ifq);
808 		break;
809 
810 	case RED_IF_ATTACH:
811 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
812 		if (ifp == NULL) {
813 			error = ENXIO;
814 			break;
815 		}
816 
817 		/* allocate and initialize red_queue_t */
818 		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
819 		if (rqp == NULL) {
820 			error = ENOMEM;
821 			break;
822 		}
823 		bzero(rqp, sizeof(red_queue_t));
824 
825 		rqp->rq_q = malloc(sizeof(class_queue_t),
826 		       M_DEVBUF, M_WAITOK);
827 		if (rqp->rq_q == NULL) {
828 			free(rqp, M_DEVBUF);
829 			error = ENOMEM;
830 			break;
831 		}
832 		bzero(rqp->rq_q, sizeof(class_queue_t));
833 
834 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
835 		if (rqp->rq_red == NULL) {
836 			free(rqp->rq_q, M_DEVBUF);
837 			free(rqp, M_DEVBUF);
838 			error = ENOMEM;
839 			break;
840 		}
841 
842 		rqp->rq_ifq = &ifp->if_snd;
843 		qtail(rqp->rq_q) = NULL;
844 		qlen(rqp->rq_q) = 0;
845 		qlimit(rqp->rq_q) = RED_LIMIT;
846 		qtype(rqp->rq_q) = Q_RED;
847 
848 		/*
849 		 * set RED to this ifnet structure.
850 		 */
851 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
852 				    red_enqueue, red_dequeue, red_request,
853 				    NULL, NULL);
854 		if (error) {
855 			red_destroy(rqp->rq_red);
856 			free(rqp->rq_q, M_DEVBUF);
857 			free(rqp, M_DEVBUF);
858 			break;
859 		}
860 
861 		/* add this state to the red list */
862 		rqp->rq_next = red_list;
863 		red_list = rqp;
864 		break;
865 
866 	case RED_IF_DETACH:
867 		ifacep = (struct red_interface *)addr;
868 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
869 			error = EBADF;
870 			break;
871 		}
872 		error = red_detach(rqp);
873 		break;
874 
875 	case RED_GETSTATS:
876 		do {
877 			struct red_stats *q_stats;
878 			red_t *rp;
879 
880 			q_stats = (struct red_stats *)addr;
881 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
882 					     ALTQT_RED)) == NULL) {
883 				error = EBADF;
884 				break;
885 			}
886 
887 			q_stats->q_len 	   = qlen(rqp->rq_q);
888 			q_stats->q_limit   = qlimit(rqp->rq_q);
889 
890 			rp = rqp->rq_red;
891 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
892 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
893 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
894 			q_stats->drop_forced   = rp->red_stats.drop_forced;
895 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
896 			q_stats->marked_packets = rp->red_stats.marked_packets;
897 
898 			q_stats->weight		= rp->red_weight;
899 			q_stats->inv_pmax	= rp->red_inv_pmax;
900 			q_stats->th_min		= rp->red_thmin;
901 			q_stats->th_max		= rp->red_thmax;
902 
903 #ifdef ALTQ_FLOWVALVE
904 			if (rp->red_flowvalve != NULL) {
905 				struct flowvalve *fv = rp->red_flowvalve;
906 				q_stats->fv_flows    = fv->fv_flows;
907 				q_stats->fv_pass     = fv->fv_stats.pass;
908 				q_stats->fv_predrop  = fv->fv_stats.predrop;
909 				q_stats->fv_alloc    = fv->fv_stats.alloc;
910 				q_stats->fv_escape   = fv->fv_stats.escape;
911 			} else {
912 #endif /* ALTQ_FLOWVALVE */
913 				q_stats->fv_flows    = 0;
914 				q_stats->fv_pass     = 0;
915 				q_stats->fv_predrop  = 0;
916 				q_stats->fv_alloc    = 0;
917 				q_stats->fv_escape   = 0;
918 #ifdef ALTQ_FLOWVALVE
919 			}
920 #endif /* ALTQ_FLOWVALVE */
921 		} while (/*CONSTCOND*/ 0);
922 		break;
923 
924 	case RED_CONFIG:
925 		do {
926 			struct red_conf *fc;
927 			red_t *new;
928 			int s, limit;
929 
930 			fc = (struct red_conf *)addr;
931 			if ((rqp = altq_lookup(fc->iface.red_ifname,
932 					       ALTQT_RED)) == NULL) {
933 				error = EBADF;
934 				break;
935 			}
936 			new = red_alloc(fc->red_weight,
937 					fc->red_inv_pmax,
938 					fc->red_thmin,
939 					fc->red_thmax,
940 					fc->red_flags,
941 					fc->red_pkttime);
942 			if (new == NULL) {
943 				error = ENOMEM;
944 				break;
945 			}
946 
947 #ifdef __NetBSD__
948 			s = splnet();
949 #else
950 			s = splimp();
951 #endif
952 			red_purgeq(rqp);
953 			limit = fc->red_limit;
954 			if (limit < fc->red_thmax)
955 				limit = fc->red_thmax;
956 			qlimit(rqp->rq_q) = limit;
957 			fc->red_limit = limit;	/* write back the new value */
958 
959 			red_destroy(rqp->rq_red);
960 			rqp->rq_red = new;
961 
962 			splx(s);
963 
964 			/* write back new values */
965 			fc->red_limit = limit;
966 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
967 			fc->red_thmin = rqp->rq_red->red_thmin;
968 			fc->red_thmax = rqp->rq_red->red_thmax;
969 
970 		} while (/*CONSTCOND*/ 0);
971 		break;
972 
973 	case RED_SETDEFAULTS:
974 		do {
975 			struct redparams *rp;
976 
977 			rp = (struct redparams *)addr;
978 
979 			default_th_min = rp->th_min;
980 			default_th_max = rp->th_max;
981 			default_inv_pmax = rp->inv_pmax;
982 		} while (/*CONSTCOND*/ 0);
983 		break;
984 
985 	default:
986 		error = EINVAL;
987 		break;
988 	}
989 	return error;
990 }
991 
992 static int
993 red_detach(rqp)
994 	red_queue_t *rqp;
995 {
996 	red_queue_t *tmp;
997 	int error = 0;
998 
999 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1000 		altq_disable(rqp->rq_ifq);
1001 
1002 	if ((error = altq_detach(rqp->rq_ifq)))
1003 		return (error);
1004 
1005 	if (red_list == rqp)
1006 		red_list = rqp->rq_next;
1007 	else {
1008 		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1009 			if (tmp->rq_next == rqp) {
1010 				tmp->rq_next = rqp->rq_next;
1011 				break;
1012 			}
1013 		if (tmp == NULL)
1014 			printf("red_detach: no state found in red_list!\n");
1015 	}
1016 
1017 	red_destroy(rqp->rq_red);
1018 	free(rqp->rq_q, M_DEVBUF);
1019 	free(rqp, M_DEVBUF);
1020 	return (error);
1021 }
1022 
1023 /*
1024  * enqueue routine:
1025  *
1026  *	returns: 0 when successfully queued.
1027  *		 ENOBUFS when drop occurs.
1028  */
1029 static int
1030 red_enqueue(ifq, m, pktattr)
1031 	struct ifaltq *ifq;
1032 	struct mbuf *m;
1033 	struct altq_pktattr *pktattr;
1034 {
1035 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036 
1037 	IFQ_LOCK_ASSERT(ifq);
1038 
1039 	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1040 		return ENOBUFS;
1041 	ifq->ifq_len++;
1042 	return 0;
1043 }
1044 
1045 /*
1046  * dequeue routine:
1047  *	must be called in splimp.
1048  *
1049  *	returns: mbuf dequeued.
1050  *		 NULL when no packet is available in the queue.
1051  */
1052 
1053 static struct mbuf *
1054 red_dequeue(ifq, op)
1055 	struct ifaltq *ifq;
1056 	int op;
1057 {
1058 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1059 	struct mbuf *m;
1060 
1061 	IFQ_LOCK_ASSERT(ifq);
1062 
1063 	if (op == ALTDQ_POLL)
1064 		return qhead(rqp->rq_q);
1065 
1066 	/* op == ALTDQ_REMOVE */
1067 	m =  red_getq(rqp->rq_red, rqp->rq_q);
1068 	if (m != NULL)
1069 		ifq->ifq_len--;
1070 	return (m);
1071 }
1072 
1073 static int
1074 red_request(ifq, req, arg)
1075 	struct ifaltq *ifq;
1076 	int req;
1077 	void *arg;
1078 {
1079 	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1080 
1081 	IFQ_LOCK_ASSERT(ifq);
1082 
1083 	switch (req) {
1084 	case ALTRQ_PURGE:
1085 		red_purgeq(rqp);
1086 		break;
1087 	}
1088 	return (0);
1089 }
1090 
1091 static void
1092 red_purgeq(rqp)
1093 	red_queue_t *rqp;
1094 {
1095 	_flushq(rqp->rq_q);
1096 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1097 		rqp->rq_ifq->ifq_len = 0;
1098 }
1099 
1100 #ifdef ALTQ_FLOWVALVE
1101 
1102 #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
1103 #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
1104 #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
1105 #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
1106 #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
1107 #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
1108 
1109 #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
1110 #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
1111 
1112 #define	FV_N			10	/* update fve_f every FV_N packets */
1113 
1114 #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
1115 #define	FV_TTHRESH		3  /* time threshold to delete fve */
1116 #define	FV_ALPHA		5  /* extra packet count */
1117 
1118 #define	FV_STATS
1119 
1120 #if (__FreeBSD_version > 300000)
1121 #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
1122 #else
1123 #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
1124 #endif
1125 
1126 /*
1127  * Brtt table: 127 entry table to convert drop rate (p) to
1128  * the corresponding bandwidth fraction (f)
1129  * the following equation is implemented to use scaled values,
1130  * fve_p and fve_f, in the fixed point format.
1131  *
1132  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1133  *   f = Brtt(p) / (max_th + alpha)
1134  */
1135 #define	BRTT_SIZE	128
1136 #define	BRTT_SHIFT	12
1137 #define	BRTT_MASK	0x0007f000
1138 #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1139 
/*
 * Precomputed, scaled Brtt(p) values (see formula above).  Indexed by
 * (p & BRTT_MASK) >> BRTT_SHIFT in fv_p2f(); entry 0 is never used
 * (fv_p2f() falls back to entry 1 for very small p).
 */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1158 
/*
 * Look up the flow-valve entry matching the src/dst address pair of
 * the arriving packet.
 *
 * While scanning, an entry whose last drop is older than FV_TTHRESH
 * seconds is expired (fve_lastdrop.tv_sec reset to 0) and the scan
 * stops there; the list appears to be kept in LRU order (see
 * flowlist_move_to_head/flowlist_reclaim), so later entries would be
 * at least as old.  When no match is found, fv->fv_flows is updated
 * as a side effect with the number of active entries seen.
 *
 * Returns the matching fve, or NULL when there is no match, the packet
 * carries no header info, or the address family is unknown.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries last dropped-from before this time are stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* tv_sec == 0 marks an unused entry */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* stale: expire it and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* tv_sec == 0 marks an unused entry */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* stale: expire it and stop scanning */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1228 
1229 static __inline struct fve *
1230 flowlist_reclaim(fv, pktattr)
1231 	struct flowvalve *fv;
1232 	struct altq_pktattr *pktattr;
1233 {
1234 	struct fve *fve;
1235 	struct ip *ip;
1236 #ifdef INET6
1237 	struct ip6_hdr *ip6;
1238 #endif
1239 
1240 	/*
1241 	 * get an entry from the tail of the LRU list.
1242 	 */
1243 	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1244 
1245 	switch (pktattr->pattr_af) {
1246 	case AF_INET:
1247 		ip = (struct ip *)pktattr->pattr_hdr;
1248 		fve->fve_flow.flow_af = AF_INET;
1249 		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1250 		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1251 		break;
1252 #ifdef INET6
1253 	case AF_INET6:
1254 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1255 		fve->fve_flow.flow_af = AF_INET6;
1256 		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1257 		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1258 		break;
1259 #endif
1260 	}
1261 
1262 	fve->fve_state = Green;
1263 	fve->fve_p = 0.0;
1264 	fve->fve_f = 0.0;
1265 	fve->fve_ifseq = fv->fv_ifseq - 1;
1266 	fve->fve_count = 0;
1267 
1268 	fv->fv_flows++;
1269 #ifdef FV_STATS
1270 	fv->fv_stats.alloc++;
1271 #endif
1272 	return (fve);
1273 }
1274 
1275 static __inline void
1276 flowlist_move_to_head(fv, fve)
1277 	struct flowvalve *fv;
1278 	struct fve *fve;
1279 {
1280 	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1281 		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1282 		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1283 	}
1284 }
1285 
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 *
 * Allocates the flowvalve, its FV_FLOWLISTSIZE flow entries (strung
 * onto the LRU list, all marked unused via fve_lastdrop.tv_sec == 0),
 * and builds the drop-rate-to-fraction (p2f) table from brtt_tab
 * scaled by the queue's maximum threshold.  Returns NULL on
 * allocation failure.  Currently compiled out.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	/*
	 * NOTE(review): with M_WAITOK, FreeBSD's malloc(9) should not
	 * return NULL, making these NULL checks redundant -- confirm
	 * against the target malloc(9) semantics; they are harmless.
	 */
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* 0 == entry unused */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * note: entry 0 is left uninitialized; fv_p2f() never reads it
	 * (it falls back to entry 1 for very small p).
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif
1346 
1347 static void fv_destroy(fv)
1348 	struct flowvalve *fv;
1349 {
1350 	free(fv->fv_p2ftab, M_DEVBUF);
1351 	free(fv->fv_fves, M_DEVBUF);
1352 	free(fv, M_DEVBUF);
1353 }
1354 
1355 static __inline int
1356 fv_p2f(fv, p)
1357 	struct flowvalve	*fv;
1358 	int	p;
1359 {
1360 	int val, f;
1361 
1362 	if (p >= BRTT_PMAX)
1363 		f = fv->fv_p2ftab[BRTT_SIZE-1];
1364 	else if ((val = (p & BRTT_MASK)))
1365 		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1366 	else
1367 		f = fv->fv_p2ftab[1];
1368 	return (f);
1369 }
1370 
1371 /*
1372  * check if an arriving packet should be pre-dropped.
1373  * called from red_addq() when a packet arrives.
1374  * returns 1 when the packet should be pre-dropped.
1375  * should be called in splimp.
1376  */
1377 static int
1378 fv_checkflow(fv, pktattr, fcache)
1379 	struct flowvalve *fv;
1380 	struct altq_pktattr *pktattr;
1381 	struct fve **fcache;
1382 {
1383 	struct fve *fve;
1384 	struct timeval now;
1385 
1386 	fv->fv_ifseq++;
1387 	FV_TIMESTAMP(&now);
1388 
1389 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1390 		/* no matching entry in the flowlist */
1391 		return (0);
1392 
1393 	*fcache = fve;
1394 
1395 	/* update fraction f for every FV_N packets */
1396 	if (++fve->fve_count == FV_N) {
1397 		/*
1398 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1399 		 */
1400 		fve->fve_f =
1401 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1402 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
1403 		fve->fve_ifseq = fv->fv_ifseq;
1404 		fve->fve_count = 0;
1405 	}
1406 
1407 	/*
1408 	 * overpumping test
1409 	 */
1410 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1411 		int fthresh;
1412 
1413 		/* calculate a threshold */
1414 		fthresh = fv_p2f(fv, fve->fve_p);
1415 		if (fve->fve_f > fthresh)
1416 			fve->fve_state = Red;
1417 	}
1418 
1419 	if (fve->fve_state == Red) {
1420 		/*
1421 		 * backoff test
1422 		 */
1423 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1424 			/* no drop for at least FV_BACKOFFTHRESH sec */
1425 			fve->fve_p = 0;
1426 			fve->fve_state = Green;
1427 #ifdef FV_STATS
1428 			fv->fv_stats.escape++;
1429 #endif
1430 		} else {
1431 			/* block this flow */
1432 			flowlist_move_to_head(fv, fve);
1433 			fve->fve_lastdrop = now;
1434 #ifdef FV_STATS
1435 			fv->fv_stats.predrop++;
1436 #endif
1437 			return (1);
1438 		}
1439 	}
1440 
1441 	/*
1442 	 * p = (1 - Wp) * p
1443 	 */
1444 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1445 	if (fve->fve_p < 0)
1446 		fve->fve_p = 0;
1447 #ifdef FV_STATS
1448 	fv->fv_stats.pass++;
1449 #endif
1450 	return (0);
1451 }
1452 
1453 /*
1454  * called from red_addq when a packet is dropped by red.
1455  * should be called in splimp.
1456  */
1457 static void fv_dropbyred(fv, pktattr, fcache)
1458 	struct flowvalve *fv;
1459 	struct altq_pktattr *pktattr;
1460 	struct fve *fcache;
1461 {
1462 	struct fve *fve;
1463 	struct timeval now;
1464 
1465 	if (pktattr == NULL)
1466 		return;
1467 	FV_TIMESTAMP(&now);
1468 
1469 	if (fcache != NULL)
1470 		/* the fve of this packet is already cached */
1471 		fve = fcache;
1472 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1473 		fve = flowlist_reclaim(fv, pktattr);
1474 
1475 	flowlist_move_to_head(fv, fve);
1476 
1477 	/*
1478 	 * update p:  the following line cancels the update
1479 	 *	      in fv_checkflow() and calculate
1480 	 *	p = Wp + (1 - Wp) * p
1481 	 */
1482 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1483 
1484 	fve->fve_lastdrop = now;
1485 }
1486 
1487 #endif /* ALTQ_FLOWVALVE */
1488 
1489 #ifdef KLD_MODULE
1490 
1491 static struct altqsw red_sw =
1492 	{"red", redopen, redclose, redioctl};
1493 
1494 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1495 MODULE_VERSION(altq_red, 1);
1496 
1497 #endif /* KLD_MODULE */
1498 #endif /* ALTQ3_COMPAT */
1499 
1500 #endif /* ALTQ_RED */
1501