xref: /dragonfly/sys/net/dummynet/ip_dummynet.h (revision 86d7f5d3)
1*86d7f5d3SJohn Marino /*
2*86d7f5d3SJohn Marino  * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
3*86d7f5d3SJohn Marino  * Portions Copyright (c) 2000 Akamba Corp.
4*86d7f5d3SJohn Marino  * All rights reserved
5*86d7f5d3SJohn Marino  *
6*86d7f5d3SJohn Marino  * Redistribution and use in source and binary forms, with or without
7*86d7f5d3SJohn Marino  * modification, are permitted provided that the following conditions
8*86d7f5d3SJohn Marino  * are met:
9*86d7f5d3SJohn Marino  * 1. Redistributions of source code must retain the above copyright
10*86d7f5d3SJohn Marino  *    notice, this list of conditions and the following disclaimer.
11*86d7f5d3SJohn Marino  * 2. Redistributions in binary form must reproduce the above copyright
12*86d7f5d3SJohn Marino  *    notice, this list of conditions and the following disclaimer in the
13*86d7f5d3SJohn Marino  *    documentation and/or other materials provided with the distribution.
14*86d7f5d3SJohn Marino  *
15*86d7f5d3SJohn Marino  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16*86d7f5d3SJohn Marino  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17*86d7f5d3SJohn Marino  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18*86d7f5d3SJohn Marino  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19*86d7f5d3SJohn Marino  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20*86d7f5d3SJohn Marino  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21*86d7f5d3SJohn Marino  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22*86d7f5d3SJohn Marino  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23*86d7f5d3SJohn Marino  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24*86d7f5d3SJohn Marino  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25*86d7f5d3SJohn Marino  * SUCH DAMAGE.
26*86d7f5d3SJohn Marino  *
27*86d7f5d3SJohn Marino  * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.10.2.9 2003/05/13 09:31:06 maxim Exp $
28*86d7f5d3SJohn Marino  * $DragonFly: src/sys/net/dummynet/ip_dummynet.h,v 1.19 2008/09/20 04:36:51 sephe Exp $
29*86d7f5d3SJohn Marino  */
30*86d7f5d3SJohn Marino 
31*86d7f5d3SJohn Marino #ifndef _IP_DUMMYNET_H
32*86d7f5d3SJohn Marino #define _IP_DUMMYNET_H
33*86d7f5d3SJohn Marino 
34*86d7f5d3SJohn Marino /*
35*86d7f5d3SJohn Marino  * We start with a heap, which is used in the scheduler to decide when to
36*86d7f5d3SJohn Marino  * transmit packets etc.
37*86d7f5d3SJohn Marino  *
38*86d7f5d3SJohn Marino  * The key for the heap is used for two different values:
39*86d7f5d3SJohn Marino  *
40*86d7f5d3SJohn Marino  * 1. Timer ticks- max 10K/second, so 32 bits are enough;
41*86d7f5d3SJohn Marino  *
42*86d7f5d3SJohn Marino  * 2. Virtual times.  These increase in steps of len/x, where len is the
43*86d7f5d3SJohn Marino  *    packet length, and x is either the weight of the flow, or the sum
44*86d7f5d3SJohn Marino  *    of all weights.
45*86d7f5d3SJohn Marino  *    If we limit to max 1000 flows and a max weight of 100, then x needs
46*86d7f5d3SJohn Marino  *    17 bits.  The packet size is 16 bits, so we can easily overflow if
47*86d7f5d3SJohn Marino  *    we do not allow errors.
48*86d7f5d3SJohn Marino  *
49*86d7f5d3SJohn Marino  * So we use a key "dn_key" which is 64 bits.
50*86d7f5d3SJohn Marino  *
51*86d7f5d3SJohn Marino  * MY_M is used as a shift count when doing fixed point arithmetic
52*86d7f5d3SJohn Marino  * (a better name would be useful...).
53*86d7f5d3SJohn Marino  */
54*86d7f5d3SJohn Marino typedef uint64_t	dn_key;	/* 64-bit heap sorting key: holds either timer ticks or a virtual time */
55*86d7f5d3SJohn Marino 
56*86d7f5d3SJohn Marino /*
57*86d7f5d3SJohn Marino  * Number of bits to shift left to obtain greater precision
58*86d7f5d3SJohn Marino  *
59*86d7f5d3SJohn Marino  * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
60*86d7f5d3SJohn Marino  * virtual time wraps every 15 days.
61*86d7f5d3SJohn Marino  */
62*86d7f5d3SJohn Marino #define MY_M		16	/* shift count (in bits) used for fixed-point arithmetic */
63*86d7f5d3SJohn Marino 
64*86d7f5d3SJohn Marino #ifdef _KERNEL
65*86d7f5d3SJohn Marino 
66*86d7f5d3SJohn Marino /*
67*86d7f5d3SJohn Marino  * A heap entry is made of a key and a pointer to the actual object stored
68*86d7f5d3SJohn Marino  * in the heap.
69*86d7f5d3SJohn Marino  *
70*86d7f5d3SJohn Marino  * The heap is an array of dn_heap_entry entries, dynamically allocated.
71*86d7f5d3SJohn Marino  * Current size is "size", with "elements" actually in use.
72*86d7f5d3SJohn Marino  *
73*86d7f5d3SJohn Marino  * The heap normally supports only ordered insert and extract from the top.
74*86d7f5d3SJohn Marino  * If we want to extract an object from the middle of the heap, we have to
75*86d7f5d3SJohn Marino  * know where the object itself is located in the heap (or we need to scan
76*86d7f5d3SJohn Marino  * the whole array).  To this purpose, an object has a field (int) which
77*86d7f5d3SJohn Marino  * contains the index of the object itself into the heap.  When the object
78*86d7f5d3SJohn Marino  * is moved, the field must also be updated.  The offset of the index in the
79*86d7f5d3SJohn Marino  * object is stored in the 'offset' field in the heap descriptor.  The
80*86d7f5d3SJohn Marino  * assumption is that this offset is non-zero if we want to support extract
81*86d7f5d3SJohn Marino  * from the middle.
82*86d7f5d3SJohn Marino  */
83*86d7f5d3SJohn Marino struct dn_heap_entry {	/* one heap slot: a (key, object) pair */
84*86d7f5d3SJohn Marino     dn_key key;		/* sorting key; the topmost heap element has the smallest key */
85*86d7f5d3SJohn Marino     void *object;	/* pointer to the object stored in this heap slot */
86*86d7f5d3SJohn Marino };
87*86d7f5d3SJohn Marino 
/*
 * Heap descriptor: bookkeeping for a dynamically allocated array of
 * struct dn_heap_entry.
 */
88*86d7f5d3SJohn Marino struct dn_heap {
89*86d7f5d3SJohn Marino     int size;		/* number of entries allocated in p[] */
90*86d7f5d3SJohn Marino     int elements;	/* number of entries of p[] actually in use */
    /*
     * Offset, inside each stored object, of the int field that records the
     * object's index in the heap.  It is expected to be non-zero when
     * extraction from the middle of the heap must be supported.
     */
91*86d7f5d3SJohn Marino     int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
92*86d7f5d3SJohn Marino     struct dn_heap_entry *p;	/* really an array of "size" entries */
93*86d7f5d3SJohn Marino };
94*86d7f5d3SJohn Marino 
/*
 * Kernel-side flow identifier: a type tag followed by a union of
 * per-protocol fields.  Only the "inet" variant is defined here; the fid_*
 * macros below are shorthands for its members.
 */
95*86d7f5d3SJohn Marino struct dn_flow_id {
96*86d7f5d3SJohn Marino     uint16_t fid_type;	/* ETHERTYPE_ */
97*86d7f5d3SJohn Marino     uint16_t pad;	/* explicit padding before the union */
98*86d7f5d3SJohn Marino     union {
99*86d7f5d3SJohn Marino 	struct {
100*86d7f5d3SJohn Marino 	    uint32_t dst_ip;	/* destination address */
101*86d7f5d3SJohn Marino 	    uint32_t src_ip;	/* source address */
102*86d7f5d3SJohn Marino 	    uint16_t dst_port;	/* destination port */
103*86d7f5d3SJohn Marino 	    uint16_t src_port;	/* source port */
104*86d7f5d3SJohn Marino 	    uint8_t proto;	/* protocol number */
105*86d7f5d3SJohn Marino 	    uint8_t flags;	/* NOTE(review): meaning not visible in this header */
106*86d7f5d3SJohn Marino 	} inet;
107*86d7f5d3SJohn Marino     } fid_u;
    /* Shorthand accessors for the fid_u.inet members. */
108*86d7f5d3SJohn Marino #define fid_dst_ip	fid_u.inet.dst_ip
109*86d7f5d3SJohn Marino #define fid_src_ip	fid_u.inet.src_ip
110*86d7f5d3SJohn Marino #define fid_dst_port	fid_u.inet.dst_port
111*86d7f5d3SJohn Marino #define fid_src_port	fid_u.inet.src_port
112*86d7f5d3SJohn Marino #define fid_proto	fid_u.inet.proto
113*86d7f5d3SJohn Marino #define fid_flags	fid_u.inet.flags
114*86d7f5d3SJohn Marino };
115*86d7f5d3SJohn Marino 
/*
 * Callback type taking one opaque pointer and returning nothing; stored in
 * struct dn_pkt next to dn_priv.
 * NOTE(review): presumably called to release the reference held on dn_priv
 * -- confirm against the callers.
 */
116*86d7f5d3SJohn Marino typedef void	(*ip_dn_unref_priv_t)(void *);
117*86d7f5d3SJohn Marino struct lwkt_port;	/* forward declaration; only used through pointers in this header */
118*86d7f5d3SJohn Marino 
119*86d7f5d3SJohn Marino /*
120*86d7f5d3SJohn Marino  * struct dn_pkt identifies a packet in the dummynet queue, but is also used
121*86d7f5d3SJohn Marino  * to tag packets passed back to the various destinations (ip_input(),
122*86d7f5d3SJohn Marino  * ip_output() and so on).
123*86d7f5d3SJohn Marino  *
124*86d7f5d3SJohn Marino  * It is a tag (PACKET_TAG_DUMMYNET) associated with the actual mbuf.
125*86d7f5d3SJohn Marino  */
126*86d7f5d3SJohn Marino struct dn_pkt {
127*86d7f5d3SJohn Marino     struct mbuf *dn_m;		/* the mbuf associated with this tag */
128*86d7f5d3SJohn Marino     TAILQ_ENTRY(dn_pkt) dn_next;	/* linkage in a struct dn_pkt_queue */
129*86d7f5d3SJohn Marino 
    /*
     * Opaque private pointer and its companion callback.
     * NOTE(review): presumably dn_unref_priv(dn_priv) drops a reference
     * when the packet is done -- confirm against the callers.
     */
130*86d7f5d3SJohn Marino     void *dn_priv;
131*86d7f5d3SJohn Marino     ip_dn_unref_priv_t dn_unref_priv;
132*86d7f5d3SJohn Marino 
133*86d7f5d3SJohn Marino     uint32_t dn_flags;		/* action when packet comes out. */
    /*
     * Values stored in dn_flags.
     * NOTE(review): judging by names and values, DN_FLAGS_DIR_MASK selects
     * one of the DN_TO_* destinations and DN_FLAGS_IS_PIPE is a separate
     * bit -- confirm against the code that sets dn_flags.
     */
134*86d7f5d3SJohn Marino #define DN_FLAGS_IS_PIPE	0x10
135*86d7f5d3SJohn Marino #define DN_FLAGS_DIR_MASK	0x0f
136*86d7f5d3SJohn Marino #define DN_TO_IP_OUT		1
137*86d7f5d3SJohn Marino #define DN_TO_IP_IN		2
138*86d7f5d3SJohn Marino #define DN_TO_ETH_DEMUX		4
139*86d7f5d3SJohn Marino #define DN_TO_ETH_OUT		5
140*86d7f5d3SJohn Marino #define DN_TO_MAX		6	/* one past the largest DN_TO_* value defined above */
141*86d7f5d3SJohn Marino 
142*86d7f5d3SJohn Marino     dn_key output_time;		/* when the pkt is due for delivery */
143*86d7f5d3SJohn Marino     struct ifnet *ifp;		/* interface, for ip_output */
144*86d7f5d3SJohn Marino     struct sockaddr_in *dn_dst;	/* NOTE(review): presumably the destination for ip_output -- confirm */
145*86d7f5d3SJohn Marino     struct route ro;		/* route, for ip_output. MUST COPY */
146*86d7f5d3SJohn Marino     int flags;			/* flags, for ip_output (IPv6 ?) */
147*86d7f5d3SJohn Marino 
148*86d7f5d3SJohn Marino     u_short pipe_nr;		/* pipe/flow_set number */
149*86d7f5d3SJohn Marino     u_short pad;		/* explicit padding after pipe_nr */
150*86d7f5d3SJohn Marino 
151*86d7f5d3SJohn Marino     struct dn_flow_id id;	/* flow id */
152*86d7f5d3SJohn Marino     int cpuid;			/* target cpuid, for assertion */
153*86d7f5d3SJohn Marino     struct lwkt_port *msgport;	/* target msgport */
154*86d7f5d3SJohn Marino };
155*86d7f5d3SJohn Marino TAILQ_HEAD(dn_pkt_queue, dn_pkt);	/* declares struct dn_pkt_queue, a tail queue of dn_pkt */
156*86d7f5d3SJohn Marino 
157*86d7f5d3SJohn Marino /*
158*86d7f5d3SJohn Marino  * Overall structure of dummynet (with WF2Q+):
159*86d7f5d3SJohn Marino  *
160*86d7f5d3SJohn Marino  * In dummynet, packets are selected with the firewall rules, and passed to
161*86d7f5d3SJohn Marino  * two different objects: PIPE or QUEUE.
162*86d7f5d3SJohn Marino  *
163*86d7f5d3SJohn Marino  * A QUEUE is just a queue with configurable size and queue management policy.
164*86d7f5d3SJohn Marino  * It is also associated with a mask (to discriminate among different flows),
165*86d7f5d3SJohn Marino  * a weight (used to give different shares of the bandwidth to different flows)
166*86d7f5d3SJohn Marino  * and a "pipe", which essentially supplies the transmit clock for all queues
167*86d7f5d3SJohn Marino  * associated with that pipe.
168*86d7f5d3SJohn Marino  *
169*86d7f5d3SJohn Marino  * A PIPE emulates a fixed-bandwidth link, whose bandwidth is configurable.
170*86d7f5d3SJohn Marino  * The "clock" for a pipe comes from an internal timer.  A pipe is also
171*86d7f5d3SJohn Marino  * associated with one (or more, if masks are used) queue, where all packets
172*86d7f5d3SJohn Marino  * for that pipe are stored.
173*86d7f5d3SJohn Marino  *
174*86d7f5d3SJohn Marino  * The bandwidth available on the pipe is shared by the queues associated with
175*86d7f5d3SJohn Marino  * that pipe (only one in case the packet is sent to a PIPE) according to the
176*86d7f5d3SJohn Marino  * WF2Q+ scheduling algorithm and the configured weights.
177*86d7f5d3SJohn Marino  *
178*86d7f5d3SJohn Marino  * In general, incoming packets are stored in the appropriate queue, which is
179*86d7f5d3SJohn Marino  * then placed into one of a few heaps managed by a scheduler to decide when
180*86d7f5d3SJohn Marino  * the packet should be extracted.  The scheduler (a function called dummynet())
181*86d7f5d3SJohn Marino  * is run at every timer tick, and grabs queues from the head of the heaps when
182*86d7f5d3SJohn Marino  * they are ready for processing.
183*86d7f5d3SJohn Marino  *
184*86d7f5d3SJohn Marino  * There are three data structures defining a pipe and associated queues:
185*86d7f5d3SJohn Marino  *
186*86d7f5d3SJohn Marino  *  + dn_pipe, which contains the main configuration parameters related to
187*86d7f5d3SJohn Marino  *    delay and bandwidth;
188*86d7f5d3SJohn Marino  *  + dn_flow_set, which contains WF2Q+ configuration, flow masks, plr and
189*86d7f5d3SJohn Marino  *    RED configuration;
190*86d7f5d3SJohn Marino  *  + dn_flow_queue, which is the per-flow queue (containing the packets)
191*86d7f5d3SJohn Marino  *
192*86d7f5d3SJohn Marino  * Multiple dn_flow_set can be linked to the same pipe, and multiple
193*86d7f5d3SJohn Marino  * dn_flow_queue can be linked to the same dn_flow_set.
194*86d7f5d3SJohn Marino  * All data structures are linked in a linear list which is used for
195*86d7f5d3SJohn Marino  * housekeeping purposes.
196*86d7f5d3SJohn Marino  *
197*86d7f5d3SJohn Marino  * During configuration, we create and initialize the dn_flow_set and dn_pipe
198*86d7f5d3SJohn Marino  * structures (a dn_pipe also contains a dn_flow_set).
199*86d7f5d3SJohn Marino  *
200*86d7f5d3SJohn Marino  * At runtime: packets are sent to the appropriate dn_flow_set (either WFQ
201*86d7f5d3SJohn Marino  * ones, or the one embedded in the dn_pipe for fixed-rate flows), which in
202*86d7f5d3SJohn Marino  * turn dispatches them to the appropriate dn_flow_queue (created dynamically
203*86d7f5d3SJohn Marino  * according to the masks).
204*86d7f5d3SJohn Marino  *
205*86d7f5d3SJohn Marino  * The transmit clock for fixed rate flows (ready_event()) selects the
206*86d7f5d3SJohn Marino  * dn_flow_queue to be used to transmit the next packet. For WF2Q,
207*86d7f5d3SJohn Marino  * wfq_ready_event() extracts a pipe, which in turn selects the right flow using
208*86d7f5d3SJohn Marino  * a number of heaps defined in the pipe itself.
209*86d7f5d3SJohn Marino  */
210*86d7f5d3SJohn Marino 
211*86d7f5d3SJohn Marino /*
212*86d7f5d3SJohn Marino  * Per flow queue.  This contains the flow identifier, the queue of packets,
213*86d7f5d3SJohn Marino  * counters, and parameters used to support both RED and WF2Q+.
214*86d7f5d3SJohn Marino  *
215*86d7f5d3SJohn Marino  * A dn_flow_queue is created and initialized whenever a packet for a new
216*86d7f5d3SJohn Marino  * flow arrives.
217*86d7f5d3SJohn Marino  */
218*86d7f5d3SJohn Marino struct dn_flow_queue {
219*86d7f5d3SJohn Marino     struct dn_flow_id id;	/* flow identifier of this queue */
220*86d7f5d3SJohn Marino     LIST_ENTRY(dn_flow_queue) q_link;	/* linkage in a struct dn_flowqueue_head list */
221*86d7f5d3SJohn Marino 
222*86d7f5d3SJohn Marino     struct dn_pkt_queue queue;	/* queue of packets */
223*86d7f5d3SJohn Marino     u_int len;			/* NOTE(review): presumably queue length in packets -- confirm */
224*86d7f5d3SJohn Marino     u_int len_bytes;		/* NOTE(review): presumably queue length in bytes -- confirm */
225*86d7f5d3SJohn Marino     u_long numbytes;		/* credit for transmission (dynamic queues) */
226*86d7f5d3SJohn Marino 
227*86d7f5d3SJohn Marino     uint64_t tot_pkts;		/* statistics counters */
228*86d7f5d3SJohn Marino     uint64_t tot_bytes;
229*86d7f5d3SJohn Marino     uint32_t drops;
230*86d7f5d3SJohn Marino 
231*86d7f5d3SJohn Marino     int hash_slot;		/* debugging/diagnostic */
232*86d7f5d3SJohn Marino 
233*86d7f5d3SJohn Marino     /* RED parameters */
234*86d7f5d3SJohn Marino     int avg;			/* average queue length est. (scaled) */
235*86d7f5d3SJohn Marino     int count;			/* arrivals since last RED drop */
236*86d7f5d3SJohn Marino     int random;			/* random value (scaled) */
237*86d7f5d3SJohn Marino     uint32_t q_time;		/* start of queue idle time */
238*86d7f5d3SJohn Marino 
239*86d7f5d3SJohn Marino     /* WF2Q+ support */
240*86d7f5d3SJohn Marino     struct dn_flow_set *fs;	/* parent flow set */
241*86d7f5d3SJohn Marino     int heap_pos;		/* position (index) of struct in heap */
242*86d7f5d3SJohn Marino     dn_key sched_time;		/* current time when queue enters ready_heap */
243*86d7f5d3SJohn Marino 
244*86d7f5d3SJohn Marino     dn_key S, F;		/* start time, finish time */
245*86d7f5d3SJohn Marino     /*
246*86d7f5d3SJohn Marino      * Setting F < S means the timestamp is invalid. We only need
247*86d7f5d3SJohn Marino      * to test this when the queue is empty.
248*86d7f5d3SJohn Marino      */
249*86d7f5d3SJohn Marino };
250*86d7f5d3SJohn Marino LIST_HEAD(dn_flowqueue_head, dn_flow_queue);	/* declares struct dn_flowqueue_head, a list of dn_flow_queue */
251*86d7f5d3SJohn Marino 
252*86d7f5d3SJohn Marino /*
253*86d7f5d3SJohn Marino  * flow_set descriptor.  Contains the "template" parameters for the queue
254*86d7f5d3SJohn Marino  * configuration, and pointers to the hash table of dn_flow_queue's.
255*86d7f5d3SJohn Marino  *
256*86d7f5d3SJohn Marino  * The hash table is an array of lists -- we identify the slot by hashing
257*86d7f5d3SJohn Marino  * the flow-id, then scan the list looking for a match.
258*86d7f5d3SJohn Marino  * The size of the hash table (buckets) is configurable on a per-queue basis.
259*86d7f5d3SJohn Marino  *
260*86d7f5d3SJohn Marino  * A dn_flow_set is created whenever a new queue or pipe is created (in the
261*86d7f5d3SJohn Marino  * latter case, the structure is located inside the struct dn_pipe).
262*86d7f5d3SJohn Marino  */
263*86d7f5d3SJohn Marino struct dn_flow_set {
264*86d7f5d3SJohn Marino     u_short fs_nr;		/* flow_set number */
265*86d7f5d3SJohn Marino     u_short flags_fs;		/* see 'Flow set flags' */
266*86d7f5d3SJohn Marino 
267*86d7f5d3SJohn Marino     LIST_ENTRY(dn_flow_set) fs_link;	/* linkage in a struct dn_flowset_head list */
268*86d7f5d3SJohn Marino 
269*86d7f5d3SJohn Marino     struct dn_pipe *pipe;	/* pointer to parent pipe */
270*86d7f5d3SJohn Marino     u_short parent_nr;		/* parent pipe#, 0 if local to a pipe */
271*86d7f5d3SJohn Marino 
272*86d7f5d3SJohn Marino     int weight;			/* WFQ queue weight */
273*86d7f5d3SJohn Marino     int qsize;			/* queue size in slots or bytes */
274*86d7f5d3SJohn Marino     int plr;			/* pkt loss rate (2^31-1 means 100%) */
275*86d7f5d3SJohn Marino 
276*86d7f5d3SJohn Marino     struct dn_flow_id flow_mask;	/* mask used to discriminate among different flows */
277*86d7f5d3SJohn Marino 
278*86d7f5d3SJohn Marino     /* hash table of queues onto this flow_set */
279*86d7f5d3SJohn Marino     int rq_size;		/* number of slots */
280*86d7f5d3SJohn Marino     int rq_elements;		/* active elements */
281*86d7f5d3SJohn Marino     struct dn_flowqueue_head *rq;/* array of rq_size entries */
282*86d7f5d3SJohn Marino 
283*86d7f5d3SJohn Marino     uint32_t last_expired;	/* do not expire too frequently */
284*86d7f5d3SJohn Marino     int backlogged;		/* #active queues for this flowset */
285*86d7f5d3SJohn Marino 
286*86d7f5d3SJohn Marino     /* RED parameters */
287*86d7f5d3SJohn Marino     int w_q;			/* queue weight (scaled) */
288*86d7f5d3SJohn Marino     int max_th;			/* maximum threshold for queue (scaled) */
289*86d7f5d3SJohn Marino     int min_th;			/* minimum threshold for queue (scaled) */
290*86d7f5d3SJohn Marino     int max_p;			/* maximum value for p_b (scaled) */
291*86d7f5d3SJohn Marino     u_int c_1;			/* max_p/(max_th-min_th) (scaled) */
292*86d7f5d3SJohn Marino     u_int c_2;			/* max_p*min_th/(max_th-min_th) (scaled) */
293*86d7f5d3SJohn Marino     u_int c_3;			/* for GRED, (1-max_p)/max_th (scaled) */
294*86d7f5d3SJohn Marino     u_int c_4;			/* for GRED, 1 - 2*max_p (scaled) */
295*86d7f5d3SJohn Marino     u_int *w_q_lookup;		/* lookup table for computing (1-w_q)^t */
296*86d7f5d3SJohn Marino     u_int lookup_depth;		/* depth of lookup table */
297*86d7f5d3SJohn Marino     int lookup_step;		/* granularity inside the lookup table */
298*86d7f5d3SJohn Marino     int lookup_weight;		/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
299*86d7f5d3SJohn Marino     int avg_pkt_size;		/* average packet size */
300*86d7f5d3SJohn Marino     int max_pkt_size;		/* max packet size */
301*86d7f5d3SJohn Marino };
302*86d7f5d3SJohn Marino LIST_HEAD(dn_flowset_head, dn_flow_set);	/* declares struct dn_flowset_head, a list of dn_flow_set */
303*86d7f5d3SJohn Marino 
304*86d7f5d3SJohn Marino /*
305*86d7f5d3SJohn Marino  * Pipe descriptor. Contains global parameters, delay-line queue, and the
306*86d7f5d3SJohn Marino  * flow_set used for fixed-rate queues.
307*86d7f5d3SJohn Marino  *
308*86d7f5d3SJohn Marino  * For WF2Q+ support it also has 3 heaps holding dn_flow_queue:
309*86d7f5d3SJohn Marino  *  + not_eligible_heap, for queues whose start time is higher than the
310*86d7f5d3SJohn Marino  *    virtual time. Sorted by start time.
311*86d7f5d3SJohn Marino  *  + scheduler_heap, for queues eligible for scheduling.  Sorted by finish
312*86d7f5d3SJohn Marino  *    time.
313*86d7f5d3SJohn Marino  *  + idle_heap, all flows that are idle and can be removed.  We do that on
314*86d7f5d3SJohn Marino  *    each tick so that we do not slow down forwarding operations too much.
315*86d7f5d3SJohn Marino  */
316*86d7f5d3SJohn Marino struct dn_pipe {		/* a pipe */
317*86d7f5d3SJohn Marino     int pipe_nr;		/* number */
318*86d7f5d3SJohn Marino     int bandwidth;		/* really, bytes/tick. */
319*86d7f5d3SJohn Marino     int delay;			/* really, ticks */
320*86d7f5d3SJohn Marino 
321*86d7f5d3SJohn Marino     struct dn_pkt_queue p_queue;/* packets in delay line */
322*86d7f5d3SJohn Marino     LIST_ENTRY(dn_pipe) p_link;	/* linkage in a struct dn_pipe_head list */
323*86d7f5d3SJohn Marino 
324*86d7f5d3SJohn Marino     /* WF2Q+ */
325*86d7f5d3SJohn Marino     struct dn_heap scheduler_heap; /* top extract - key Finish time*/
326*86d7f5d3SJohn Marino     struct dn_heap not_eligible_heap; /* top extract- key Start time */
327*86d7f5d3SJohn Marino     struct dn_heap idle_heap;	/* random extract - key Start=Finish time */
328*86d7f5d3SJohn Marino 
329*86d7f5d3SJohn Marino     dn_key V;			/* virtual time */
330*86d7f5d3SJohn Marino     int sum;			/* sum of weights of all active sessions */
    /* NOTE(review): the field name says bytes, the comment says bits -- confirm the unit */
331*86d7f5d3SJohn Marino     int numbytes;		/* bits I can transmit (more or less). */
332*86d7f5d3SJohn Marino 
333*86d7f5d3SJohn Marino     dn_key sched_time;		/* time pipe was scheduled in ready_heap */
334*86d7f5d3SJohn Marino 
335*86d7f5d3SJohn Marino     struct dn_flow_set fs;	/* used with fixed-rate flows */
336*86d7f5d3SJohn Marino };
337*86d7f5d3SJohn Marino LIST_HEAD(dn_pipe_head, dn_pipe);	/* declares struct dn_pipe_head, a list of dn_pipe */
338*86d7f5d3SJohn Marino 
/*
 * Request descriptor handed to an ip_dn_ctl_t handler.
 * NOTE(review): field meanings below are inferred from their names and
 * types -- confirm against ip_dn_sockopt() and the control handler.
 */
339*86d7f5d3SJohn Marino struct dn_sopt {
340*86d7f5d3SJohn Marino 	int	dn_sopt_name;	/* presumably the socket option being requested */
341*86d7f5d3SJohn Marino 	void	*dn_sopt_arg;	/* presumably the option's argument buffer */
342*86d7f5d3SJohn Marino 	size_t	dn_sopt_arglen;	/* presumably the size of that buffer */
343*86d7f5d3SJohn Marino };
344*86d7f5d3SJohn Marino 
/*
 * Function types for the two dummynet hooks: a control handler taking a
 * struct dn_sopt request, and a packet handler taking an mbuf.
 */
345*86d7f5d3SJohn Marino typedef int	ip_dn_ctl_t(struct dn_sopt *);
346*86d7f5d3SJohn Marino typedef int	ip_dn_io_t(struct mbuf *);
347*86d7f5d3SJohn Marino 
/* Hook pointers; only declared here, defined elsewhere. */
348*86d7f5d3SJohn Marino extern ip_dn_ctl_t	*ip_dn_ctl_ptr;
349*86d7f5d3SJohn Marino extern ip_dn_io_t	*ip_dn_io_ptr;
350*86d7f5d3SJohn Marino 
/* Entry points implemented outside this header. */
351*86d7f5d3SJohn Marino void	ip_dn_queue(struct mbuf *);
352*86d7f5d3SJohn Marino void	ip_dn_packet_free(struct dn_pkt *);
353*86d7f5d3SJohn Marino void	ip_dn_packet_redispatch(struct dn_pkt *);
354*86d7f5d3SJohn Marino int	ip_dn_sockopt(struct sockopt *);
355*86d7f5d3SJohn Marino 
/* Evaluates to true when the packet I/O hook pointer is non-NULL. */
356*86d7f5d3SJohn Marino #define	DUMMYNET_LOADED	(ip_dn_io_ptr != NULL)
357*86d7f5d3SJohn Marino 
358*86d7f5d3SJohn Marino #endif	/* _KERNEL */
359*86d7f5d3SJohn Marino 
/*
 * Flow identifier as exposed outside the _KERNEL section.  The "ip" member
 * carries the same fields as the kernel's dn_flow_id "inet" variant; the
 * union is sized to 64 bytes by its pad member.
 */
360*86d7f5d3SJohn Marino struct dn_ioc_flowid {
361*86d7f5d3SJohn Marino     uint16_t type;	/* ETHERTYPE_ */
362*86d7f5d3SJohn Marino     uint16_t pad;	/* explicit padding before the union */
363*86d7f5d3SJohn Marino     union {
364*86d7f5d3SJohn Marino 	struct {
365*86d7f5d3SJohn Marino 	    uint32_t dst_ip;	/* destination address */
366*86d7f5d3SJohn Marino 	    uint32_t src_ip;	/* source address */
367*86d7f5d3SJohn Marino 	    uint16_t dst_port;	/* destination port */
368*86d7f5d3SJohn Marino 	    uint16_t src_port;	/* source port */
369*86d7f5d3SJohn Marino 	    uint8_t proto;	/* protocol number */
370*86d7f5d3SJohn Marino 	    uint8_t flags;	/* NOTE(review): meaning not visible in this header */
371*86d7f5d3SJohn Marino 	} ip;
372*86d7f5d3SJohn Marino 	uint8_t pad[64];	/* makes the union at least 64 bytes */
373*86d7f5d3SJohn Marino     } u;
374*86d7f5d3SJohn Marino };
375*86d7f5d3SJohn Marino 
/*
 * Per-flow queue report as exposed outside the _KERNEL section; its fields
 * share names with a subset of struct dn_flow_queue.
 */
376*86d7f5d3SJohn Marino struct dn_ioc_flowqueue {
377*86d7f5d3SJohn Marino     u_int len;			/* NOTE(review): presumably queue length in packets -- confirm */
378*86d7f5d3SJohn Marino     u_int len_bytes;		/* NOTE(review): presumably queue length in bytes -- confirm */
379*86d7f5d3SJohn Marino 
380*86d7f5d3SJohn Marino     uint64_t tot_pkts;		/* statistics counters */
381*86d7f5d3SJohn Marino     uint64_t tot_bytes;
382*86d7f5d3SJohn Marino     uint32_t drops;
383*86d7f5d3SJohn Marino 
384*86d7f5d3SJohn Marino     int hash_slot;		/* debugging/diagnostic */
385*86d7f5d3SJohn Marino     dn_key S;			/* virtual start time */
386*86d7f5d3SJohn Marino     dn_key F;			/* virtual finish time */
387*86d7f5d3SJohn Marino 
388*86d7f5d3SJohn Marino     struct dn_ioc_flowid id;	/* flow identifier */
389*86d7f5d3SJohn Marino     uint8_t reserved[16];	/* reserved bytes */
390*86d7f5d3SJohn Marino };
391*86d7f5d3SJohn Marino 
/*
 * Flow set configuration as exposed outside the _KERNEL section.  It is
 * also embedded as the first member of struct dn_ioc_pipe, so fs_type is
 * the first field of both layouts.
 */
392*86d7f5d3SJohn Marino struct dn_ioc_flowset {
393*86d7f5d3SJohn Marino     u_short fs_type;		/* DN_IS_{QUEUE,PIPE}, MUST be first */
394*86d7f5d3SJohn Marino 
395*86d7f5d3SJohn Marino     u_short fs_nr;		/* flow_set number */
396*86d7f5d3SJohn Marino     u_short flags_fs;		/* see 'Flow set flags' */
397*86d7f5d3SJohn Marino     u_short parent_nr;		/* parent pipe#, 0 if local to a pipe */
398*86d7f5d3SJohn Marino 
399*86d7f5d3SJohn Marino     int weight;			/* WFQ queue weight */
400*86d7f5d3SJohn Marino     int qsize;			/* queue size in slots or bytes */
401*86d7f5d3SJohn Marino     int plr;			/* pkt loss rate (2^31-1 means 100%) */
402*86d7f5d3SJohn Marino 
403*86d7f5d3SJohn Marino     /* Hash table information */
404*86d7f5d3SJohn Marino     int rq_size;		/* number of slots */
405*86d7f5d3SJohn Marino     int rq_elements;		/* active elements */
406*86d7f5d3SJohn Marino 
407*86d7f5d3SJohn Marino     /* RED parameters */
408*86d7f5d3SJohn Marino     int w_q;			/* queue weight (scaled) */
409*86d7f5d3SJohn Marino     int max_th;			/* maximum threshold for queue (scaled) */
410*86d7f5d3SJohn Marino     int min_th;			/* minimum threshold for queue (scaled) */
411*86d7f5d3SJohn Marino     int max_p;			/* maximum value for p_b (scaled) */
412*86d7f5d3SJohn Marino     int lookup_step;		/* granularity inside the lookup table */
413*86d7f5d3SJohn Marino     int lookup_weight;		/* equal to (1-w_q)^t / (1-w_q)^(t+1) */
414*86d7f5d3SJohn Marino 
415*86d7f5d3SJohn Marino     struct dn_ioc_flowid flow_mask;	/* mask used to discriminate among different flows */
416*86d7f5d3SJohn Marino     uint8_t reserved[16];	/* reserved bytes */
417*86d7f5d3SJohn Marino };
418*86d7f5d3SJohn Marino 
/*
 * Pipe configuration as exposed outside the _KERNEL section.  Per the field
 * comments, bandwidth and delay are in bit/second and milliseconds here,
 * whereas struct dn_pipe documents them as bytes/tick and ticks.
 */
419*86d7f5d3SJohn Marino struct dn_ioc_pipe {
420*86d7f5d3SJohn Marino     struct dn_ioc_flowset fs;	/* MUST be first */
421*86d7f5d3SJohn Marino 
422*86d7f5d3SJohn Marino     int pipe_nr;		/* pipe number */
423*86d7f5d3SJohn Marino     int bandwidth;		/* bit/second */
424*86d7f5d3SJohn Marino     int delay;			/* milliseconds */
425*86d7f5d3SJohn Marino 
426*86d7f5d3SJohn Marino     dn_key V;			/* virtual time */
427*86d7f5d3SJohn Marino 
428*86d7f5d3SJohn Marino     uint8_t reserved[16];	/* reserved bytes */
429*86d7f5d3SJohn Marino };
430*86d7f5d3SJohn Marino 
431*86d7f5d3SJohn Marino /*
432*86d7f5d3SJohn Marino  * Flow set flags
433*86d7f5d3SJohn Marino  */
/*
 * Bit values for the flags_fs fields; DN_IS_PIPE and DN_IS_QUEUE are also
 * the values documented for dn_ioc_flowset.fs_type.
 * NOTE(review): the meanings of the first three flags are inferred from
 * their names only -- confirm against the code that tests them.
 */
434*86d7f5d3SJohn Marino #define DN_HAVE_FLOW_MASK	0x0001	/* presumably: a flow mask is configured */
435*86d7f5d3SJohn Marino #define DN_IS_RED		0x0002	/* presumably: RED queue management is enabled */
436*86d7f5d3SJohn Marino #define DN_IS_GENTLE_RED	0x0004	/* presumably: gentle RED (GRED) is enabled */
437*86d7f5d3SJohn Marino #define DN_QSIZE_IS_BYTES	0x0008	/* queue size is measured in bytes */
438*86d7f5d3SJohn Marino #define DN_NOERROR		0x0010	/* do not report ENOBUFS on drops */
439*86d7f5d3SJohn Marino #define DN_IS_PIPE		0x4000	/* flow set type: pipe */
440*86d7f5d3SJohn Marino #define DN_IS_QUEUE		0x8000	/* flow set type: queue */
441*86d7f5d3SJohn Marino 
442*86d7f5d3SJohn Marino /*
443*86d7f5d3SJohn Marino  * Macros for RED
444*86d7f5d3SJohn Marino  */
/*
 * Fixed-point helpers: values are scaled by 2^SCALE_RED.
 * NOTE(review): SCALE_MUL forms the full product in the operands' own type
 * before shifting, so it can overflow for large int operands -- confirm
 * that callers widen or bound their arguments.
 */
445*86d7f5d3SJohn Marino #define SCALE_RED		16	/* number of fractional bits */
446*86d7f5d3SJohn Marino #define SCALE(x)		((x) << SCALE_RED)	/* plain value -> scaled value */
447*86d7f5d3SJohn Marino #define SCALE_VAL(x)		((x) >> SCALE_RED)	/* scaled value -> plain value */
448*86d7f5d3SJohn Marino #define SCALE_MUL(x, y)		(((x) * (y)) >> SCALE_RED)	/* product of two scaled values, rescaled */
449*86d7f5d3SJohn Marino 
450*86d7f5d3SJohn Marino /*
451*86d7f5d3SJohn Marino  * Maximum pipe number
452*86d7f5d3SJohn Marino  */
/*
 * NOTE(review): 65536 does not fit in a 16-bit u_short, the type used for
 * dn_pkt.pipe_nr and the fs_nr fields -- confirm whether callers treat this
 * as an exclusive upper bound.
 */
453*86d7f5d3SJohn Marino #define DN_PIPE_NR_MAX		65536
454*86d7f5d3SJohn Marino 
455*86d7f5d3SJohn Marino #endif /* !_IP_DUMMYNET_H */
456