1 /* 2 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.22 2003/05/13 09:31:06 maxim Exp $ 28 * $DragonFly: src/sys/net/dummynet/ip_dummynet.c,v 1.4 2004/01/06 03:17:25 dillon Exp $ 29 */ 30 31 #if !defined(KLD_MODULE) 32 #include "opt_ipfw.h" /* for IPFW2 definition */ 33 #endif 34 35 #define DEB(x) 36 #define DDB(x) x 37 38 /* 39 * This module implements IP dummynet, a bandwidth limiter/delay emulator 40 * used in conjunction with the ipfw package. 
41 * Description of the data structures used is in ip_dummynet.h 42 * Here you mainly find the following blocks of code: 43 * + variable declarations; 44 * + heap management functions; 45 * + scheduler and dummynet functions; 46 * + configuration and initialization. 47 * 48 * NOTA BENE: critical sections are protected by splimp()/splx() 49 * pairs. One would think that splnet() is enough as for most of 50 * the netinet code, but it is not so because when used with 51 * bridging, dummynet is invoked at splimp(). 52 * 53 * Most important Changes: 54 * 55 * 011004: KLDable 56 * 010124: Fixed WF2Q behaviour 57 * 010122: Fixed spl protection. 58 * 000601: WF2Q support 59 * 000106: large rewrite, use heaps to handle very many pipes. 60 * 980513: initial release 61 * 62 * include files marked with XXX are probably not needed 63 */ 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/kernel.h> 70 #include <sys/module.h> 71 #include <sys/proc.h> 72 #include <sys/socket.h> 73 #include <sys/socketvar.h> 74 #include <sys/time.h> 75 #include <sys/sysctl.h> 76 #include <net/if.h> 77 #include <net/route.h> 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/in_var.h> 81 #include <netinet/ip.h> 82 #include <net/ipfw/ip_fw.h> 83 #include "ip_dummynet.h" 84 #include <netinet/ip_var.h> 85 86 #include <netinet/if_ether.h> /* for struct arpcom */ 87 #include <net/bridge/bridge.h> 88 89 /* 90 * We keep a private variable for the simulation time, but we could 91 * probably use an existing one ("softticks" in sys/kern/kern_timer.c) 92 */ 93 static dn_key curr_time = 0 ; /* current simulation time */ 94 95 static int dn_hash_size = 64 ; /* default hash size */ 96 97 /* statistics on number of queue searches and search steps */ 98 static int searches, search_steps ; 99 static int pipe_expire = 1 ; /* expire queue if empty */ 100 static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ 
101 102 static int red_lookup_depth = 256; /* RED - default lookup table depth */ 103 static int red_avg_pkt_size = 512; /* RED - default medium packet size */ 104 static int red_max_pkt_size = 1500; /* RED - default max packet size */ 105 106 /* 107 * Three heaps contain queues and pipes that the scheduler handles: 108 * 109 * ready_heap contains all dn_flow_queue related to fixed-rate pipes. 110 * 111 * wfq_ready_heap contains the pipes associated with WF2Q flows 112 * 113 * extract_heap contains pipes associated with delay lines. 114 * 115 */ 116 117 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); 118 119 static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; 120 121 static int heap_init(struct dn_heap *h, int size) ; 122 static int heap_insert (struct dn_heap *h, dn_key key1, void *p); 123 static void heap_extract(struct dn_heap *h, void *obj); 124 125 static void transmit_event(struct dn_pipe *pipe); 126 static void ready_event(struct dn_flow_queue *q); 127 128 static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ 129 static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */ 130 131 static struct callout_handle dn_timeout; 132 133 #ifdef SYSCTL_NODE 134 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, 135 CTLFLAG_RW, 0, "Dummynet"); 136 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, 137 CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); 138 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, 139 CTLFLAG_RD, &curr_time, 0, "Current tick"); 140 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, 141 CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); 142 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, 143 CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); 144 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, 145 CTLFLAG_RD, &searches, 0, "Number of queue searches"); 146 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, 147 CTLFLAG_RD, &search_steps, 0, "Number 
of queue search steps"); 148 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, 149 CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty"); 150 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, 151 CTLFLAG_RW, &dn_max_ratio, 0, 152 "Max ratio between dynamic queues and buckets"); 153 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, 154 CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table"); 155 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, 156 CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size"); 157 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, 158 CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size"); 159 #endif 160 161 static int config_pipe(struct dn_pipe *p); 162 static int ip_dn_ctl(struct sockopt *sopt); 163 164 static void rt_unref(struct rtentry *); 165 static void dummynet(void *); 166 static void dummynet_flush(void); 167 void dummynet_drain(void); 168 static ip_dn_io_t dummynet_io; 169 static void dn_rule_delete(void *); 170 171 int if_tx_rdy(struct ifnet *ifp); 172 173 static void 174 rt_unref(struct rtentry *rt) 175 { 176 if (rt == NULL) 177 return ; 178 if (rt->rt_refcnt <= 0) 179 printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt); 180 RTFREE(rt); 181 } 182 183 /* 184 * Heap management functions. 185 * 186 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. 187 * Some macros help finding parent/children so we can optimize them. 188 * 189 * heap_init() is called to expand the heap when needed. 190 * Increment size in blocks of 16 entries. 191 * XXX failure to allocate a new element is a pretty bad failure 192 * as we basically stall a whole queue forever!! 
193 * Returns 1 on error, 0 on success 194 */ 195 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) 196 #define HEAP_LEFT(x) ( 2*(x) + 1 ) 197 #define HEAP_IS_LEFT(x) ( (x) & 1 ) 198 #define HEAP_RIGHT(x) ( 2*(x) + 2 ) 199 #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } 200 #define HEAP_INCREMENT 15 201 202 static int 203 heap_init(struct dn_heap *h, int new_size) 204 { 205 struct dn_heap_entry *p; 206 207 if (h->size >= new_size ) { 208 printf("heap_init, Bogus call, have %d want %d\n", 209 h->size, new_size); 210 return 0 ; 211 } 212 new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; 213 p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); 214 if (p == NULL) { 215 printf(" heap_init, resize %d failed\n", new_size ); 216 return 1 ; /* error */ 217 } 218 if (h->size > 0) { 219 bcopy(h->p, p, h->size * sizeof(*p) ); 220 free(h->p, M_DUMMYNET); 221 } 222 h->p = p ; 223 h->size = new_size ; 224 return 0 ; 225 } 226 227 /* 228 * Insert element in heap. Normally, p != NULL, we insert p in 229 * a new position and bubble up. If p == NULL, then the element is 230 * already in place, and key is the position where to start the 231 * bubble-up. 232 * Returns 1 on failure (cannot allocate new heap entry) 233 * 234 * If offset > 0 the position (index, int) of the element in the heap is 235 * also stored in the element itself at the given offset in bytes. 236 */ 237 #define SET_OFFSET(heap, node) \ 238 if (heap->offset > 0) \ 239 *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; 240 /* 241 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. 
242 */ 243 #define RESET_OFFSET(heap, node) \ 244 if (heap->offset > 0) \ 245 *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; 246 static int 247 heap_insert(struct dn_heap *h, dn_key key1, void *p) 248 { 249 int son = h->elements ; 250 251 if (p == NULL) /* data already there, set starting point */ 252 son = key1 ; 253 else { /* insert new element at the end, possibly resize */ 254 son = h->elements ; 255 if (son == h->size) /* need resize... */ 256 if (heap_init(h, h->elements+1) ) 257 return 1 ; /* failure... */ 258 h->p[son].object = p ; 259 h->p[son].key = key1 ; 260 h->elements++ ; 261 } 262 while (son > 0) { /* bubble up */ 263 int father = HEAP_FATHER(son) ; 264 struct dn_heap_entry tmp ; 265 266 if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) 267 break ; /* found right position */ 268 /* son smaller than father, swap and repeat */ 269 HEAP_SWAP(h->p[son], h->p[father], tmp) ; 270 SET_OFFSET(h, son); 271 son = father ; 272 } 273 SET_OFFSET(h, son); 274 return 0 ; 275 } 276 277 /* 278 * remove top element from heap, or obj if obj != NULL 279 */ 280 static void 281 heap_extract(struct dn_heap *h, void *obj) 282 { 283 int child, father, max = h->elements - 1 ; 284 285 if (max < 0) { 286 printf("warning, extract from empty heap 0x%p\n", h); 287 return ; 288 } 289 father = 0 ; /* default: move up smallest child */ 290 if (obj != NULL) { /* extract specific element, index is at offset */ 291 if (h->offset <= 0) 292 panic("*** heap_extract from middle not supported on this heap!!!\n"); 293 father = *((int *)((char *)obj + h->offset)) ; 294 if (father < 0 || father >= h->elements) { 295 printf("dummynet: heap_extract, father %d out of bound 0..%d\n", 296 father, h->elements); 297 panic("heap_extract"); 298 } 299 } 300 RESET_OFFSET(h, father); 301 child = HEAP_LEFT(father) ; /* left child */ 302 while (child <= max) { /* valid entry */ 303 if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) 304 child = child+1 ; /* take right 
child, otherwise left */ 305 h->p[father] = h->p[child] ; 306 SET_OFFSET(h, father); 307 father = child ; 308 child = HEAP_LEFT(child) ; /* left child for next loop */ 309 } 310 h->elements-- ; 311 if (father != max) { 312 /* 313 * Fill hole with last entry and bubble up, reusing the insert code 314 */ 315 h->p[father] = h->p[max] ; 316 heap_insert(h, father, NULL); /* this one cannot fail */ 317 } 318 } 319 320 #if 0 321 /* 322 * change object position and update references 323 * XXX this one is never used! 324 */ 325 static void 326 heap_move(struct dn_heap *h, dn_key new_key, void *object) 327 { 328 int temp; 329 int i ; 330 int max = h->elements-1 ; 331 struct dn_heap_entry buf ; 332 333 if (h->offset <= 0) 334 panic("cannot move items on this heap"); 335 336 i = *((int *)((char *)object + h->offset)); 337 if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ 338 h->p[i].key = new_key ; 339 for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; 340 i = temp ) { /* bubble up */ 341 HEAP_SWAP(h->p[i], h->p[temp], buf) ; 342 SET_OFFSET(h, i); 343 } 344 } else { /* must move down */ 345 h->p[i].key = new_key ; 346 while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ 347 if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) 348 temp++ ; /* select child with min key */ 349 if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ 350 HEAP_SWAP(h->p[i], h->p[temp], buf) ; 351 SET_OFFSET(h, i); 352 } else 353 break ; 354 i = temp ; 355 } 356 } 357 SET_OFFSET(h, i); 358 } 359 #endif /* heap_move, unused */ 360 361 /* 362 * heapify() will reorganize data inside an array to maintain the 363 * heap property. It is needed when we delete a bunch of entries. 
364 */ 365 static void 366 heapify(struct dn_heap *h) 367 { 368 int i ; 369 370 for (i = 0 ; i < h->elements ; i++ ) 371 heap_insert(h, i , NULL) ; 372 } 373 374 /* 375 * cleanup the heap and free data structure 376 */ 377 static void 378 heap_free(struct dn_heap *h) 379 { 380 if (h->size >0 ) 381 free(h->p, M_DUMMYNET); 382 bzero(h, sizeof(*h) ); 383 } 384 385 /* 386 * --- end of heap management functions --- 387 */ 388 389 /* 390 * Scheduler functions: 391 * 392 * transmit_event() is called when the delay-line needs to enter 393 * the scheduler, either because of existing pkts getting ready, 394 * or new packets entering the queue. The event handled is the delivery 395 * time of the packet. 396 * 397 * ready_event() does something similar with fixed-rate queues, and the 398 * event handled is the finish time of the head pkt. 399 * 400 * wfq_ready_event() does something similar with WF2Q queues, and the 401 * event handled is the start time of the head pkt. 402 * 403 * In all cases, we make sure that the data structures are consistent 404 * before passing pkts out, because this might trigger recursive 405 * invocations of the procedures. 406 */ 407 static void 408 transmit_event(struct dn_pipe *pipe) 409 { 410 struct dn_pkt *pkt ; 411 412 while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { 413 /* 414 * first unlink, then call procedures, since ip_input() can invoke 415 * ip_output() and viceversa, thus causing nested calls 416 */ 417 pipe->head = DN_NEXT(pkt) ; 418 419 /* 420 * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf 421 * (NOT A REAL one, just a small block of malloc'ed memory) with 422 * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET 423 * dn_m (m_next) = actual mbuf to be processed by ip_input/output 424 * and some other fields. 425 * The block IS FREED HERE because it contains parameters passed 426 * to the called routine. 
427 */ 428 switch (pkt->dn_dir) { 429 case DN_TO_IP_OUT: 430 (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL); 431 rt_unref (pkt->ro.ro_rt) ; 432 break ; 433 434 case DN_TO_IP_IN : 435 ip_input((struct mbuf *)pkt) ; 436 break ; 437 438 case DN_TO_BDG_FWD : 439 if (!BDG_LOADED) { 440 /* somebody unloaded the bridge module. Drop pkt */ 441 printf("-- dropping bridged packet trapped in pipe--\n"); 442 m_freem(pkt->dn_m); 443 break; 444 } /* fallthrough */ 445 case DN_TO_ETH_DEMUX: 446 { 447 struct mbuf *m = (struct mbuf *)pkt ; 448 struct ether_header *eh; 449 450 if (pkt->dn_m->m_len < ETHER_HDR_LEN && 451 (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) { 452 printf("dummynet/bridge: pullup fail, dropping pkt\n"); 453 break; 454 } 455 /* 456 * same as ether_input, make eh be a pointer into the mbuf 457 */ 458 eh = mtod(pkt->dn_m, struct ether_header *); 459 m_adj(pkt->dn_m, ETHER_HDR_LEN); 460 /* 461 * bdg_forward() wants a pointer to the pseudo-mbuf-header, but 462 * on return it will supply the pointer to the actual packet 463 * (originally pkt->dn_m, but could be something else now) if 464 * it has not consumed it. 
465 */ 466 if (pkt->dn_dir == DN_TO_BDG_FWD) { 467 m = bdg_forward_ptr(m, eh, pkt->ifp); 468 if (m) 469 m_freem(m); 470 } else 471 ether_demux(NULL, eh, m); /* which consumes the mbuf */ 472 } 473 break ; 474 case DN_TO_ETH_OUT: 475 ether_output_frame(pkt->ifp, (struct mbuf *)pkt); 476 break; 477 478 default: 479 printf("dummynet: bad switch %d!\n", pkt->dn_dir); 480 m_freem(pkt->dn_m); 481 break ; 482 } 483 free(pkt, M_DUMMYNET); 484 } 485 /* if there are leftover packets, put into the heap for next event */ 486 if ( (pkt = pipe->head) ) 487 heap_insert(&extract_heap, pkt->output_time, pipe ) ; 488 /* XXX should check errors on heap_insert, by draining the 489 * whole pipe p and hoping in the future we are more successful 490 */ 491 } 492 493 /* 494 * the following macro computes how many ticks we have to wait 495 * before being able to transmit a packet. The credit is taken from 496 * either a pipe (WF2Q) or a flow_queue (per-flow queueing) 497 */ 498 #define SET_TICKS(pkt, q, p) \ 499 (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ 500 p->bandwidth ; 501 502 /* 503 * extract pkt from queue, compute output time (could be now) 504 * and put into delay line (p_queue) 505 */ 506 static void 507 move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, 508 struct dn_pipe *p, int len) 509 { 510 q->head = DN_NEXT(pkt) ; 511 q->len-- ; 512 q->len_bytes -= len ; 513 514 pkt->output_time = curr_time + p->delay ; 515 516 if (p->head == NULL) 517 p->head = pkt; 518 else 519 DN_NEXT(p->tail) = pkt; 520 p->tail = pkt; 521 DN_NEXT(p->tail) = NULL; 522 } 523 524 /* 525 * ready_event() is invoked every time the queue must enter the 526 * scheduler, either because the first packet arrives, or because 527 * a previously scheduled event fired. 528 * On invokation, drain as many pkts as possible (could be 0) and then 529 * if there are leftover packets reinsert the pkt in the scheduler. 
530 */ 531 static void 532 ready_event(struct dn_flow_queue *q) 533 { 534 struct dn_pkt *pkt; 535 struct dn_pipe *p = q->fs->pipe ; 536 int p_was_empty ; 537 538 if (p == NULL) { 539 printf("ready_event- pipe is gone\n"); 540 return ; 541 } 542 p_was_empty = (p->head == NULL) ; 543 544 /* 545 * schedule fixed-rate queues linked to this pipe: 546 * Account for the bw accumulated since last scheduling, then 547 * drain as many pkts as allowed by q->numbytes and move to 548 * the delay line (in p) computing output time. 549 * bandwidth==0 (no limit) means we can drain the whole queue, 550 * setting len_scaled = 0 does the job. 551 */ 552 q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; 553 while ( (pkt = q->head) != NULL ) { 554 int len = pkt->dn_m->m_pkthdr.len; 555 int len_scaled = p->bandwidth ? len*8*hz : 0 ; 556 if (len_scaled > q->numbytes ) 557 break ; 558 q->numbytes -= len_scaled ; 559 move_pkt(pkt, q, p, len); 560 } 561 /* 562 * If we have more packets queued, schedule next ready event 563 * (can only occur when bandwidth != 0, otherwise we would have 564 * flushed the whole queue in the previous loop). 565 * To this purpose we record the current time and compute how many 566 * ticks to go for the finish time of the packet. 567 */ 568 if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */ 569 dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ 570 q->sched_time = curr_time ; 571 heap_insert(&ready_heap, curr_time + t, (void *)q ); 572 /* XXX should check errors on heap_insert, and drain the whole 573 * queue on error hoping next time we are luckier. 574 */ 575 } else { /* RED needs to know when the queue becomes empty */ 576 q->q_time = curr_time; 577 q->numbytes = 0; 578 } 579 /* 580 * If the delay line was empty call transmit_event(p) now. 581 * Otherwise, the scheduler will take care of it. 582 */ 583 if (p_was_empty) 584 transmit_event(p); 585 } 586 587 /* 588 * Called when we can transmit packets on WF2Q queues. 
Take pkts out of 589 * the queues at their start time, and enqueue into the delay line. 590 * Packets are drained until p->numbytes < 0. As long as 591 * len_scaled >= p->numbytes, the packet goes into the delay line 592 * with a deadline p->delay. For the last packet, if p->numbytes<0, 593 * there is an additional delay. 594 */ 595 static void 596 ready_event_wfq(struct dn_pipe *p) 597 { 598 int p_was_empty = (p->head == NULL) ; 599 struct dn_heap *sch = &(p->scheduler_heap); 600 struct dn_heap *neh = &(p->not_eligible_heap) ; 601 602 if (p->if_name[0] == 0) /* tx clock is simulated */ 603 p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; 604 else { /* tx clock is for real, the ifq must be empty or this is a NOP */ 605 if (p->ifp && p->ifp->if_snd.ifq_head != NULL) 606 return ; 607 else { 608 DEB(printf("pipe %d ready from %s --\n", 609 p->pipe_nr, p->if_name);) 610 } 611 } 612 613 /* 614 * While we have backlogged traffic AND credit, we need to do 615 * something on the queue. 616 */ 617 while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { 618 if (sch->elements > 0) { /* have some eligible pkts to send out */ 619 struct dn_flow_queue *q = sch->p[0].object ; 620 struct dn_pkt *pkt = q->head; 621 struct dn_flow_set *fs = q->fs; 622 u_int64_t len = pkt->dn_m->m_pkthdr.len; 623 int len_scaled = p->bandwidth ? len*8*hz : 0 ; 624 625 heap_extract(sch, NULL); /* remove queue from heap */ 626 p->numbytes -= len_scaled ; 627 move_pkt(pkt, q, p, len); 628 629 p->V += (len<<MY_M) / p->sum ; /* update V */ 630 q->S = q->F ; /* update start time */ 631 if (q->len == 0) { /* Flow not backlogged any more */ 632 fs->backlogged-- ; 633 heap_insert(&(p->idle_heap), q->F, q); 634 } else { /* still backlogged */ 635 /* 636 * update F and position in backlogged queue, then 637 * put flow in not_eligible_heap (we will fix this later). 
638 */ 639 len = (q->head)->dn_m->m_pkthdr.len; 640 q->F += (len<<MY_M)/(u_int64_t) fs->weight ; 641 if (DN_KEY_LEQ(q->S, p->V)) 642 heap_insert(neh, q->S, q); 643 else 644 heap_insert(sch, q->F, q); 645 } 646 } 647 /* 648 * now compute V = max(V, min(S_i)). Remember that all elements in sch 649 * have by definition S_i <= V so if sch is not empty, V is surely 650 * the max and we must not update it. Conversely, if sch is empty 651 * we only need to look at neh. 652 */ 653 if (sch->elements == 0 && neh->elements > 0) 654 p->V = MAX64 ( p->V, neh->p[0].key ); 655 /* move from neh to sch any packets that have become eligible */ 656 while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) { 657 struct dn_flow_queue *q = neh->p[0].object ; 658 heap_extract(neh, NULL); 659 heap_insert(sch, q->F, q); 660 } 661 662 if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ 663 p->numbytes = -1 ; /* mark not ready for I/O */ 664 break ; 665 } 666 } 667 if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 668 && p->idle_heap.elements > 0) { 669 /* 670 * no traffic and no events scheduled. We can get rid of idle-heap. 671 */ 672 int i ; 673 674 for (i = 0 ; i < p->idle_heap.elements ; i++) { 675 struct dn_flow_queue *q = p->idle_heap.p[i].object ; 676 677 q->F = 0 ; 678 q->S = q->F + 1 ; 679 } 680 p->sum = 0 ; 681 p->V = 0 ; 682 p->idle_heap.elements = 0 ; 683 } 684 /* 685 * If we are getting clocks from dummynet (not a real interface) and 686 * If we are under credit, schedule the next ready event. 687 * Also fix the delivery time of the last packet. 
688 */ 689 if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */ 690 dn_key t=0 ; /* number of ticks i have to wait */ 691 692 if (p->bandwidth > 0) 693 t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; 694 p->tail->output_time += t ; 695 p->sched_time = curr_time ; 696 heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); 697 /* XXX should check errors on heap_insert, and drain the whole 698 * queue on error hoping next time we are luckier. 699 */ 700 } 701 /* 702 * If the delay line was empty call transmit_event(p) now. 703 * Otherwise, the scheduler will take care of it. 704 */ 705 if (p_was_empty) 706 transmit_event(p); 707 } 708 709 /* 710 * This is called once per tick, or HZ times per second. It is used to 711 * increment the current tick counter and schedule expired events. 712 */ 713 static void 714 dummynet(void * __unused unused) 715 { 716 void *p ; /* generic parameter to handler */ 717 struct dn_heap *h ; 718 int s ; 719 struct dn_heap *heaps[3]; 720 int i; 721 struct dn_pipe *pe ; 722 723 heaps[0] = &ready_heap ; /* fixed-rate queues */ 724 heaps[1] = &wfq_ready_heap ; /* wfq queues */ 725 heaps[2] = &extract_heap ; /* delay line */ 726 s = splimp(); /* see note on top, splnet() is not enough */ 727 curr_time++ ; 728 for (i=0; i < 3 ; i++) { 729 h = heaps[i]; 730 while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { 731 DDB(if (h->p[0].key > curr_time) 732 printf("-- dummynet: warning, heap %d is %d ticks late\n", 733 i, (int)(curr_time - h->p[0].key));) 734 p = h->p[0].object ; /* store a copy before heap_extract */ 735 heap_extract(h, NULL); /* need to extract before processing */ 736 if (i == 0) 737 ready_event(p) ; 738 else if (i == 1) { 739 struct dn_pipe *pipe = p; 740 if (pipe->if_name[0] != '\0') 741 printf("*** bad ready_event_wfq for pipe %s\n", 742 pipe->if_name); 743 else 744 ready_event_wfq(p) ; 745 } else 746 transmit_event(p); 747 } 748 } 749 /* sweep pipes trying to expire idle flow_queues */ 750 
for (pe = all_pipes; pe ; pe = pe->next ) 751 if (pe->idle_heap.elements > 0 && 752 DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) { 753 struct dn_flow_queue *q = pe->idle_heap.p[0].object ; 754 755 heap_extract(&(pe->idle_heap), NULL); 756 q->S = q->F + 1 ; /* mark timestamp as invalid */ 757 pe->sum -= q->fs->weight ; 758 } 759 splx(s); 760 dn_timeout = timeout(dummynet, NULL, 1); 761 } 762 763 /* 764 * called by an interface when tx_rdy occurs. 765 */ 766 int 767 if_tx_rdy(struct ifnet *ifp) 768 { 769 struct dn_pipe *p; 770 771 for (p = all_pipes; p ; p = p->next ) 772 if (p->ifp == ifp) 773 break ; 774 if (p == NULL) { 775 for (p = all_pipes; p ; p = p->next ) 776 if (!strcmp(p->if_name, ifp->if_xname) ) { 777 p->ifp = ifp ; 778 DEB(printf("++ tx rdy from %s (now found)\n", ifp->if_xname);) 779 break ; 780 } 781 } 782 if (p != NULL) { 783 DEB(printf("++ tx rdy from %s - qlen %d\n", ifp->if_xname, 784 ifp->if_snd.ifq_len);) 785 p->numbytes = 0 ; /* mark ready for I/O */ 786 ready_event_wfq(p); 787 } 788 return 0; 789 } 790 791 /* 792 * Unconditionally expire empty queues in case of shortage. 793 * Returns the number of queues freed. 
794 */ 795 static int 796 expire_queues(struct dn_flow_set *fs) 797 { 798 struct dn_flow_queue *q, *prev ; 799 int i, initial_elements = fs->rq_elements ; 800 801 if (fs->last_expired == time_second) 802 return 0 ; 803 fs->last_expired = time_second ; 804 for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ 805 for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) 806 if (q->head != NULL || q->S != q->F+1) { 807 prev = q ; 808 q = q->next ; 809 } else { /* entry is idle, expire it */ 810 struct dn_flow_queue *old_q = q ; 811 812 if (prev != NULL) 813 prev->next = q = q->next ; 814 else 815 fs->rq[i] = q = q->next ; 816 fs->rq_elements-- ; 817 free(old_q, M_DUMMYNET); 818 } 819 return initial_elements - fs->rq_elements ; 820 } 821 822 /* 823 * If room, create a new queue and put at head of slot i; 824 * otherwise, create or use the default queue. 825 */ 826 static struct dn_flow_queue * 827 create_queue(struct dn_flow_set *fs, int i) 828 { 829 struct dn_flow_queue *q ; 830 831 if (fs->rq_elements > fs->rq_size * dn_max_ratio && 832 expire_queues(fs) == 0) { 833 /* 834 * No way to get room, use or create overflow queue. 835 */ 836 i = fs->rq_size ; 837 if ( fs->rq[i] != NULL ) 838 return fs->rq[i] ; 839 } 840 q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); 841 if (q == NULL) { 842 printf("sorry, cannot allocate queue for new flow\n"); 843 return NULL ; 844 } 845 q->fs = fs ; 846 q->hash_slot = i ; 847 q->next = fs->rq[i] ; 848 q->S = q->F + 1; /* hack - mark timestamp as invalid */ 849 fs->rq[i] = q ; 850 fs->rq_elements++ ; 851 return q ; 852 } 853 854 /* 855 * Given a flow_set and a pkt in last_pkt, find a matching queue 856 * after appropriate masking. The queue is moved to front 857 * so that further searches take less time. 
858 */ 859 static struct dn_flow_queue * 860 find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) 861 { 862 int i = 0 ; /* we need i and q for new allocations */ 863 struct dn_flow_queue *q, *prev; 864 865 if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) 866 q = fs->rq[0] ; 867 else { 868 /* first, do the masking */ 869 id->dst_ip &= fs->flow_mask.dst_ip ; 870 id->src_ip &= fs->flow_mask.src_ip ; 871 id->dst_port &= fs->flow_mask.dst_port ; 872 id->src_port &= fs->flow_mask.src_port ; 873 id->proto &= fs->flow_mask.proto ; 874 id->flags = 0 ; /* we don't care about this one */ 875 /* then, hash function */ 876 i = ( (id->dst_ip) & 0xffff ) ^ 877 ( (id->dst_ip >> 15) & 0xffff ) ^ 878 ( (id->src_ip << 1) & 0xffff ) ^ 879 ( (id->src_ip >> 16 ) & 0xffff ) ^ 880 (id->dst_port << 1) ^ (id->src_port) ^ 881 (id->proto ); 882 i = i % fs->rq_size ; 883 /* finally, scan the current list for a match */ 884 searches++ ; 885 for (prev=NULL, q = fs->rq[i] ; q ; ) { 886 search_steps++; 887 if (id->dst_ip == q->id.dst_ip && 888 id->src_ip == q->id.src_ip && 889 id->dst_port == q->id.dst_port && 890 id->src_port == q->id.src_port && 891 id->proto == q->id.proto && 892 id->flags == q->id.flags) 893 break ; /* found */ 894 else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { 895 /* entry is idle and not in any heap, expire it */ 896 struct dn_flow_queue *old_q = q ; 897 898 if (prev != NULL) 899 prev->next = q = q->next ; 900 else 901 fs->rq[i] = q = q->next ; 902 fs->rq_elements-- ; 903 free(old_q, M_DUMMYNET); 904 continue ; 905 } 906 prev = q ; 907 q = q->next ; 908 } 909 if (q && prev != NULL) { /* found and not in front */ 910 prev->next = q->next ; 911 q->next = fs->rq[i] ; 912 fs->rq[i] = q ; 913 } 914 } 915 if (q == NULL) { /* no match, need to allocate a new entry */ 916 q = create_queue(fs, i); 917 if (q != NULL) 918 q->id = *id ; 919 } 920 return q ; 921 } 922 923 static int 924 red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) 925 { 926 /* 927 * 
RED algorithm 928 * 929 * RED calculates the average queue size (avg) using a low-pass filter 930 * with an exponential weighted (w_q) moving average: 931 * avg <- (1-w_q) * avg + w_q * q_size 932 * where q_size is the queue length (measured in bytes or * packets). 933 * 934 * If q_size == 0, we compute the idle time for the link, and set 935 * avg = (1 - w_q)^(idle/s) 936 * where s is the time needed for transmitting a medium-sized packet. 937 * 938 * Now, if avg < min_th the packet is enqueued. 939 * If avg > max_th the packet is dropped. Otherwise, the packet is 940 * dropped with probability P function of avg. 941 * 942 */ 943 944 int64_t p_b = 0; 945 /* queue in bytes or packets ? */ 946 u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; 947 948 DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);) 949 950 /* average queue size estimation */ 951 if (q_size != 0) { 952 /* 953 * queue is not empty, avg <- avg + (q_size - avg) * w_q 954 */ 955 int diff = SCALE(q_size) - q->avg; 956 int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q); 957 958 q->avg += (int) v; 959 } else { 960 /* 961 * queue is empty, find for how long the queue has been 962 * empty and use a lookup table for computing 963 * (1 - * w_q)^(idle_time/s) where s is the time to send a 964 * (small) packet. 965 * XXX check wraps... 966 */ 967 if (q->avg) { 968 u_int t = (curr_time - q->q_time) / fs->lookup_step; 969 970 q->avg = (t < fs->lookup_depth) ? 971 SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; 972 } 973 } 974 DEB(printf("avg: %u ", SCALE_VAL(q->avg));) 975 976 /* should i drop ? 
*/ 977 978 if (q->avg < fs->min_th) { 979 q->count = -1; 980 return 0; /* accept packet ; */ 981 } 982 if (q->avg >= fs->max_th) { /* average queue >= max threshold */ 983 if (fs->flags_fs & DN_IS_GENTLE_RED) { 984 /* 985 * According to Gentle-RED, if avg is greater than max_th the 986 * packet is dropped with a probability 987 * p_b = c_3 * avg - c_4 988 * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p 989 */ 990 p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4; 991 } else { 992 q->count = -1; 993 printf("- drop"); 994 return 1 ; 995 } 996 } else if (q->avg > fs->min_th) { 997 /* 998 * we compute p_b using the linear dropping function p_b = c_1 * 999 * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 = 1000 * max_p * min_th / (max_th - min_th) 1001 */ 1002 p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2; 1003 } 1004 if (fs->flags_fs & DN_QSIZE_IS_BYTES) 1005 p_b = (p_b * len) / fs->max_pkt_size; 1006 if (++q->count == 0) 1007 q->random = random() & 0xffff; 1008 else { 1009 /* 1010 * q->count counts packets arrived since last drop, so a greater 1011 * value of q->count means a greater packet drop probability. 
	 */
	if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
	    q->count = 0;
	    DEB(printf("- red drop");)
	    /* after a drop we calculate a new random value */
	    q->random = random() & 0xffff;
	    return 1;    /* drop */
	}
    }
    /* end of RED algorithm */
    return 0 ; /* accept */
}

/*
 * Map a pipe/queue number (from a matching ipfw rule) to its dn_flow_set.
 * For an O_QUEUE rule the flow_set list is searched directly; otherwise the
 * pipe list is searched and the pipe's embedded flow_set is used.  The
 * result is cached back into the rule (pipe_ptr) so subsequent packets on
 * the same rule skip the linear scan.  Returns NULL if no such pipe/queue
 * is configured.
 */
static __inline
struct dn_flow_set *
locate_flowset(int pipe_nr, struct ip_fw *rule)
{
#if IPFW2
    struct dn_flow_set *fs;
    ipfw_insn *cmd = rule->cmd + rule->act_ofs;

    /* skip an optional O_LOG opcode to reach the pipe/queue action */
    if (cmd->opcode == O_LOG)
	cmd += F_LEN(cmd);
    fs = ((ipfw_insn_pipe *)cmd)->pipe_ptr;

    if (fs != NULL)
	return fs;	/* cached from a previous lookup */

    if (cmd->opcode == O_QUEUE)
#else /* !IPFW2 */
    struct dn_flow_set *fs = NULL ;

    if ( (rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE )
#endif /* !IPFW2 */
	for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next)
	    ;
    else {
	struct dn_pipe *p1;
	for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next)
	    ;
	if (p1 != NULL)
	    fs = &(p1->fs) ;
    }
    /* record for the future */
#if IPFW2
    ((ipfw_insn_pipe *)cmd)->pipe_ptr = fs;
#else
    if (fs != NULL)
	rule->pipe_ptr = fs;
#endif
    return fs ;
}

/*
 * dummynet hook for packets. Below 'pipe' is a pipe or a queue
 * depending on whether WF2Q or fixed bw is used.
 *
 * pipe_nr	pipe or queue the packet is destined for.
 * dir		where shall we send the packet after dummynet.
 * m		the mbuf with the packet
 * ifp		the 'ifp' parameter from the caller.
 *		NULL in ip_input, destination interface in ip_output,
 *		real_dst in bdg_forward
 * ro		route parameter (only used in ip_output, NULL otherwise)
 * dst		destination address, only used by ip_output
 * rule		matching rule, in case of multiple passes
 * flags	flags from the caller, only used in ip_output
 *
 * Returns 0 when the packet was queued (or silently dropped with
 * DN_NOERROR set), ENOBUFS when it was dropped.  On drop the mbuf is
 * always freed here.  The whole body runs at splimp().
 */
static int
dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
{
    struct dn_pkt *pkt;
    struct dn_flow_set *fs;
    struct dn_pipe *pipe ;
    u_int64_t len = m->m_pkthdr.len ;
    struct dn_flow_queue *q = NULL ;
    int s = splimp();
    int is_pipe;
#if IPFW2
    ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs;

    if (cmd->opcode == O_LOG)
	cmd += F_LEN(cmd);
    is_pipe = (cmd->opcode == O_PIPE);
#else
    is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE;
#endif

    pipe_nr &= 0xffff ;

    /*
     * this is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule
     */
    fs = locate_flowset(pipe_nr, fwa->rule);
    if (fs == NULL)
	goto dropit ;	/* this queue/pipe does not exist! */
    pipe = fs->pipe ;
    if (pipe == NULL) { /* must be a queue, try find a matching pipe */
	for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr;
		 pipe = pipe->next)
	    ;
	if (pipe != NULL)
	    fs->pipe = pipe ;	/* cache the parent pipe pointer */
	else {
	    printf("No pipe %d for queue %d, drop pkt\n",
		fs->parent_nr, fs->fs_nr);
	    goto dropit ;
	}
    }
    q = find_queue(fs, &(fwa->f_id));
    if ( q == NULL )
	goto dropit ;		/* cannot allocate queue		*/
    /*
     * update statistics, then check reasons to drop pkt
     */
    q->tot_bytes += len ;
    q->tot_pkts++ ;
    if ( fs->plr && random() < fs->plr )
	goto dropit ;		/* random pkt drop			*/
    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
	if (q->len_bytes > fs->qsize)
	    goto dropit ;	/* queue size overflow			*/
    } else {
	if (q->len >= fs->qsize)
	    goto dropit ;	/* queue count overflow			*/
    }
    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
	goto dropit ;

    /* XXX expensive to zero, see if we can remove it*/
    pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO);
    if ( pkt == NULL )
	goto dropit ;		/* cannot allocate packet header	*/
    /* ok, i can handle the pkt now... */
    /* build and enqueue packet + parameters */
    pkt->hdr.mh_type = MT_TAG;
    pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET;
    pkt->rule = fwa->rule ;
    DN_NEXT(pkt) = NULL;
    pkt->dn_m = m;
    pkt->dn_dir = dir ;

    pkt->ifp = fwa->oif;
    if (dir == DN_TO_IP_OUT) {
	/*
	 * We need to copy *ro because for ICMP pkts (and maybe others)
	 * the caller passed a pointer into the stack; dst might also be
	 * a pointer into *ro so it needs to be updated.
	 */
	pkt->ro = *(fwa->ro);
	if (fwa->ro->ro_rt)
	    fwa->ro->ro_rt->rt_refcnt++ ;	/* hold the route while queued */
	if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */
	    fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;

	pkt->dn_dst = fwa->dst;
	pkt->flags = fwa->flags;
    }
    /* append to the tail of this flow's packet list */
    if (q->head == NULL)
	q->head = pkt;
    else
	DN_NEXT(q->tail) = pkt;
    q->tail = pkt;
    q->len++;
    q->len_bytes += len ;

    if ( q->head != pkt )	/* flow was not idle, we are done */
	goto done;
    /*
     * If we reach this point the flow was previously idle, so we need
     * to schedule it. This involves different actions for fixed-rate or
     * WF2Q queues.
     */
    if (is_pipe) {
	/*
	 * Fixed-rate queue: just insert into the ready_heap.
	 */
	dn_key t = 0 ;
	if (pipe->bandwidth)
	    t = SET_TICKS(pkt, q, pipe);
	q->sched_time = curr_time ;
	if (t == 0)	/* must process it now */
	    ready_event( q );
	else
	    heap_insert(&ready_heap, curr_time + t , q );
    } else {
	/*
	 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
	 * set S to the virtual time V for the controlling pipe, and update
	 * the sum of weights for the pipe; otherwise, remove flow from
	 * idle_heap and set S to max(F,V).
	 * Second, compute finish time F = S + len/weight.
	 * Third, if pipe was idle, update V=max(S, V).
	 * Fourth, count one more backlogged flow.
	 */
	if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
	    q->S = pipe->V ;
	    pipe->sum += fs->weight ; /* add weight of new queue */
	} else {
	    heap_extract(&(pipe->idle_heap), q);
	    q->S = MAX64(q->F, pipe->V ) ;
	}
	q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;

	if (pipe->not_eligible_heap.elements == 0 &&
		pipe->scheduler_heap.elements == 0)
	    pipe->V = MAX64 ( q->S, pipe->V );
	fs->backlogged++ ;
	/*
	 * Look at eligibility. A flow is not eligibile if S>V (when
	 * this happens, it means that there is some other flow already
	 * scheduled for the same pipe, so the scheduler_heap cannot be
	 * empty). If the flow is not eligible we just store it in the
	 * not_eligible_heap. Otherwise, we store in the scheduler_heap
	 * and possibly invoke ready_event_wfq() right now if there is
	 * leftover credit.
	 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
	 * and for all flows in not_eligible_heap (NEH), S_i > V .
	 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
	 * we only need to look into NEH.
	 */
	if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
	    if (pipe->scheduler_heap.elements == 0)
		printf("++ ouch! not eligible but empty scheduler!\n");
	    heap_insert(&(pipe->not_eligible_heap), q->S, q);
	} else {
	    heap_insert(&(pipe->scheduler_heap), q->F, q);
	    if (pipe->numbytes >= 0) { /* pipe is idle */
		if (pipe->scheduler_heap.elements != 1)
		    printf("*** OUCH! pipe should have been idle!\n");
		DEB(printf("Waking up pipe %d at %d\n",
			pipe->pipe_nr, (int)(q->F >> MY_M)); )
		pipe->sched_time = curr_time ;
		ready_event_wfq(pipe);
	    }
	}
    }
done:
    splx(s);
    return 0;

dropit:
    /* NOTE(review): q->drops is updated after splx(); looks benign since
     * only stats are touched, but confirm against the splimp() protocol. */
    splx(s);
    if (q)
	q->drops++ ;
    m_freem(m);
    return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
}

/*
 * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
 * Doing this would probably save us the initial bzero of dn_pkt
 *
 * Frees one dn_pkt: drops the cached route ref, frees the mbuf and the
 * descriptor, and advances the caller's cursor to the next packet.
 * NOTE(review): plain brace block, not do { } while (0) -- safe in the
 * existing `for (...) DN_FREE_PKT(pkt) ;` call sites, but would break
 * inside an unbraced if/else.
 */
#define DN_FREE_PKT(pkt)	{ \
	struct dn_pkt *n = pkt ; \
	rt_unref ( n->ro.ro_rt ) ; \
	m_freem(n->dn_m); \
	pkt = DN_NEXT(n) ; \
	free(n, M_DUMMYNET) ; }

/*
 * Dispose all packets and flow_queues on a flow_set.
 * If all=1, also remove red lookup table and other storage,
 * including the descriptor itself.
 * For the one in dn_pipe MUST also cleanup ready_heap...
 */
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
    struct dn_pkt *pkt ;
    struct dn_flow_queue *q, *qn ;
    int i ;

    /* walk every hash bucket (<= rq_size: the last slot is the overflow
     * bucket) and free each queue together with its pending packets */
    for (i = 0 ; i <= fs->rq_size ; i++ ) {
	for (q = fs->rq[i] ; q ; q = qn ) {
	    for (pkt = q->head ; pkt ; )
		DN_FREE_PKT(pkt) ;
	    qn = q->next ;
	    free(q, M_DUMMYNET);
	}
	fs->rq[i] = NULL ;
    }
    fs->rq_elements = 0 ;
    if (all) {
	/* RED - free lookup table */
	if (fs->w_q_lookup)
	    free(fs->w_q_lookup, M_DUMMYNET);
	if (fs->rq)
	    free(fs->rq, M_DUMMYNET);
	/* if this fs is not part of a pipe, free it */
	if (fs->pipe && fs != &(fs->pipe->fs) )
	    free(fs, M_DUMMYNET);
    }
}

/*
 * Dispose all packets queued on a pipe (not a flow_set).
 * Also free all resources associated to a pipe, which is about
 * to be deleted.
 */
static void
purge_pipe(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;

    purge_flow_set( &(pipe->fs), 1 );

    for (pkt = pipe->head ; pkt ; )
	DN_FREE_PKT(pkt) ;

    heap_free( &(pipe->scheduler_heap) );
    heap_free( &(pipe->not_eligible_heap) );
    heap_free( &(pipe->idle_heap) );
}

/*
 * Delete all pipes and heaps returning memory. Must also
 * remove references from all ipfw rules to all pipes.
 * The global lists are detached under splimp() so the scheduler can no
 * longer see them; the actual freeing then happens at lower spl.
 */
static void
dummynet_flush()
{
    struct dn_pipe *curr_p, *p ;
    struct dn_flow_set *fs, *curr_fs;
    int s ;

    s = splimp() ;

    /* remove all references to pipes ...*/
    flush_pipe_ptrs(NULL);
    /* prevent future matches...
 */
    p = all_pipes ;
    all_pipes = NULL ;
    fs = all_flow_sets ;
    all_flow_sets = NULL ;
    /* and free heaps so we don't have unwanted events */
    heap_free(&ready_heap);
    heap_free(&wfq_ready_heap);
    heap_free(&extract_heap);
    splx(s) ;
    /*
     * Now purge all queued pkts and delete all pipes
     */
    /* scan and purge all flow_sets. */
    for ( ; fs ; ) {
	curr_fs = fs ;
	fs = fs->next ;
	purge_flow_set(curr_fs, 1);
    }
    for ( ; p ; ) {
	purge_pipe(p);
	curr_p = p ;
	p = p->next ;
	free(curr_p, M_DUMMYNET);
    }
}


extern struct ip_fw *ip_fw_default_rule ;

/*
 * Walk every queue of *fs and re-point packets that reference the
 * (about to be deleted) rule r at the default rule, so no stale rule
 * pointer survives in queued packets.
 */
static void
dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
{
    int i ;
    struct dn_flow_queue *q ;
    struct dn_pkt *pkt ;

    for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
	for (q = fs->rq[i] ; q ; q = q->next )
	    for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) )
		if (pkt->rule == r)
		    pkt->rule = ip_fw_default_rule ;
}
/*
 * when a firewall rule is deleted, scan all queues and remove the flow-id
 * from packets matching this rule.
 */
void
dn_rule_delete(void *r)
{
    struct dn_pipe *p ;
    struct dn_pkt *pkt ;
    struct dn_flow_set *fs ;

    /*
     * If the rule references a queue (dn_flow_set), then scan
     * the flow set, otherwise scan pipes. Should do either, but doing
     * both does not harm.
     */
    for ( fs = all_flow_sets ; fs ; fs = fs->next )
	dn_rule_delete_fs(fs, r);
    for ( p = all_pipes ; p ; p = p->next ) {
	fs = &(p->fs) ;
	dn_rule_delete_fs(fs, r);
	/* also the packets already extracted and sitting on the pipe */
	for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) )
	    if (pkt->rule == r)
		pkt->rule = ip_fw_default_rule ;
    }
}

/*
 * setup RED parameters: copy/scale the user-supplied values from *p into
 * *x, precompute the drop-probability coefficients c_1..c_4 and (re)build
 * the (1-w_q)^i lookup table used to age the average queue size while the
 * queue is idle.  Returns 0, EINVAL or ENOSPC.
 *
 * NOTE(review): on the error paths this frees *x, but the only visible
 * caller (set_fs_parms) ignores the return value and keeps using x --
 * a latent use-after-free if red_lookup_depth is 0 or the table
 * allocation fails.  Verify before relying on the error paths.
 */
static int
config_red(struct dn_flow_set *p, struct dn_flow_set * x)
{
    int i;

    x->w_q = p->w_q;
    x->min_th = SCALE(p->min_th);
    x->max_th = SCALE(p->max_th);
    x->max_p = p->max_p;

    /* linear region: p_b = c_1 * avg - c_2 */
    x->c_1 = p->max_p / (p->max_th - p->min_th);
    x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
    if (x->flags_fs & DN_IS_GENTLE_RED) {
	/* gentle region above max_th: p_b = c_3 * avg - c_4 */
	x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
	x->c_4 = (SCALE(1) - 2 * p->max_p);
    }

    /* if the lookup table already exist, free and create it again */
    if (x->w_q_lookup) {
	free(x->w_q_lookup, M_DUMMYNET);
	x->w_q_lookup = NULL ;
    }
    if (red_lookup_depth == 0) {
	printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0");
	free(x, M_DUMMYNET);
	return EINVAL;
    }
    x->lookup_depth = red_lookup_depth;
    x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
	    M_DUMMYNET, M_NOWAIT);
    if (x->w_q_lookup == NULL) {
	printf("sorry, cannot allocate red lookup table\n");
	free(x, M_DUMMYNET);
	return ENOSPC;
    }

    /* fill the lookup table with (1 - w_q)^x */
    x->lookup_step = p->lookup_step ;
    x->lookup_weight = p->lookup_weight ;
    x->w_q_lookup[0] = SCALE(1) - x->w_q;
    for (i = 1; i < x->lookup_depth; i++)
	x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
    if (red_avg_pkt_size < 1)
	red_avg_pkt_size = 512 ;
    x->avg_pkt_size = red_avg_pkt_size ;
    if (red_max_pkt_size < 1)
	red_max_pkt_size = 1500 ;
    x->max_pkt_size = red_max_pkt_size ;
    return 0 ;
}

static int
1467 alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) 1468 { 1469 if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ 1470 int l = pfs->rq_size; 1471 1472 if (l == 0) 1473 l = dn_hash_size; 1474 if (l < 4) 1475 l = 4; 1476 else if (l > DN_MAX_HASH_SIZE) 1477 l = DN_MAX_HASH_SIZE; 1478 x->rq_size = l; 1479 } else /* one is enough for null mask */ 1480 x->rq_size = 1; 1481 x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), 1482 M_DUMMYNET, M_NOWAIT | M_ZERO); 1483 if (x->rq == NULL) { 1484 printf("sorry, cannot allocate queue\n"); 1485 return ENOSPC; 1486 } 1487 x->rq_elements = 0; 1488 return 0 ; 1489 } 1490 1491 static void 1492 set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) 1493 { 1494 x->flags_fs = src->flags_fs; 1495 x->qsize = src->qsize; 1496 x->plr = src->plr; 1497 x->flow_mask = src->flow_mask; 1498 if (x->flags_fs & DN_QSIZE_IS_BYTES) { 1499 if (x->qsize > 1024*1024) 1500 x->qsize = 1024*1024 ; 1501 } else { 1502 if (x->qsize == 0) 1503 x->qsize = 50 ; 1504 if (x->qsize > 100) 1505 x->qsize = 50 ; 1506 } 1507 /* configuring RED */ 1508 if ( x->flags_fs & DN_IS_RED ) 1509 config_red(src, x) ; /* XXX should check errors */ 1510 } 1511 1512 /* 1513 * setup pipe or queue parameters. 1514 */ 1515 1516 static int 1517 config_pipe(struct dn_pipe *p) 1518 { 1519 int i, s; 1520 struct dn_flow_set *pfs = &(p->fs); 1521 struct dn_flow_queue *q; 1522 1523 /* 1524 * The config program passes parameters as follows: 1525 * bw = bits/second (0 means no limits), 1526 * delay = ms, must be translated into ticks. 
1527 * qsize = slots/bytes 1528 */ 1529 p->delay = ( p->delay * hz ) / 1000 ; 1530 /* We need either a pipe number or a flow_set number */ 1531 if (p->pipe_nr == 0 && pfs->fs_nr == 0) 1532 return EINVAL ; 1533 if (p->pipe_nr != 0 && pfs->fs_nr != 0) 1534 return EINVAL ; 1535 if (p->pipe_nr != 0) { /* this is a pipe */ 1536 struct dn_pipe *x, *a, *b; 1537 /* locate pipe */ 1538 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; 1539 a = b , b = b->next) ; 1540 1541 if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ 1542 x = malloc(sizeof(struct dn_pipe), M_DUMMYNET, M_NOWAIT | M_ZERO); 1543 if (x == NULL) { 1544 printf("ip_dummynet.c: no memory for new pipe\n"); 1545 return ENOSPC; 1546 } 1547 x->pipe_nr = p->pipe_nr; 1548 x->fs.pipe = x ; 1549 /* idle_heap is the only one from which we extract from the middle. 1550 */ 1551 x->idle_heap.size = x->idle_heap.elements = 0 ; 1552 x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos); 1553 } else { 1554 x = b; 1555 s = splimp(); 1556 /* Flush accumulated credit for all queues */ 1557 for (i = 0; i <= x->fs.rq_size; i++) 1558 for (q = x->fs.rq[i]; q; q = q->next) 1559 q->numbytes = 0; 1560 splx(s); 1561 } 1562 1563 s = splimp(); 1564 x->bandwidth = p->bandwidth ; 1565 x->numbytes = 0; /* just in case... 
*/ 1566 bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); 1567 x->ifp = NULL ; /* reset interface ptr */ 1568 x->delay = p->delay ; 1569 set_fs_parms(&(x->fs), pfs); 1570 1571 1572 if ( x->fs.rq == NULL ) { /* a new pipe */ 1573 s = alloc_hash(&(x->fs), pfs) ; 1574 if (s) { 1575 free(x, M_DUMMYNET); 1576 return s ; 1577 } 1578 x->next = b ; 1579 if (a == NULL) 1580 all_pipes = x ; 1581 else 1582 a->next = x ; 1583 } 1584 splx(s); 1585 } else { /* config queue */ 1586 struct dn_flow_set *x, *a, *b ; 1587 1588 /* locate flow_set */ 1589 for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ; 1590 a = b , b = b->next) ; 1591 1592 if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ 1593 if (pfs->parent_nr == 0) /* need link to a pipe */ 1594 return EINVAL ; 1595 x = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, M_NOWAIT|M_ZERO); 1596 if (x == NULL) { 1597 printf("ip_dummynet.c: no memory for new flow_set\n"); 1598 return ENOSPC; 1599 } 1600 x->fs_nr = pfs->fs_nr; 1601 x->parent_nr = pfs->parent_nr; 1602 x->weight = pfs->weight ; 1603 if (x->weight == 0) 1604 x->weight = 1 ; 1605 else if (x->weight > 100) 1606 x->weight = 100 ; 1607 } else { 1608 /* Change parent pipe not allowed; must delete and recreate */ 1609 if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) 1610 return EINVAL ; 1611 x = b; 1612 } 1613 s = splimp(); 1614 set_fs_parms(x, pfs); 1615 1616 if ( x->rq == NULL ) { /* a new flow_set */ 1617 s = alloc_hash(x, pfs) ; 1618 if (s) { 1619 free(x, M_DUMMYNET); 1620 return s ; 1621 } 1622 x->next = b; 1623 if (a == NULL) 1624 all_flow_sets = x; 1625 else 1626 a->next = x; 1627 } 1628 splx(s); 1629 } 1630 return 0 ; 1631 } 1632 1633 /* 1634 * Helper function to remove from a heap queues which are linked to 1635 * a flow_set about to be deleted. 
 */
static void
fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
{
    int i = 0, found = 0 ;
    /* swap-delete every matching entry (replace with the last element,
     * do not advance i so the swapped-in entry is re-examined), then
     * restore the heap invariant once at the end */
    for (; i < h->elements ;)
	if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
	    h->elements-- ;
	    h->p[i] = h->p[h->elements] ;
	    found++ ;
	} else
	    i++ ;
    if (found)
	heapify(h);
}

/*
 * helper function to remove a pipe from a heap (can be there at most once)
 */
static void
pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
{
    if (h->elements > 0) {
	int i = 0 ;
	for (i=0; i < h->elements ; i++ ) {
	    if (h->p[i].object == p) { /* found it */
		h->elements-- ;
		h->p[i] = h->p[h->elements] ;
		heapify(h);
		break ;
	    }
	}
    }
}

/*
 * drain all queues. Called in case of severe mbuf shortage.
 * Drops every queued packet but keeps the pipe/flow_set configuration
 * (purge_flow_set is called with all=0).
 */
void
dummynet_drain()
{
    struct dn_flow_set *fs;
    struct dn_pipe *p;
    struct dn_pkt *pkt;

    heap_free(&ready_heap);
    heap_free(&wfq_ready_heap);
    heap_free(&extract_heap);
    /* remove all references to this pipe from flow_sets */
    for (fs = all_flow_sets; fs; fs= fs->next )
	purge_flow_set(fs, 0);

    for (p = all_pipes; p; p= p->next ) {
	purge_flow_set(&(p->fs), 0);
	for (pkt = p->head ; pkt ; )
	    DN_FREE_PKT(pkt) ;
	p->head = p->tail = NULL ;
    }
}

/*
 * Fully delete a pipe or a queue, cleaning up associated info.
 * As in config_pipe(), exactly one of p->pipe_nr / p->fs.fs_nr selects
 * what to delete.  Returns 0 or EINVAL if not found / ambiguous request.
 */
static int
delete_pipe(struct dn_pipe *p)
{
    int s ;

    if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
	return EINVAL ;
    if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
	return EINVAL ;
    if (p->pipe_nr != 0) { /* this is an old-style pipe */
	struct dn_pipe *a, *b;
	struct dn_flow_set *fs;

	/* locate pipe */
	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
		 a = b , b = b->next) ;
	if (b == NULL || (b->pipe_nr != p->pipe_nr) )
	    return EINVAL ;	/* not found */

	s = splimp() ;

	/* unlink from list of pipes */
	if (a == NULL)
	    all_pipes = b->next ;
	else
	    a->next = b->next ;
	/* remove references to this pipe from the ip_fw rules. */
	flush_pipe_ptrs(&(b->fs));

	/* remove all references to this pipe from flow_sets */
	for (fs = all_flow_sets; fs; fs= fs->next )
	    if (fs->pipe == b) {
		printf("++ ref to pipe %d from fs %d\n",
			p->pipe_nr, fs->fs_nr);
		fs->pipe = NULL ;
		purge_flow_set(fs, 0);
	    }
	fs_remove_from_heap(&ready_heap, &(b->fs));
	purge_pipe(b);	/* remove all data associated to this pipe */
	/* remove reference to here from extract_heap and wfq_ready_heap */
	pipe_remove_from_heap(&extract_heap, b);
	pipe_remove_from_heap(&wfq_ready_heap, b);
	splx(s);
	free(b, M_DUMMYNET);
    } else { /* this is a WF2Q queue (dn_flow_set) */
	struct dn_flow_set *a, *b;

	/* locate set */
	for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ;
		 a = b , b = b->next) ;
	if (b == NULL || (b->fs_nr != p->fs.fs_nr) )
	    return EINVAL ; /* not found */

	s = splimp() ;
	if (a == NULL)
	    all_flow_sets = b->next ;
	else
	    a->next = b->next ;
	/* remove references to this flow_set from the ip_fw rules. */
	flush_pipe_ptrs(b);

	if (b->pipe != NULL) {
	    /* Update total weight on parent pipe and cleanup parent heaps */
	    b->pipe->sum -= b->weight * b->backlogged ;
	    fs_remove_from_heap(&(b->pipe->not_eligible_heap), b);
	    fs_remove_from_heap(&(b->pipe->scheduler_heap), b);
#if 1	/* XXX should i remove from idle_heap as well ? */
	    fs_remove_from_heap(&(b->pipe->idle_heap), b);
#endif
	}
	purge_flow_set(b, 1);
	splx(s);
    }
    return 0 ;
}

/*
 * helper function used to copy data from kernel in DUMMYNET_GET
 * Copies every dn_flow_queue of *set into the export buffer at bp,
 * nulling kernel pointers in the copies, and returns the advanced
 * buffer pointer.  The printf's are consistency checks only.
 */
static char *
dn_copy_set(struct dn_flow_set *set, char *bp)
{
    int i, copied = 0 ;
    struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp;

    for (i = 0 ; i <= set->rq_size ; i++)
	for (q = set->rq[i] ; q ; q = q->next, qp++ ) {
	    if (q->hash_slot != i)
		printf("++ at %d: wrong slot (have %d, "
		    "should be %d)\n", copied, q->hash_slot, i);
	    if (q->fs != set)
		printf("++ at %d: wrong fs ptr (have %p, should be %p)\n",
			i, q->fs, set);
	    copied++ ;
	    bcopy(q, qp, sizeof( *q ) );
	    /* cleanup pointers */
	    qp->next = NULL ;
	    qp->head = qp->tail = NULL ;
	    qp->fs = NULL ;
	}
    if (copied != set->rq_elements)
	printf("++ wrong count, have %d should be %d\n",
	    copied, set->rq_elements);
    return (char *)qp ;
}

static int
dummynet_get(struct sockopt *sopt)
{
    char *buf, *bp ;	/* bp is the "copy-pointer" */
    size_t size ;
    struct dn_flow_set *set ;
    struct dn_pipe *p ;
    int s, error=0 ;

    s = splimp();
    /*
     * compute size of data structures: list of pipes and flow_sets.
     * The spl is held across sizing and copying, so the lists cannot
     * change between the two passes.
     */
    for (p = all_pipes, size = 0 ; p ; p = p->next )
	size += sizeof( *p ) +
	    p->fs.rq_elements * sizeof(struct dn_flow_queue);
    for (set = all_flow_sets ; set ; set = set->next )
	size += sizeof ( *set ) +
	    set->rq_elements * sizeof(struct dn_flow_queue);
    buf = malloc(size, M_TEMP, M_NOWAIT);
    if (buf == 0) {
	splx(s);
	return ENOBUFS ;
    }
    for (p = all_pipes, bp = buf ; p ; p = p->next ) {
	struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;

	/*
	 * copy pipe descriptor into *bp, convert delay back to ms,
	 * then copy the flow_set descriptor(s) one at a time.
	 * After each flow_set, copy the queue descriptor it owns.
	 */
	bcopy(p, bp, sizeof( *p ) );
	pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
	/*
	 * XXX the following is a hack based on ->next being the
	 * first field in dn_pipe and dn_flow_set. The correct
	 * solution would be to move the dn_flow_set to the beginning
	 * of struct dn_pipe.
	 */
	pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ;
	/* clean pointers */
	pipe_bp->head = pipe_bp->tail = NULL ;
	pipe_bp->fs.next = NULL ;
	pipe_bp->fs.pipe = NULL ;
	pipe_bp->fs.rq = NULL ;

	bp += sizeof( *p ) ;
	bp = dn_copy_set( &(p->fs), bp );
    }
    for (set = all_flow_sets ; set ; set = set->next ) {
	struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ;
	bcopy(set, bp, sizeof( *set ) );
	/* XXX same hack as above */
	fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ;
	fs_bp->pipe = NULL ;
	fs_bp->rq = NULL ;
	bp += sizeof( *set ) ;
	bp = dn_copy_set( set, bp );
    }
    splx(s);
    error = sooptcopyout(sopt, buf, size);
    free(buf, M_TEMP);
    return error ;
}

/*
 * Handler for the various dummynet socket options (get, flush, config, del)
 */
static int
ip_dn_ctl(struct sockopt *sopt)
{
    int error = 0 ;
    struct dn_pipe *p, tmp_pipe;

    /* Disallow sets in really-really secure mode.
     */
    if (sopt->sopt_dir == SOPT_SET) {
#if __FreeBSD_version >= 500034
	error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
	if (error)
	    return (error);
#else
	if (securelevel >= 3)
	    return (EPERM);
#endif
    }

    switch (sopt->sopt_name) {
    default :
	printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name);
	return EINVAL ;

    case IP_DUMMYNET_GET :
	error = dummynet_get(sopt);
	break ;

    case IP_DUMMYNET_FLUSH :
	dummynet_flush() ;
	break ;

    case IP_DUMMYNET_CONFIGURE :
	/* copy the request into a local dn_pipe and apply it */
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;
	error = config_pipe(p);
	break ;

    case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;

	error = delete_pipe(p);
	break ;
    }
    return error ;
}

/*
 * One-time initialization: reset the global lists/heaps, install the
 * dummynet hooks (ctl/io/ruledel) used by ipfw, and start the periodic
 * scheduler tick via timeout().
 */
static void
ip_dn_init(void)
{
    printf("DUMMYNET initialized (011031)\n");
    all_pipes = NULL ;
    all_flow_sets = NULL ;
    ready_heap.size = ready_heap.elements = 0 ;
    ready_heap.offset = 0 ;

    wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
    wfq_ready_heap.offset = 0 ;

    extract_heap.size = extract_heap.elements = 0 ;
    extract_heap.offset = 0 ;
    ip_dn_ctl_ptr = ip_dn_ctl;
    ip_dn_io_ptr = dummynet_io;
    ip_dn_ruledel_ptr = dn_rule_delete;
    bzero(&dn_timeout, sizeof(struct callout_handle));
    dn_timeout = timeout(dummynet, NULL, 1);
}

/*
 * Module load/unload handler.  Unload is only possible when built as a
 * KLD; it stops the timer, flushes all state and clears the hooks in
 * the reverse order of ip_dn_init().
 */
static int
dummynet_modevent(module_t mod, int type, void *data)
{
    int s;
    switch (type) {
    case MOD_LOAD:
	s = splimp();
	if (DUMMYNET_LOADED) {
	    splx(s);
	    printf("DUMMYNET already loaded\n");
	    return EEXIST ;
	}
	ip_dn_init();
	splx(s);
	break;

    case MOD_UNLOAD:
#if !defined(KLD_MODULE)
	printf("dummynet statically compiled, cannot unload\n");
	return EINVAL ;
#else
	s = splimp();
	untimeout(dummynet, NULL, dn_timeout);
	dummynet_flush();
	ip_dn_ctl_ptr = NULL;
	ip_dn_io_ptr = NULL;
	ip_dn_ruledel_ptr = NULL;
	splx(s);
#endif
	break ;
    default:
	break ;
    }
    return 0 ;
}

static moduledata_t dummynet_mod = {
	"dummynet",
	dummynet_modevent,
	NULL
};
DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_DEPEND(dummynet, ipfw, 1, 1, 1);
MODULE_VERSION(dummynet, 1);