1 /* 2 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.22 2003/05/13 09:31:06 maxim Exp $ 28 * $DragonFly: src/sys/net/dummynet/ip_dummynet.c,v 1.6 2004/03/06 01:58:55 hsu Exp $ 29 */ 30 31 #if !defined(KLD_MODULE) 32 #include "opt_ipfw.h" /* for IPFW2 definition */ 33 #endif 34 35 #define DEB(x) 36 #define DDB(x) x 37 38 /* 39 * This module implements IP dummynet, a bandwidth limiter/delay emulator 40 * used in conjunction with the ipfw package. 
41 * Description of the data structures used is in ip_dummynet.h 42 * Here you mainly find the following blocks of code: 43 * + variable declarations; 44 * + heap management functions; 45 * + scheduler and dummynet functions; 46 * + configuration and initialization. 47 * 48 * NOTA BENE: critical sections are protected by splimp()/splx() 49 * pairs. One would think that splnet() is enough as for most of 50 * the netinet code, but it is not so because when used with 51 * bridging, dummynet is invoked at splimp(). 52 * 53 * Most important Changes: 54 * 55 * 011004: KLDable 56 * 010124: Fixed WF2Q behaviour 57 * 010122: Fixed spl protection. 58 * 000601: WF2Q support 59 * 000106: large rewrite, use heaps to handle very many pipes. 60 * 980513: initial release 61 * 62 * include files marked with XXX are probably not needed 63 */ 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/kernel.h> 70 #include <sys/module.h> 71 #include <sys/proc.h> 72 #include <sys/socket.h> 73 #include <sys/socketvar.h> 74 #include <sys/time.h> 75 #include <sys/sysctl.h> 76 #include <net/if.h> 77 #include <net/route.h> 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/in_var.h> 81 #include <netinet/ip.h> 82 #include <net/ipfw/ip_fw.h> 83 #include <net/netisr.h> 84 #include "ip_dummynet.h" 85 #include <netinet/ip_var.h> 86 87 #include <netinet/if_ether.h> /* for struct arpcom */ 88 #include <net/bridge/bridge.h> 89 90 /* 91 * We keep a private variable for the simulation time, but we could 92 * probably use an existing one ("softticks" in sys/kern/kern_timer.c) 93 */ 94 static dn_key curr_time = 0 ; /* current simulation time */ 95 96 static int dn_hash_size = 64 ; /* default hash size */ 97 98 /* statistics on number of queue searches and search steps */ 99 static int searches, search_steps ; 100 static int pipe_expire = 1 ; /* expire queue if empty */ 101 static int dn_max_ratio = 16 ; /* 
max queues/buckets ratio */ 102 103 static int red_lookup_depth = 256; /* RED - default lookup table depth */ 104 static int red_avg_pkt_size = 512; /* RED - default medium packet size */ 105 static int red_max_pkt_size = 1500; /* RED - default max packet size */ 106 107 /* 108 * Three heaps contain queues and pipes that the scheduler handles: 109 * 110 * ready_heap contains all dn_flow_queue related to fixed-rate pipes. 111 * 112 * wfq_ready_heap contains the pipes associated with WF2Q flows 113 * 114 * extract_heap contains pipes associated with delay lines. 115 * 116 */ 117 118 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); 119 120 static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; 121 122 static int heap_init(struct dn_heap *h, int size) ; 123 static int heap_insert (struct dn_heap *h, dn_key key1, void *p); 124 static void heap_extract(struct dn_heap *h, void *obj); 125 126 static void transmit_event(struct dn_pipe *pipe); 127 static void ready_event(struct dn_flow_queue *q); 128 129 static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ 130 static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */ 131 132 static struct callout_handle dn_timeout; 133 134 #ifdef SYSCTL_NODE 135 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, 136 CTLFLAG_RW, 0, "Dummynet"); 137 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, 138 CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); 139 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, 140 CTLFLAG_RD, &curr_time, 0, "Current tick"); 141 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, 142 CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); 143 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, 144 CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); 145 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, 146 CTLFLAG_RD, &searches, 0, "Number of queue searches"); 147 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, 148 
CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
    CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
    CTLFLAG_RW, &dn_max_ratio, 0,
    "Max ratio between dynamic queues and buckets");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
    CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
    CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
    CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
#endif

/* configuration / control entry points, defined later in this file */
static int config_pipe(struct dn_pipe *p);
static int ip_dn_ctl(struct sockopt *sopt);

static void rt_unref(struct rtentry *);
static void dummynet(void *);
static void dummynet_flush(void);
void dummynet_drain(void);
static ip_dn_io_t dummynet_io;
static void dn_rule_delete(void *);

int if_tx_rdy(struct ifnet *ifp);

/*
 * Drop the reference held on a cached route (packets delayed with a
 * DN_TO_IP_OUT destination keep a route reference in pkt->ro while they
 * sit in the pipe).  A refcnt already <= 0 indicates a bookkeeping bug,
 * so warn before decrementing further.
 */
static void
rt_unref(struct rtentry *rt)
{
    if (rt == NULL)
        return ;
    if (rt->rt_refcnt <= 0)
        printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt);
    RTFREE(rt);
}

/*
 * Heap management functions.
 *
 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
 * Some macros help finding parent/children so we can optimize them.
 *
 * heap_init() is called to expand the heap when needed.
 * Increment size in blocks of 16 entries.
 * XXX failure to allocate a new element is a pretty bad failure
 * as we basically stall a whole queue forever!!
 * Returns 1 on error, 0 on success
 */
#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
#define HEAP_LEFT(x) ( 2*(x) + 1 )
#define HEAP_IS_LEFT(x) ( (x) & 1 )
#define HEAP_RIGHT(x) ( 2*(x) + 2 )
/* three-operand swap through a caller-supplied temporary */
#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
/* grow mask: sizes are rounded up to a multiple of 16 entries */
#define HEAP_INCREMENT 15

/*
 * Grow heap h to hold at least new_size entries.  The old entries are
 * copied into the new array and the old array freed.  Called both for
 * initial allocation (h->size == 0) and for resizing.
 */
static int
heap_init(struct dn_heap *h, int new_size)
{
    struct dn_heap_entry *p;

    if (h->size >= new_size ) {
        printf("heap_init, Bogus call, have %d want %d\n",
                h->size, new_size);
        return 0 ;
    }
    /* round up to the next multiple of (HEAP_INCREMENT+1) == 16 */
    new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
    p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT);
    if (p == NULL) {
        printf(" heap_init, resize %d failed\n", new_size );
        return 1 ; /* error */
    }
    /* preserve existing entries, then release the old array */
    if (h->size > 0) {
        bcopy(h->p, p, h->size * sizeof(*p) );
        free(h->p, M_DUMMYNET);
    }
    h->p = p ;
    h->size = new_size ;
    return 0 ;
}

/*
 * Insert element in heap. Normally, p != NULL, we insert p in
 * a new position and bubble up. If p == NULL, then the element is
 * already in place, and key is the position where to start the
 * bubble-up.
 * Returns 1 on failure (cannot allocate new heap entry)
 *
 * If offset > 0 the position (index, int) of the element in the heap is
 * also stored in the element itself at the given offset in bytes.
 */
#define SET_OFFSET(heap, node) \
    if (heap->offset > 0) \
            *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
/*
 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
 */
#define RESET_OFFSET(heap, node) \
    if (heap->offset > 0) \
            *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
static int
heap_insert(struct dn_heap *h, dn_key key1, void *p)
{
    int son = h->elements ;

    if (p == NULL)      /* data already there, set starting point */
        son = key1 ;
    else {              /* insert new element at the end, possibly resize */
        son = h->elements ;
        if (son == h->size) /* need resize... */
            if (heap_init(h, h->elements+1) )
                return 1 ; /* failure... */
        h->p[son].object = p ;
        h->p[son].key = key1 ;
        h->elements++ ;
    }
    while (son > 0) {                           /* bubble up */
        int father = HEAP_FATHER(son) ;
        struct dn_heap_entry tmp ;

        if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
            break ; /* found right position */
        /* son smaller than father, swap and repeat */
        HEAP_SWAP(h->p[son], h->p[father], tmp) ;
        SET_OFFSET(h, son);
        son = father ;
    }
    SET_OFFSET(h, son);
    return 0 ;
}

/*
 * remove top element from heap, or obj if obj != NULL
 */
static void
heap_extract(struct dn_heap *h, void *obj)
{
    int child, father, max = h->elements - 1 ;

    if (max < 0) {
        printf("warning, extract from empty heap 0x%p\n", h);
        return ;
    }
    father = 0 ; /* default: move up smallest child */
    if (obj != NULL) { /* extract specific element, index is at offset */
        if (h->offset <= 0)
            panic("*** heap_extract from middle not supported on this heap!!!\n");
        father = *((int *)((char *)obj + h->offset)) ;
        if (father < 0 || father >= h->elements) {
            printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
                father, h->elements);
            panic("heap_extract");
        }
    }
    RESET_OFFSET(h, father);
    /* walk the smallest-child chain down from the hole */
    child = HEAP_LEFT(father) ;         /* left child */
    while (child <= max) {              /* valid entry */
        if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
            child = child+1 ;           /* take right child, otherwise left */
        h->p[father] = h->p[child] ;
        SET_OFFSET(h, father);
        father = child ;
        child = HEAP_LEFT(child) ;      /* left child for next loop */
    }
    h->elements-- ;
    if (father != max) {
        /*
         * Fill hole with last entry and bubble up, reusing the insert code
         */
        h->p[father] = h->p[max] ;
        heap_insert(h, father, NULL); /* this one cannot fail */
    }
}

#if 0
/*
 * change object position and update references
 * XXX this one is never used!
 */
static void
heap_move(struct dn_heap *h, dn_key new_key, void *object)
{
    int temp;
    int i ;
    int max = h->elements-1 ;
    struct dn_heap_entry buf ;

    if (h->offset <= 0)
        panic("cannot move items on this heap");

    i = *((int *)((char *)object + h->offset));
    if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
        h->p[i].key = new_key ;
        for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
                 i = temp ) { /* bubble up */
            HEAP_SWAP(h->p[i], h->p[temp], buf) ;
            SET_OFFSET(h, i);
        }
    } else {            /* must move down */
        h->p[i].key = new_key ;
        while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
            if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
                temp++ ; /* select child with min key */
            if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
                HEAP_SWAP(h->p[i], h->p[temp], buf) ;
                SET_OFFSET(h, i);
            } else
                break ;
            i = temp ;
        }
    }
    SET_OFFSET(h, i);
}
#endif /* heap_move, unused */

/*
 * heapify() will reorganize data inside an array to maintain the
 * heap property. It is needed when we delete a bunch of entries.
 */
static void
heapify(struct dn_heap *h)
{
    int i ;

    /* re-run the bubble-up pass over every slot; heap_insert with
     * p == NULL cannot fail, it only repositions existing entries */
    for (i = 0 ; i < h->elements ; i++ )
        heap_insert(h, i , NULL) ;
}

/*
 * cleanup the heap and free data structure
 */
static void
heap_free(struct dn_heap *h)
{
    if (h->size >0 )
        free(h->p, M_DUMMYNET);
    bzero(h, sizeof(*h) );
}

/*
 * --- end of heap management functions ---
 */

/*
 * Scheduler functions:
 *
 * transmit_event() is called when the delay-line needs to enter
 * the scheduler, either because of existing pkts getting ready,
 * or new packets entering the queue. The event handled is the delivery
 * time of the packet.
 *
 * ready_event() does something similar with fixed-rate queues, and the
 * event handled is the finish time of the head pkt.
 *
 * wfq_ready_event() does something similar with WF2Q queues, and the
 * event handled is the start time of the head pkt.
 *
 * In all cases, we make sure that the data structures are consistent
 * before passing pkts out, because this might trigger recursive
 * invocations of the procedures.
 */
static void
transmit_event(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;

    while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
        /*
         * first unlink, then call procedures, since ip_input() can invoke
         * ip_output() and viceversa, thus causing nested calls
         */
        pipe->head = DN_NEXT(pkt) ;

        /*
         * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
         * (NOT A REAL one, just a small block of malloc'ed memory) with
         * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET
         * dn_m (m_next) = actual mbuf to be processed by ip_input/output
         * and some other fields.
         * The block IS FREED HERE because it contains parameters passed
         * to the called routine.
         */
        switch (pkt->dn_dir) {
        case DN_TO_IP_OUT:
            (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL);
            rt_unref (pkt->ro.ro_rt) ;
            break ;

        case DN_TO_IP_IN :
            {
            struct netmsg_packet msg;

            /*
             * NOTE(review): this stores the address of the local variable
             * 'pkt' rather than the dn_pkt pseudo-mbuf itself; every other
             * branch passes (struct mbuf *)pkt.  Looks like it should be
             * (struct mbuf *)pkt -- verify against ip_input()'s
             * netmsg_packet consumption before changing.
             */
            msg.nm_packet = (struct mbuf *)&pkt;
            ip_input((struct netmsg *)&msg) ;
            break ;
            }

        case DN_TO_BDG_FWD :
            if (!BDG_LOADED) {
                /* somebody unloaded the bridge module. Drop pkt */
                printf("-- dropping bridged packet trapped in pipe--\n");
                m_freem(pkt->dn_m);
                break;
            } /* fallthrough */
        case DN_TO_ETH_DEMUX:
            {
            struct mbuf *m = (struct mbuf *)pkt ;
            struct ether_header *eh;

            if (pkt->dn_m->m_len < ETHER_HDR_LEN &&
                (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) {
                printf("dummynet/bridge: pullup fail, dropping pkt\n");
                break;
            }
            /*
             * same as ether_input, make eh be a pointer into the mbuf
             */
            eh = mtod(pkt->dn_m, struct ether_header *);
            m_adj(pkt->dn_m, ETHER_HDR_LEN);
            /*
             * bdg_forward() wants a pointer to the pseudo-mbuf-header, but
             * on return it will supply the pointer to the actual packet
             * (originally pkt->dn_m, but could be something else now) if
             * it has not consumed it.
             */
            if (pkt->dn_dir == DN_TO_BDG_FWD) {
                m = bdg_forward_ptr(m, eh, pkt->ifp);
                if (m)
                    m_freem(m);
            } else
                ether_demux(NULL, eh, m); /* which consumes the mbuf */
            }
            break ;
        case DN_TO_ETH_OUT:
            ether_output_frame(pkt->ifp, (struct mbuf *)pkt);
            break;

        default:
            printf("dummynet: bad switch %d!\n", pkt->dn_dir);
            m_freem(pkt->dn_m);
            break ;
        }
        free(pkt, M_DUMMYNET);
    }
    /* if there are leftover packets, put into the heap for next event */
    if ( (pkt = pipe->head) )
        heap_insert(&extract_heap, pkt->output_time, pipe ) ;
    /* XXX should check errors on heap_insert, by draining the
     * whole pipe p and hoping in the future we are more successful
     */
}

/*
 * the following macro computes how many ticks we have to wait
 * before being able to transmit a packet. The credit is taken from
 * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
 */
#define SET_TICKS(pkt, q, p)    \
    (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
            p->bandwidth ;

/*
 * extract pkt from queue, compute output time (could be now)
 * and put into delay line (p_queue)
 */
static void
move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
        struct dn_pipe *p, int len)
{
    /* unlink from the flow queue and update its counters */
    q->head = DN_NEXT(pkt) ;
    q->len-- ;
    q->len_bytes -= len ;

    pkt->output_time = curr_time + p->delay ;

    /* append to the tail of the pipe's delay line */
    if (p->head == NULL)
        p->head = pkt;
    else
        DN_NEXT(p->tail) = pkt;
    p->tail = pkt;
    DN_NEXT(p->tail) = NULL;
}

/*
 * ready_event() is invoked every time the queue must enter the
 * scheduler, either because the first packet arrives, or because
 * a previously scheduled event fired.
 * On invocation, drain as many pkts as possible (could be 0) and then
 * if there are leftover packets reinsert the pkt in the scheduler.
 */
static void
ready_event(struct dn_flow_queue *q)
{
    struct dn_pkt *pkt;
    struct dn_pipe *p = q->fs->pipe ;
    int p_was_empty ;

    if (p == NULL) {
        printf("ready_event- pipe is gone\n");
        return ;
    }
    p_was_empty = (p->head == NULL) ;

    /*
     * schedule fixed-rate queues linked to this pipe:
     * Account for the bw accumulated since last scheduling, then
     * drain as many pkts as allowed by q->numbytes and move to
     * the delay line (in p) computing output time.
     * bandwidth==0 (no limit) means we can drain the whole queue,
     * setting len_scaled = 0 does the job.
     */
    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
    while ( (pkt = q->head) != NULL ) {
        int len = pkt->dn_m->m_pkthdr.len;
        int len_scaled = p->bandwidth ? len*8*hz : 0 ;
        if (len_scaled > q->numbytes )
            break ;
        q->numbytes -= len_scaled ;
        move_pkt(pkt, q, p, len);
    }
    /*
     * If we have more packets queued, schedule next ready event
     * (can only occur when bandwidth != 0, otherwise we would have
     * flushed the whole queue in the previous loop).
     * To this purpose we record the current time and compute how many
     * ticks to go for the finish time of the packet.
     */
    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
        dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
        q->sched_time = curr_time ;
        heap_insert(&ready_heap, curr_time + t, (void *)q );
        /* XXX should check errors on heap_insert, and drain the whole
         * queue on error hoping next time we are luckier.
         */
    } else { /* RED needs to know when the queue becomes empty */
        q->q_time = curr_time;
        q->numbytes = 0;
    }
    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
        transmit_event(p);
}

/*
 * Called when we can transmit packets on WF2Q queues.
Take pkts out of
 * the queues at their start time, and enqueue into the delay line.
 * Packets are drained until p->numbytes < 0. As long as
 * len_scaled >= p->numbytes, the packet goes into the delay line
 * with a deadline p->delay. For the last packet, if p->numbytes<0,
 * there is an additional delay.
 */
static void
ready_event_wfq(struct dn_pipe *p)
{
    int p_was_empty = (p->head == NULL) ;
    struct dn_heap *sch = &(p->scheduler_heap);
    struct dn_heap *neh = &(p->not_eligible_heap) ;

    if (p->if_name[0] == 0) /* tx clock is simulated */
        p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
        if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
            return ;
        else {
            DEB(printf("pipe %d ready from %s --\n",
                p->pipe_nr, p->if_name);)
        }
    }

    /*
     * While we have backlogged traffic AND credit, we need to do
     * something on the queue.
     */
    while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
        if (sch->elements > 0) { /* have some eligible pkts to send out */
            struct dn_flow_queue *q = sch->p[0].object ;
            struct dn_pkt *pkt = q->head;
            struct dn_flow_set *fs = q->fs;
            u_int64_t len = pkt->dn_m->m_pkthdr.len;
            int len_scaled = p->bandwidth ? len*8*hz : 0 ;

            heap_extract(sch, NULL); /* remove queue from heap */
            p->numbytes -= len_scaled ;
            move_pkt(pkt, q, p, len);

            p->V += (len<<MY_M) / p->sum ; /* update V */
            q->S = q->F ; /* update start time */
            if (q->len == 0) { /* Flow not backlogged any more */
                fs->backlogged-- ;
                heap_insert(&(p->idle_heap), q->F, q);
            } else { /* still backlogged */
                /*
                 * update F and position in backlogged queue, then
                 * put flow in not_eligible_heap (we will fix this later).
                 */
                len = (q->head)->dn_m->m_pkthdr.len;
                q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
                if (DN_KEY_LEQ(q->S, p->V))
                    heap_insert(neh, q->S, q);
                else
                    heap_insert(sch, q->F, q);
            }
        }
        /*
         * now compute V = max(V, min(S_i)). Remember that all elements in sch
         * have by definition S_i <= V so if sch is not empty, V is surely
         * the max and we must not update it. Conversely, if sch is empty
         * we only need to look at neh.
         */
        if (sch->elements == 0 && neh->elements > 0)
            p->V = MAX64 ( p->V, neh->p[0].key );
        /* move from neh to sch any packets that have become eligible */
        while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
            struct dn_flow_queue *q = neh->p[0].object ;
            heap_extract(neh, NULL);
            heap_insert(sch, q->F, q);
        }

        if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
            p->numbytes = -1 ; /* mark not ready for I/O */
            break ;
        }
    }
    if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
            && p->idle_heap.elements > 0) {
        /*
         * no traffic and no events scheduled. We can get rid of idle-heap.
         */
        int i ;

        for (i = 0 ; i < p->idle_heap.elements ; i++) {
            struct dn_flow_queue *q = p->idle_heap.p[i].object ;

            q->F = 0 ;
            q->S = q->F + 1 ;
        }
        p->sum = 0 ;
        p->V = 0 ;
        p->idle_heap.elements = 0 ;
    }
    /*
     * If we are getting clocks from dummynet (not a real interface) and
     * If we are under credit, schedule the next ready event.
     * Also fix the delivery time of the last packet.
 */
    if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
        dn_key t=0 ; /* number of ticks i have to wait */

        if (p->bandwidth > 0)
            t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
        p->tail->output_time += t ;
        p->sched_time = curr_time ;
        heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
        /* XXX should check errors on heap_insert, and drain the whole
         * queue on error hoping next time we are luckier.
         */
    }
    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
        transmit_event(p);
}

/*
 * This is called once per tick, or HZ times per second. It is used to
 * increment the current tick counter and schedule expired events.
 */
static void
dummynet(void * __unused unused)
{
    void *p ; /* generic parameter to handler */
    struct dn_heap *h ;
    int s ;
    struct dn_heap *heaps[3];
    int i;
    struct dn_pipe *pe ;

    heaps[0] = &ready_heap ;            /* fixed-rate queues */
    heaps[1] = &wfq_ready_heap ;        /* wfq queues */
    heaps[2] = &extract_heap ;          /* delay line */
    s = splimp(); /* see note on top, splnet() is not enough */
    curr_time++ ;
    for (i=0; i < 3 ; i++) {
        h = heaps[i];
        while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
            /*
             * NOTE(review): this lateness check uses a raw '>' instead of
             * DN_KEY_GT, so it is not wraparound-safe for dn_key -- confirm
             * whether that matters for the tick-counter width in use.
             */
            DDB(if (h->p[0].key > curr_time)
                printf("-- dummynet: warning, heap %d is %d ticks late\n",
                    i, (int)(curr_time - h->p[0].key));)
            p = h->p[0].object ; /* store a copy before heap_extract */
            heap_extract(h, NULL); /* need to extract before processing */
            if (i == 0)
                ready_event(p) ;
            else if (i == 1) {
                struct dn_pipe *pipe = p;
                if (pipe->if_name[0] != '\0')
                    printf("*** bad ready_event_wfq for pipe %s\n",
                        pipe->if_name);
                else
                    ready_event_wfq(p) ;
            } else
                transmit_event(p);
        }
    }
    /* sweep pipes trying to expire idle flow_queues */
    for (pe = all_pipes; pe ; pe = pe->next )
        if (pe->idle_heap.elements > 0 &&
                DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) {
            struct dn_flow_queue *q = pe->idle_heap.p[0].object ;

            heap_extract(&(pe->idle_heap), NULL);
            q->S = q->F + 1 ; /* mark timestamp as invalid */
            pe->sum -= q->fs->weight ;
        }
    splx(s);
    /* re-arm ourselves for the next tick */
    dn_timeout = timeout(dummynet, NULL, 1);
}

/*
 * called by an interface when tx_rdy occurs.
 */
int
if_tx_rdy(struct ifnet *ifp)
{
    struct dn_pipe *p;

    /* look up the pipe bound to this interface, first by cached pointer,
     * then by name (binding the pointer on a name match) */
    for (p = all_pipes; p ; p = p->next )
        if (p->ifp == ifp)
            break ;
    if (p == NULL) {
        for (p = all_pipes; p ; p = p->next )
            if (!strcmp(p->if_name, ifp->if_xname) ) {
                p->ifp = ifp ;
                DEB(printf("++ tx rdy from %s (now found)\n", ifp->if_xname);)
                break ;
            }
    }
    if (p != NULL) {
        DEB(printf("++ tx rdy from %s - qlen %d\n", ifp->if_xname,
                ifp->if_snd.ifq_len);)
        p->numbytes = 0 ; /* mark ready for I/O */
        ready_event_wfq(p);
    }
    return 0;
}

/*
 * Unconditionally expire empty queues in case of shortage.
 * Returns the number of queues freed.
 */
static int
expire_queues(struct dn_flow_set *fs)
{
    struct dn_flow_queue *q, *prev ;
    int i, initial_elements = fs->rq_elements ;

    /* rate-limit: at most one expire pass per second per flow_set */
    if (fs->last_expired == time_second)
        return 0 ;
    fs->last_expired = time_second ;
    for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
        for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
            if (q->head != NULL || q->S != q->F+1) {
                prev = q ;
                q = q->next ;
            } else { /* entry is idle, expire it */
                struct dn_flow_queue *old_q = q ;

                if (prev != NULL)
                    prev->next = q = q->next ;
                else
                    fs->rq[i] = q = q->next ;
                fs->rq_elements-- ;
                free(old_q, M_DUMMYNET);
            }
    return initial_elements - fs->rq_elements ;
}

/*
 * If room, create a new queue and put at head of slot i;
 * otherwise, create or use the default queue.
 */
static struct dn_flow_queue *
create_queue(struct dn_flow_set *fs, int i)
{
    struct dn_flow_queue *q ;

    if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
            expire_queues(fs) == 0) {
        /*
         * No way to get room, use or create overflow queue.
         */
        i = fs->rq_size ;
        if ( fs->rq[i] != NULL )
            return fs->rq[i] ;
    }
    q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
    if (q == NULL) {
        printf("sorry, cannot allocate queue for new flow\n");
        return NULL ;
    }
    q->fs = fs ;
    q->hash_slot = i ;
    q->next = fs->rq[i] ;
    q->S = q->F + 1;   /* hack - mark timestamp as invalid */
    fs->rq[i] = q ;
    fs->rq_elements++ ;
    return q ;
}

/*
 * Given a flow_set and a pkt in last_pkt, find a matching queue
 * after appropriate masking. The queue is moved to front
 * so that further searches take less time.
 */
static struct dn_flow_queue *
find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
{
    int i = 0 ; /* we need i and q for new allocations */
    struct dn_flow_queue *q, *prev;

    if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
        q = fs->rq[0] ;
    else {
        /* first, do the masking */
        id->dst_ip &= fs->flow_mask.dst_ip ;
        id->src_ip &= fs->flow_mask.src_ip ;
        id->dst_port &= fs->flow_mask.dst_port ;
        id->src_port &= fs->flow_mask.src_port ;
        id->proto &= fs->flow_mask.proto ;
        id->flags = 0 ; /* we don't care about this one */
        /* then, hash function */
        i = ( (id->dst_ip) & 0xffff ) ^
            ( (id->dst_ip >> 15) & 0xffff ) ^
            ( (id->src_ip << 1) & 0xffff ) ^
            ( (id->src_ip >> 16 ) & 0xffff ) ^
            (id->dst_port << 1) ^ (id->src_port) ^
            (id->proto );
        i = i % fs->rq_size ;
        /* finally, scan the current list for a match */
        searches++ ;
        for (prev=NULL, q = fs->rq[i] ; q ; ) {
            search_steps++;
            if (id->dst_ip == q->id.dst_ip &&
                    id->src_ip == q->id.src_ip &&
                    id->dst_port == q->id.dst_port &&
                    id->src_port == q->id.src_port &&
                    id->proto == q->id.proto &&
                    id->flags == q->id.flags)
                break ; /* found */
            else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
                /* entry is idle and not in any heap, expire it */
                struct dn_flow_queue *old_q = q ;

                if (prev != NULL)
                    prev->next = q = q->next ;
                else
                    fs->rq[i] = q = q->next ;
                fs->rq_elements-- ;
                free(old_q, M_DUMMYNET);
                continue ;
            }
            prev = q ;
            q = q->next ;
        }
        if (q && prev != NULL) { /* found and not in front */
            prev->next = q->next ;
            q->next = fs->rq[i] ;
            fs->rq[i] = q ;
        }
    }
    if (q == NULL) { /* no match, need to allocate a new entry */
        q = create_queue(fs, i);
        if (q != NULL)
            q->id = *id ;
    }
    return q ;
}

/*
 * Decide whether an arriving packet of 'len' bytes should be dropped
 * according to (Gentle-)RED.  Returns 1 to drop, 0 to accept.
 */
static int
red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
{
    /*
     * RED algorithm
     *
     * RED calculates the average queue size (avg) using a low-pass filter
     * with an exponential weighted (w_q) moving average:
     *          avg  <-  (1-w_q) * avg + w_q * q_size
     * where q_size is the queue length (measured in bytes or packets).
     *
     * If q_size == 0, we compute the idle time for the link, and set
     *          avg = (1 - w_q)^(idle/s)
     * where s is the time needed for transmitting a medium-sized packet.
     *
     * Now, if avg < min_th the packet is enqueued.
     * If avg > max_th the packet is dropped. Otherwise, the packet is
     * dropped with probability P function of avg.
     *
     */

    int64_t p_b = 0;
    /* queue in bytes or packets ? */
    u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;

    DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);)

    /* average queue size estimation */
    if (q_size != 0) {
        /*
         * queue is not empty, avg <- avg + (q_size - avg) * w_q
         */
        int diff = SCALE(q_size) - q->avg;
        int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);

        q->avg += (int) v;
    } else {
        /*
         * queue is empty, find for how long the queue has been
         * empty and use a lookup table for computing
         * (1 - w_q)^(idle_time/s) where s is the time to send a
         * (small) packet.
         * XXX check wraps...
         */
        if (q->avg) {
            u_int t = (curr_time - q->q_time) / fs->lookup_step;

            q->avg = (t < fs->lookup_depth) ?
                    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
        }
    }
    DEB(printf("avg: %u ", SCALE_VAL(q->avg));)

    /* should i drop ? */

    if (q->avg < fs->min_th) {
        q->count = -1;
        return 0; /* accept packet ; */
    }
    if (q->avg >= fs->max_th) { /* average queue >= max threshold */
        if (fs->flags_fs & DN_IS_GENTLE_RED) {
            /*
             * According to Gentle-RED, if avg is greater than max_th the
             * packet is dropped with a probability
             *          p_b = c_3 * avg - c_4
             * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
             */
            p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
        } else {
            q->count = -1;
            printf("- drop");
            return 1 ;
        }
    } else if (q->avg > fs->min_th) {
        /*
         * we compute p_b using the linear dropping function p_b = c_1 *
         * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
         * max_p * min_th / (max_th - min_th)
         */
        p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
    }
    if (fs->flags_fs & DN_QSIZE_IS_BYTES)
        p_b = (p_b * len) / fs->max_pkt_size;
    if (++q->count == 0)
        q->random = random() & 0xffff;
    else {
        /*
         * q->count counts packets arrived since last drop, so a greater
         * value of q->count means a greater packet drop probability.
1018 */ 1019 if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) { 1020 q->count = 0; 1021 DEB(printf("- red drop");) 1022 /* after a drop we calculate a new random value */ 1023 q->random = random() & 0xffff; 1024 return 1; /* drop */ 1025 } 1026 } 1027 /* end of RED algorithm */ 1028 return 0 ; /* accept */ 1029 } 1030 1031 static __inline 1032 struct dn_flow_set * 1033 locate_flowset(int pipe_nr, struct ip_fw *rule) 1034 { 1035 #if IPFW2 1036 struct dn_flow_set *fs; 1037 ipfw_insn *cmd = rule->cmd + rule->act_ofs; 1038 1039 if (cmd->opcode == O_LOG) 1040 cmd += F_LEN(cmd); 1041 fs = ((ipfw_insn_pipe *)cmd)->pipe_ptr; 1042 1043 if (fs != NULL) 1044 return fs; 1045 1046 if (cmd->opcode == O_QUEUE) 1047 #else /* !IPFW2 */ 1048 struct dn_flow_set *fs = NULL ; 1049 1050 if ( (rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE ) 1051 #endif /* !IPFW2 */ 1052 for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) 1053 ; 1054 else { 1055 struct dn_pipe *p1; 1056 for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next) 1057 ; 1058 if (p1 != NULL) 1059 fs = &(p1->fs) ; 1060 } 1061 /* record for the future */ 1062 #if IPFW2 1063 ((ipfw_insn_pipe *)cmd)->pipe_ptr = fs; 1064 #else 1065 if (fs != NULL) 1066 rule->pipe_ptr = fs; 1067 #endif 1068 return fs ; 1069 } 1070 1071 /* 1072 * dummynet hook for packets. Below 'pipe' is a pipe or a queue 1073 * depending on whether WF2Q or fixed bw is used. 1074 * 1075 * pipe_nr pipe or queue the packet is destined for. 1076 * dir where shall we send the packet after dummynet. 1077 * m the mbuf with the packet 1078 * ifp the 'ifp' parameter from the caller. 
 *		NULL in ip_input, destination interface in ip_output,
 *		real_dst in bdg_forward
 * ro		route parameter (only used in ip_output, NULL otherwise)
 * dst		destination address, only used by ip_output
 * rule		matching rule, in case of multiple passes
 * flags	flags from the caller, only used in ip_output
 *
 * Returns 0 when the packet was queued (or silently dropped with
 * DN_NOERROR set), ENOBUFS when it was dropped; the mbuf is consumed
 * in either case.
 */
static int
dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
{
    struct dn_pkt *pkt;
    struct dn_flow_set *fs;
    struct dn_pipe *pipe ;
    u_int64_t len = m->m_pkthdr.len ;
    struct dn_flow_queue *q = NULL ;
    int s = splimp();
    int is_pipe;
#if IPFW2
    ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs;

    if (cmd->opcode == O_LOG)
	cmd += F_LEN(cmd);
    is_pipe = (cmd->opcode == O_PIPE);
#else
    is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE;
#endif

    pipe_nr &= 0xffff ;

    /*
     * this is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule
     */
    fs = locate_flowset(pipe_nr, fwa->rule);
    if (fs == NULL)
	goto dropit ;	/* this queue/pipe does not exist! */
    pipe = fs->pipe ;
    if (pipe == NULL) { /* must be a queue, try find a matching pipe */
	for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr;
		 pipe = pipe->next)
	    ;
	if (pipe != NULL)
	    fs->pipe = pipe ; /* cache parent pipe for next time */
	else {
	    printf("No pipe %d for queue %d, drop pkt\n",
		fs->parent_nr, fs->fs_nr);
	    goto dropit ;
	}
    }
    q = find_queue(fs, &(fwa->f_id));
    if ( q == NULL )
	goto dropit ;		/* cannot allocate queue		*/
    /*
     * update statistics, then check reasons to drop pkt
     */
    q->tot_bytes += len ;
    q->tot_pkts++ ;
    if ( fs->plr && random() < fs->plr )
	goto dropit ;		/* random pkt drop			*/
    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
	if (q->len_bytes > fs->qsize)
	    goto dropit ;	/* queue size overflow			*/
    } else {
	if (q->len >= fs->qsize)
	    goto dropit ;	/* queue count overflow			*/
    }
    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
	goto dropit ;

    /* XXX expensive to zero, see if we can remove it*/
    pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO);
    if ( pkt == NULL )
	goto dropit ;		/* cannot allocate packet header	*/
    /* ok, i can handle the pkt now... */
    /* build and enqueue packet + parameters */
    pkt->hdr.mh_type = MT_TAG;
    pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET;
    pkt->rule = fwa->rule ;
    DN_NEXT(pkt) = NULL;
    pkt->dn_m = m;
    pkt->dn_dir = dir ;

    pkt->ifp = fwa->oif;
    if (dir == DN_TO_IP_OUT) {
	/*
	 * We need to copy *ro because for ICMP pkts (and maybe others)
	 * the caller passed a pointer into the stack; dst might also be
	 * a pointer into *ro so it needs to be updated.
	 */
	pkt->ro = *(fwa->ro);
	if (fwa->ro->ro_rt)
	    fwa->ro->ro_rt->rt_refcnt++ ; /* keep route alive while queued */
	if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */
	    fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;

	pkt->dn_dst = fwa->dst;
	pkt->flags = fwa->flags;
    }
    if (q->head == NULL)
	q->head = pkt;
    else
	DN_NEXT(q->tail) = pkt;
    q->tail = pkt;
    q->len++;
    q->len_bytes += len ;

    if ( q->head != pkt )	/* flow was not idle, we are done */
	goto done;
    /*
     * If we reach this point the flow was previously idle, so we need
     * to schedule it. This involves different actions for fixed-rate or
     * WF2Q queues.
     */
    if (is_pipe) {
	/*
	 * Fixed-rate queue: just insert into the ready_heap.
	 */
	dn_key t = 0 ;
	if (pipe->bandwidth)
	    t = SET_TICKS(pkt, q, pipe);
	q->sched_time = curr_time ;
	if (t == 0)	/* must process it now */
	    ready_event( q );
	else
	    heap_insert(&ready_heap, curr_time + t , q );
    } else {
	/*
	 * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
	 * set S to the virtual time V for the controlling pipe, and update
	 * the sum of weights for the pipe; otherwise, remove flow from
	 * idle_heap and set S to max(F,V).
	 * Second, compute finish time F = S + len/weight.
	 * Third, if pipe was idle, update V=max(S, V).
	 * Fourth, count one more backlogged flow.
	 */
	if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
	    q->S = pipe->V ;
	    pipe->sum += fs->weight ; /* add weight of new queue */
	} else {
	    heap_extract(&(pipe->idle_heap), q);
	    q->S = MAX64(q->F, pipe->V ) ;
	}
	q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;

	if (pipe->not_eligible_heap.elements == 0 &&
		pipe->scheduler_heap.elements == 0)
	    pipe->V = MAX64 ( q->S, pipe->V );
	fs->backlogged++ ;
	/*
	 * Look at eligibility. A flow is not eligibile if S>V (when
	 * this happens, it means that there is some other flow already
	 * scheduled for the same pipe, so the scheduler_heap cannot be
	 * empty). If the flow is not eligible we just store it in the
	 * not_eligible_heap. Otherwise, we store in the scheduler_heap
	 * and possibly invoke ready_event_wfq() right now if there is
	 * leftover credit.
	 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
	 * and for all flows in not_eligible_heap (NEH), S_i > V .
	 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
	 * we only need to look into NEH.
	 */
	if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
	    if (pipe->scheduler_heap.elements == 0)
		printf("++ ouch! not eligible but empty scheduler!\n");
	    heap_insert(&(pipe->not_eligible_heap), q->S, q);
	} else {
	    heap_insert(&(pipe->scheduler_heap), q->F, q);
	    if (pipe->numbytes >= 0) { /* pipe is idle */
		if (pipe->scheduler_heap.elements != 1)
		    printf("*** OUCH! pipe should have been idle!\n");
		DEB(printf("Waking up pipe %d at %d\n",
			pipe->pipe_nr, (int)(q->F >> MY_M)); )
		pipe->sched_time = curr_time ;
		ready_event_wfq(pipe);
	    }
	}
    }
done:
    splx(s);
    return 0;

dropit:
    splx(s);
    if (q)
	q->drops++ ;
    m_freem(m);
    return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
}

/*
 * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
 * Doing this would probably save us the initial bzero of dn_pkt
 */
#define DN_FREE_PKT(pkt)	{		\
	struct dn_pkt *n = pkt ;		\
	rt_unref ( n->ro.ro_rt ) ;		\
	m_freem(n->dn_m);			\
	pkt = DN_NEXT(n) ;			\
	free(n, M_DUMMYNET) ;	}

/*
 * Dispose all packets and flow_queues on a flow_set.
 * If all=1, also remove red lookup table and other storage,
 * including the descriptor itself.
 * For the one in dn_pipe MUST also cleanup ready_heap...
 */
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
    struct dn_pkt *pkt ;
    struct dn_flow_queue *q, *qn ;
    int i ;

    /* <= rq_size: the last slot is the overflow bucket */
    for (i = 0 ; i <= fs->rq_size ; i++ ) {
	for (q = fs->rq[i] ; q ; q = qn ) {
	    for (pkt = q->head ; pkt ; )
		DN_FREE_PKT(pkt) ;
	    qn = q->next ;
	    free(q, M_DUMMYNET);
	}
	fs->rq[i] = NULL ;
    }
    fs->rq_elements = 0 ;
    if (all) {
	/* RED - free lookup table */
	if (fs->w_q_lookup)
	    free(fs->w_q_lookup, M_DUMMYNET);
	if (fs->rq)
	    free(fs->rq, M_DUMMYNET);
	/* if this fs is not part of a pipe, free it */
	if (fs->pipe && fs != &(fs->pipe->fs) )
	    free(fs, M_DUMMYNET);
    }
}

/*
 * Dispose all packets queued on a pipe (not a flow_set).
 * Also free all resources associated to a pipe, which is about
 * to be deleted.
 */
static void
purge_pipe(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;

    purge_flow_set( &(pipe->fs), 1 );

    for (pkt = pipe->head ; pkt ; )
	DN_FREE_PKT(pkt) ;

    heap_free( &(pipe->scheduler_heap) );
    heap_free( &(pipe->not_eligible_heap) );
    heap_free( &(pipe->idle_heap) );
}

/*
 * Delete all pipes and heaps returning memory. Must also
 * remove references from all ipfw rules to all pipes.
 */
static void
dummynet_flush()
{
    struct dn_pipe *curr_p, *p ;
    struct dn_flow_set *fs, *curr_fs;
    int s ;

    s = splimp() ;

    /* remove all references to pipes ...*/
    flush_pipe_ptrs(NULL);
    /* prevent future matches: detach the global lists under splimp */
    p = all_pipes ;
    all_pipes = NULL ;
    fs = all_flow_sets ;
    all_flow_sets = NULL ;
    /* and free heaps so we don't have unwanted events */
    heap_free(&ready_heap);
    heap_free(&wfq_ready_heap);
    heap_free(&extract_heap);
    splx(s) ;
    /*
     * Now purge all queued pkts and delete all pipes
     * (safe outside the critical section: the lists are private now)
     */
    /* scan and purge all flow_sets. */
    for ( ; fs ; ) {
	curr_fs = fs ;
	fs = fs->next ;
	purge_flow_set(curr_fs, 1);
    }
    for ( ; p ; ) {
	purge_pipe(p);
	curr_p = p ;
	p = p->next ;
	free(curr_p, M_DUMMYNET);
    }
}


extern struct ip_fw *ip_fw_default_rule ;
/*
 * Helper for dn_rule_delete(): re-point at the default rule every
 * queued packet of flow_set fs whose rule pointer matches r.
 */
static void
dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
{
    int i ;
    struct dn_flow_queue *q ;
    struct dn_pkt *pkt ;

    for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
	for (q = fs->rq[i] ; q ; q = q->next )
	    for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) )
		if (pkt->rule == r)
		    pkt->rule = ip_fw_default_rule ;
}
/*
 * when a firewall rule is deleted, scan all queues and remove the flow-id
 * from packets matching this rule.
 */
void
dn_rule_delete(void *r)
{
    struct dn_pipe *p ;
    struct dn_pkt *pkt ;
    struct dn_flow_set *fs ;

    /*
     * If the rule references a queue (dn_flow_set), then scan
     * the flow set, otherwise scan pipes. Should do either, but doing
     * both does not harm.
     */
    for ( fs = all_flow_sets ; fs ; fs = fs->next )
	dn_rule_delete_fs(fs, r);
    for ( p = all_pipes ; p ; p = p->next ) {
	fs = &(p->fs) ;
	dn_rule_delete_fs(fs, r);
	for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) )
	    if (pkt->rule == r)
		pkt->rule = ip_fw_default_rule ;
    }
}

/*
 * setup RED parameters: copy user-supplied values from p into x,
 * precompute the drop-probability coefficients (c_1..c_4) and the
 * (1-w_q)^i idle-decay lookup table. Returns 0 or an errno.
 *
 * NOTE(review): on error this frees x itself, but the only caller
 * (set_fs_parms) ignores the return value ("XXX should check errors"
 * below), so the caller can be left holding a freed pointer — verify
 * before relying on the error paths.
 */
static int
config_red(struct dn_flow_set *p, struct dn_flow_set * x)
{
    int i;

    x->w_q = p->w_q;
    x->min_th = SCALE(p->min_th);
    x->max_th = SCALE(p->max_th);
    x->max_p = p->max_p;

    x->c_1 = p->max_p / (p->max_th - p->min_th);
    x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
    if (x->flags_fs & DN_IS_GENTLE_RED) {
	x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
	x->c_4 = (SCALE(1) - 2 * p->max_p);
    }

    /* if the lookup table already exist, free and create it again */
    if (x->w_q_lookup) {
	free(x->w_q_lookup, M_DUMMYNET);
	x->w_q_lookup = NULL ;
    }
    if (red_lookup_depth == 0) {
	printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0");
	free(x, M_DUMMYNET);
	return EINVAL;
    }
    x->lookup_depth = red_lookup_depth;
    x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
	    M_DUMMYNET, M_NOWAIT);
    if (x->w_q_lookup == NULL) {
	printf("sorry, cannot allocate red lookup table\n");
	free(x, M_DUMMYNET);
	return ENOSPC;
    }

    /* fill the lookup table with (1 - w_q)^x */
    x->lookup_step = p->lookup_step ;
    x->lookup_weight = p->lookup_weight ;
    x->w_q_lookup[0] = SCALE(1) - x->w_q;
    for (i = 1; i < x->lookup_depth; i++)
	x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
    if (red_avg_pkt_size < 1)
	red_avg_pkt_size = 512 ;
    x->avg_pkt_size = red_avg_pkt_size ;
    if (red_max_pkt_size < 1)
	red_max_pkt_size = 1500 ;
    x->max_pkt_size = red_max_pkt_size ;
    return 0 ;
}

static int
1473 alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) 1474 { 1475 if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ 1476 int l = pfs->rq_size; 1477 1478 if (l == 0) 1479 l = dn_hash_size; 1480 if (l < 4) 1481 l = 4; 1482 else if (l > DN_MAX_HASH_SIZE) 1483 l = DN_MAX_HASH_SIZE; 1484 x->rq_size = l; 1485 } else /* one is enough for null mask */ 1486 x->rq_size = 1; 1487 x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), 1488 M_DUMMYNET, M_NOWAIT | M_ZERO); 1489 if (x->rq == NULL) { 1490 printf("sorry, cannot allocate queue\n"); 1491 return ENOSPC; 1492 } 1493 x->rq_elements = 0; 1494 return 0 ; 1495 } 1496 1497 static void 1498 set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) 1499 { 1500 x->flags_fs = src->flags_fs; 1501 x->qsize = src->qsize; 1502 x->plr = src->plr; 1503 x->flow_mask = src->flow_mask; 1504 if (x->flags_fs & DN_QSIZE_IS_BYTES) { 1505 if (x->qsize > 1024*1024) 1506 x->qsize = 1024*1024 ; 1507 } else { 1508 if (x->qsize == 0) 1509 x->qsize = 50 ; 1510 if (x->qsize > 100) 1511 x->qsize = 50 ; 1512 } 1513 /* configuring RED */ 1514 if ( x->flags_fs & DN_IS_RED ) 1515 config_red(src, x) ; /* XXX should check errors */ 1516 } 1517 1518 /* 1519 * setup pipe or queue parameters. 1520 */ 1521 1522 static int 1523 config_pipe(struct dn_pipe *p) 1524 { 1525 int i, s; 1526 struct dn_flow_set *pfs = &(p->fs); 1527 struct dn_flow_queue *q; 1528 1529 /* 1530 * The config program passes parameters as follows: 1531 * bw = bits/second (0 means no limits), 1532 * delay = ms, must be translated into ticks. 
1533 * qsize = slots/bytes 1534 */ 1535 p->delay = ( p->delay * hz ) / 1000 ; 1536 /* We need either a pipe number or a flow_set number */ 1537 if (p->pipe_nr == 0 && pfs->fs_nr == 0) 1538 return EINVAL ; 1539 if (p->pipe_nr != 0 && pfs->fs_nr != 0) 1540 return EINVAL ; 1541 if (p->pipe_nr != 0) { /* this is a pipe */ 1542 struct dn_pipe *x, *a, *b; 1543 /* locate pipe */ 1544 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; 1545 a = b , b = b->next) ; 1546 1547 if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ 1548 x = malloc(sizeof(struct dn_pipe), M_DUMMYNET, M_NOWAIT | M_ZERO); 1549 if (x == NULL) { 1550 printf("ip_dummynet.c: no memory for new pipe\n"); 1551 return ENOSPC; 1552 } 1553 x->pipe_nr = p->pipe_nr; 1554 x->fs.pipe = x ; 1555 /* idle_heap is the only one from which we extract from the middle. 1556 */ 1557 x->idle_heap.size = x->idle_heap.elements = 0 ; 1558 x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos); 1559 } else { 1560 x = b; 1561 s = splimp(); 1562 /* Flush accumulated credit for all queues */ 1563 for (i = 0; i <= x->fs.rq_size; i++) 1564 for (q = x->fs.rq[i]; q; q = q->next) 1565 q->numbytes = 0; 1566 splx(s); 1567 } 1568 1569 s = splimp(); 1570 x->bandwidth = p->bandwidth ; 1571 x->numbytes = 0; /* just in case... 
*/ 1572 bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); 1573 x->ifp = NULL ; /* reset interface ptr */ 1574 x->delay = p->delay ; 1575 set_fs_parms(&(x->fs), pfs); 1576 1577 1578 if ( x->fs.rq == NULL ) { /* a new pipe */ 1579 s = alloc_hash(&(x->fs), pfs) ; 1580 if (s) { 1581 free(x, M_DUMMYNET); 1582 return s ; 1583 } 1584 x->next = b ; 1585 if (a == NULL) 1586 all_pipes = x ; 1587 else 1588 a->next = x ; 1589 } 1590 splx(s); 1591 } else { /* config queue */ 1592 struct dn_flow_set *x, *a, *b ; 1593 1594 /* locate flow_set */ 1595 for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ; 1596 a = b , b = b->next) ; 1597 1598 if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ 1599 if (pfs->parent_nr == 0) /* need link to a pipe */ 1600 return EINVAL ; 1601 x = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, M_NOWAIT|M_ZERO); 1602 if (x == NULL) { 1603 printf("ip_dummynet.c: no memory for new flow_set\n"); 1604 return ENOSPC; 1605 } 1606 x->fs_nr = pfs->fs_nr; 1607 x->parent_nr = pfs->parent_nr; 1608 x->weight = pfs->weight ; 1609 if (x->weight == 0) 1610 x->weight = 1 ; 1611 else if (x->weight > 100) 1612 x->weight = 100 ; 1613 } else { 1614 /* Change parent pipe not allowed; must delete and recreate */ 1615 if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) 1616 return EINVAL ; 1617 x = b; 1618 } 1619 s = splimp(); 1620 set_fs_parms(x, pfs); 1621 1622 if ( x->rq == NULL ) { /* a new flow_set */ 1623 s = alloc_hash(x, pfs) ; 1624 if (s) { 1625 free(x, M_DUMMYNET); 1626 return s ; 1627 } 1628 x->next = b; 1629 if (a == NULL) 1630 all_flow_sets = x; 1631 else 1632 a->next = x; 1633 } 1634 splx(s); 1635 } 1636 return 0 ; 1637 } 1638 1639 /* 1640 * Helper function to remove from a heap queues which are linked to 1641 * a flow_set about to be deleted. 
 */
static void
fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
{
    int i = 0, found = 0 ;
    /* swap-with-last removal; re-heapify once at the end if needed */
    for (; i < h->elements ;)
	if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
	    h->elements-- ;
	    h->p[i] = h->p[h->elements] ;
	    found++ ;
	} else
	    i++ ;
    if (found)
	heapify(h);
}

/*
 * helper function to remove a pipe from a heap (can be there at most once)
 */
static void
pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
{
    if (h->elements > 0) {
	int i = 0 ;
	for (i=0; i < h->elements ; i++ ) {
	    if (h->p[i].object == p) { /* found it */
		h->elements-- ;
		h->p[i] = h->p[h->elements] ;
		heapify(h);
		break ;
	    }
	}
    }
}

/*
 * drain all queues. Called in case of severe mbuf shortage.
 * Frees the queued packets but keeps the pipes and flow_sets
 * themselves configured (purge_flow_set with all=0).
 */
void
dummynet_drain()
{
    struct dn_flow_set *fs;
    struct dn_pipe *p;
    struct dn_pkt *pkt;

    heap_free(&ready_heap);
    heap_free(&wfq_ready_heap);
    heap_free(&extract_heap);
    /* remove all references to this pipe from flow_sets */
    for (fs = all_flow_sets; fs; fs= fs->next )
	purge_flow_set(fs, 0);

    for (p = all_pipes; p; p= p->next ) {
	purge_flow_set(&(p->fs), 0);
	for (pkt = p->head ; pkt ; )
	    DN_FREE_PKT(pkt) ;
	p->head = p->tail = NULL ;
    }
}

/*
 * Fully delete a pipe or a queue, cleaning up associated info.
 */
static int
delete_pipe(struct dn_pipe *p)
{
    int s ;

    /* exactly one of pipe_nr / fs_nr must identify the object */
    if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
	return EINVAL ;
    if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
	return EINVAL ;
    if (p->pipe_nr != 0) { /* this is an old-style pipe */
	struct dn_pipe *a, *b;
	struct dn_flow_set *fs;

	/* locate pipe */
	for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
		 a = b , b = b->next) ;
	if (b == NULL || (b->pipe_nr != p->pipe_nr) )
	    return EINVAL ;	/* not found */

	s = splimp() ;

	/* unlink from list of pipes */
	if (a == NULL)
	    all_pipes = b->next ;
	else
	    a->next = b->next ;
	/* remove references to this pipe from the ip_fw rules. */
	flush_pipe_ptrs(&(b->fs));

	/* remove all references to this pipe from flow_sets */
	for (fs = all_flow_sets; fs; fs= fs->next )
	    if (fs->pipe == b) {
		printf("++ ref to pipe %d from fs %d\n",
			p->pipe_nr, fs->fs_nr);
		fs->pipe = NULL ;
		purge_flow_set(fs, 0);
	    }
	fs_remove_from_heap(&ready_heap, &(b->fs));
	purge_pipe(b);	/* remove all data associated to this pipe */
	/* remove reference to here from extract_heap and wfq_ready_heap */
	pipe_remove_from_heap(&extract_heap, b);
	pipe_remove_from_heap(&wfq_ready_heap, b);
	splx(s);
	free(b, M_DUMMYNET);
    } else { /* this is a WF2Q queue (dn_flow_set) */
	struct dn_flow_set *a, *b;

	/* locate set */
	for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ;
		 a = b , b = b->next) ;
	if (b == NULL || (b->fs_nr != p->fs.fs_nr) )
	    return EINVAL ;	/* not found */

	s = splimp() ;
	if (a == NULL)
	    all_flow_sets = b->next ;
	else
	    a->next = b->next ;
	/* remove references to this flow_set from the ip_fw rules. */
	flush_pipe_ptrs(b);

	if (b->pipe != NULL) {
	    /* Update total weight on parent pipe and cleanup parent heaps */
	    b->pipe->sum -= b->weight * b->backlogged ;
	    fs_remove_from_heap(&(b->pipe->not_eligible_heap), b);
	    fs_remove_from_heap(&(b->pipe->scheduler_heap), b);
#if 1	/* XXX should i remove from idle_heap as well ? */
	    fs_remove_from_heap(&(b->pipe->idle_heap), b);
#endif
	}
	purge_flow_set(b, 1);
	splx(s);
    }
    return 0 ;
}

/*
 * helper function used to copy data from kernel in DUMMYNET_GET
 * Copies every queue of *set into the user buffer at bp, clearing
 * kernel pointers in the copies, and returns the advanced pointer.
 */
static char *
dn_copy_set(struct dn_flow_set *set, char *bp)
{
    int i, copied = 0 ;
    struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp;

    for (i = 0 ; i <= set->rq_size ; i++)
	for (q = set->rq[i] ; q ; q = q->next, qp++ ) {
	    /* sanity checks: report (but do not fix) corrupted entries */
	    if (q->hash_slot != i)
		printf("++ at %d: wrong slot (have %d, "
		    "should be %d)\n", copied, q->hash_slot, i);
	    if (q->fs != set)
		printf("++ at %d: wrong fs ptr (have %p, should be %p)\n",
			i, q->fs, set);
	    copied++ ;
	    bcopy(q, qp, sizeof( *q ) );
	    /* cleanup pointers */
	    qp->next = NULL ;
	    qp->head = qp->tail = NULL ;
	    qp->fs = NULL ;
	}
    if (copied != set->rq_elements)
	printf("++ wrong count, have %d should be %d\n",
	    copied, set->rq_elements);
    return (char *)qp ;
}

/*
 * Handler for IP_DUMMYNET_GET: serialize all pipes and flow_sets
 * (each followed by its queues) into one buffer and copy it out.
 */
static int
dummynet_get(struct sockopt *sopt)
{
    char *buf, *bp ;	/* bp is the "copy-pointer" */
    size_t size ;
    struct dn_flow_set *set ;
    struct dn_pipe *p ;
    int s, error=0 ;

    s = splimp();
    /*
     * compute size of data structures: list of pipes and flow_sets.
     */
    for (p = all_pipes, size = 0 ; p ; p = p->next )
	size += sizeof( *p ) +
	    p->fs.rq_elements * sizeof(struct dn_flow_queue);
    for (set = all_flow_sets ; set ; set = set->next )
	size += sizeof ( *set ) +
	    set->rq_elements * sizeof(struct dn_flow_queue);
    buf = malloc(size, M_TEMP, M_NOWAIT);
    if (buf == 0) {
	splx(s);
	return ENOBUFS ;
    }
    for (p = all_pipes, bp = buf ; p ; p = p->next ) {
	struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;

	/*
	 * copy pipe descriptor into *bp, convert delay back to ms,
	 * then copy the flow_set descriptor(s) one at a time.
	 * After each flow_set, copy the queue descriptor it owns.
	 */
	bcopy(p, bp, sizeof( *p ) );
	pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
	/*
	 * XXX the following is a hack based on ->next being the
	 * first field in dn_pipe and dn_flow_set. The correct
	 * solution would be to move the dn_flow_set to the beginning
	 * of struct dn_pipe.
	 */
	pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ;
	/* clean pointers */
	pipe_bp->head = pipe_bp->tail = NULL ;
	pipe_bp->fs.next = NULL ;
	pipe_bp->fs.pipe = NULL ;
	pipe_bp->fs.rq = NULL ;

	bp += sizeof( *p ) ;
	bp = dn_copy_set( &(p->fs), bp );
    }
    for (set = all_flow_sets ; set ; set = set->next ) {
	struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ;
	bcopy(set, bp, sizeof( *set ) );
	/* XXX same hack as above */
	fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ;
	fs_bp->pipe = NULL ;
	fs_bp->rq = NULL ;
	bp += sizeof( *set ) ;
	bp = dn_copy_set( set, bp );
    }
    splx(s);
    error = sooptcopyout(sopt, buf, size);
    free(buf, M_TEMP);
    return error ;
}

/*
 * Handler for the various dummynet socket options (get, flush, config, del)
 */
static int
ip_dn_ctl(struct sockopt *sopt)
{
    int error = 0 ;
    struct dn_pipe *p, tmp_pipe;

    /* Disallow sets in really-really secure mode.
     */
    if (sopt->sopt_dir == SOPT_SET) {
#if defined(__FreeBSD__) && __FreeBSD_version >= 500034
	error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
	if (error)
	    return (error);
#else
	if (securelevel >= 3)
	    return (EPERM);
#endif
    }

    switch (sopt->sopt_name) {
    default :
	printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name);
	return EINVAL ;

    case IP_DUMMYNET_GET :
	error = dummynet_get(sopt);
	break ;

    case IP_DUMMYNET_FLUSH :
	dummynet_flush() ;
	break ;

    case IP_DUMMYNET_CONFIGURE :
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;
	error = config_pipe(p);
	break ;

    case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
	p = &tmp_pipe ;
	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
	if (error)
	    break ;

	error = delete_pipe(p);
	break ;
    }
    return error ;
}

/*
 * Module initialization: reset global lists and heaps, install the
 * dummynet hooks (ctl/io/ruledel pointers) and start the periodic
 * dummynet() timeout.
 */
static void
ip_dn_init(void)
{
    printf("DUMMYNET initialized (011031)\n");
    all_pipes = NULL ;
    all_flow_sets = NULL ;
    ready_heap.size = ready_heap.elements = 0 ;
    ready_heap.offset = 0 ;

    wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
    wfq_ready_heap.offset = 0 ;

    extract_heap.size = extract_heap.elements = 0 ;
    extract_heap.offset = 0 ;
    ip_dn_ctl_ptr = ip_dn_ctl;
    ip_dn_io_ptr = dummynet_io;
    ip_dn_ruledel_ptr = dn_rule_delete;
    bzero(&dn_timeout, sizeof(struct callout_handle));
    dn_timeout = timeout(dummynet, NULL, 1);
}

/*
 * KLD event handler: initialize on load (refusing a double load),
 * tear everything down and clear the hooks on unload (only possible
 * when built as a module).
 */
static int
dummynet_modevent(module_t mod, int type, void *data)
{
    int s;
    switch (type) {
    case MOD_LOAD:
	s = splimp();
	if (DUMMYNET_LOADED) {
	    splx(s);
	    printf("DUMMYNET already loaded\n");
	    return EEXIST ;
	}
	ip_dn_init();
	splx(s);
	break;

    case MOD_UNLOAD:
#if !defined(KLD_MODULE)
	printf("dummynet statically compiled, cannot unload\n");
	return EINVAL ;
#else
	s = splimp();
	untimeout(dummynet, NULL, dn_timeout);
	dummynet_flush();
	ip_dn_ctl_ptr = NULL;
	ip_dn_io_ptr = NULL;
	ip_dn_ruledel_ptr = NULL;
	splx(s);
#endif
	break ;
    default:
	break ;
    }
    return 0 ;
}

static moduledata_t dummynet_mod = {
	"dummynet",
	dummynet_modevent,
	NULL
} ;
DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_DEPEND(dummynet, ipfw, 1, 1, 1);
MODULE_VERSION(dummynet, 1);