1 /* 2 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa 3 * Portions Copyright (c) 2000 Akamba Corp. 4 * All rights reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.22 2003/05/13 09:31:06 maxim Exp $ 28 * $DragonFly: src/sys/net/dummynet/ip_dummynet.c,v 1.3 2003/08/07 21:17:24 dillon Exp $ 29 */ 30 31 #if !defined(KLD_MODULE) 32 #include "opt_ipfw.h" /* for IPFW2 definition */ 33 #endif 34 35 #define DEB(x) 36 #define DDB(x) x 37 38 /* 39 * This module implements IP dummynet, a bandwidth limiter/delay emulator 40 * used in conjunction with the ipfw package. 41 * Description of the data structures used is in ip_dummynet.h 42 * Here you mainly find the following blocks of code: 43 * + variable declarations; 44 * + heap management functions; 45 * + scheduler and dummynet functions; 46 * + configuration and initialization. 47 * 48 * NOTA BENE: critical sections are protected by splimp()/splx() 49 * pairs. One would think that splnet() is enough as for most of 50 * the netinet code, but it is not so because when used with 51 * bridging, dummynet is invoked at splimp(). 52 * 53 * Most important Changes: 54 * 55 * 011004: KLDable 56 * 010124: Fixed WF2Q behaviour 57 * 010122: Fixed spl protection. 58 * 000601: WF2Q support 59 * 000106: large rewrite, use heaps to handle very many pipes. 60 * 980513: initial release 61 * 62 * include files marked with XXX are probably not needed 63 */ 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/malloc.h> 68 #include <sys/mbuf.h> 69 #include <sys/kernel.h> 70 #include <sys/module.h> 71 #include <sys/proc.h> 72 #include <sys/socket.h> 73 #include <sys/socketvar.h> 74 #include <sys/time.h> 75 #include <sys/sysctl.h> 76 #include <net/if.h> 77 #include <net/route.h> 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/in_var.h> 81 #include <netinet/ip.h> 82 #include <net/ipfw/ip_fw.h> 83 #include "ip_dummynet.h" 84 #include <netinet/ip_var.h> 85 86 #include <netinet/if_ether.h> /* for struct arpcom */ 87 #include <net/bridge/bridge.h> 88 89 /* 90 * We keep a private variable for the simulation time, but we could 91 * probably use an existing one ("softticks" in sys/kern/kern_timer.c) 92 */ 93 static dn_key curr_time = 0 ; /* current simulation time */ 94 95 static int dn_hash_size = 64 ; /* default hash size */ 96 97 /* statistics on number of queue searches and search steps */ 98 static int searches, search_steps ; 99 static int pipe_expire = 1 ; /* expire queue if empty */ 100 static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ 101 102 static int red_lookup_depth = 256; /* RED - default lookup table depth */ 103 static int red_avg_pkt_size = 512; /* RED - default medium packet size */ 104 static int red_max_pkt_size = 1500; /* RED - default max packet size */ 105 106 /* 107 * Three heaps contain queues and pipes that the scheduler handles: 108 * 109 * ready_heap contains all dn_flow_queue related to fixed-rate pipes. 110 * 111 * wfq_ready_heap contains the pipes associated with WF2Q flows 112 * 113 * extract_heap contains pipes associated with delay lines. 114 * 115 */ 116 117 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); 118 119 static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; 120 121 static int heap_init(struct dn_heap *h, int size) ; 122 static int heap_insert (struct dn_heap *h, dn_key key1, void *p); 123 static void heap_extract(struct dn_heap *h, void *obj); 124 125 static void transmit_event(struct dn_pipe *pipe); 126 static void ready_event(struct dn_flow_queue *q); 127 128 static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */ 129 static struct dn_flow_set *all_flow_sets = NULL ;/* list of all flow_sets */ 130 131 static struct callout_handle dn_timeout; 132 133 #ifdef SYSCTL_NODE 134 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, 135 CTLFLAG_RW, 0, "Dummynet"); 136 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, 137 CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); 138 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, 139 CTLFLAG_RD, &curr_time, 0, "Current tick"); 140 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, 141 CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); 142 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, 143 CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); 144 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, 145 CTLFLAG_RD, &searches, 0, "Number of queue searches"); 146 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, 147 CTLFLAG_RD, &search_steps, 0, "Number of queue search steps"); 148 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, 149 CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty"); 150 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, 151 CTLFLAG_RW, &dn_max_ratio, 0, 152 "Max ratio between dynamic queues and buckets"); 153 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, 154 CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table"); 155 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, 156 CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size"); 157 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, 158 CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size"); 159 #endif 160 161 static int config_pipe(struct dn_pipe *p); 162 static int ip_dn_ctl(struct sockopt *sopt); 163 164 static void rt_unref(struct rtentry *); 165 static void dummynet(void *); 166 static void dummynet_flush(void); 167 void dummynet_drain(void); 168 static ip_dn_io_t dummynet_io; 169 static void dn_rule_delete(void *); 170 171 int if_tx_rdy(struct ifnet *ifp); 172 173 static void 174 rt_unref(struct rtentry *rt) 175 { 176 if (rt == NULL) 177 return ; 178 if (rt->rt_refcnt <= 0) 179 printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt); 180 RTFREE(rt); 181 } 182 183 /* 184 * Heap management functions. 185 * 186 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2. 187 * Some macros help finding parent/children so we can optimize them. 188 * 189 * heap_init() is called to expand the heap when needed. 190 * Increment size in blocks of 16 entries. 191 * XXX failure to allocate a new element is a pretty bad failure 192 * as we basically stall a whole queue forever!! 193 * Returns 1 on error, 0 on success 194 */ 195 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) 196 #define HEAP_LEFT(x) ( 2*(x) + 1 ) 197 #define HEAP_IS_LEFT(x) ( (x) & 1 ) 198 #define HEAP_RIGHT(x) ( 2*(x) + 2 ) 199 #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } 200 #define HEAP_INCREMENT 15 201 202 static int 203 heap_init(struct dn_heap *h, int new_size) 204 { 205 struct dn_heap_entry *p; 206 207 if (h->size >= new_size ) { 208 printf("heap_init, Bogus call, have %d want %d\n", 209 h->size, new_size); 210 return 0 ; 211 } 212 new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; 213 p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); 214 if (p == NULL) { 215 printf(" heap_init, resize %d failed\n", new_size ); 216 return 1 ; /* error */ 217 } 218 if (h->size > 0) { 219 bcopy(h->p, p, h->size * sizeof(*p) ); 220 free(h->p, M_DUMMYNET); 221 } 222 h->p = p ; 223 h->size = new_size ; 224 return 0 ; 225 } 226 227 /* 228 * Insert element in heap. Normally, p != NULL, we insert p in 229 * a new position and bubble up. If p == NULL, then the element is 230 * already in place, and key is the position where to start the 231 * bubble-up. 232 * Returns 1 on failure (cannot allocate new heap entry) 233 * 234 * If offset > 0 the position (index, int) of the element in the heap is 235 * also stored in the element itself at the given offset in bytes. 236 */ 237 #define SET_OFFSET(heap, node) \ 238 if (heap->offset > 0) \ 239 *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; 240 /* 241 * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. 242 */ 243 #define RESET_OFFSET(heap, node) \ 244 if (heap->offset > 0) \ 245 *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; 246 static int 247 heap_insert(struct dn_heap *h, dn_key key1, void *p) 248 { 249 int son = h->elements ; 250 251 if (p == NULL) /* data already there, set starting point */ 252 son = key1 ; 253 else { /* insert new element at the end, possibly resize */ 254 son = h->elements ; 255 if (son == h->size) /* need resize... */ 256 if (heap_init(h, h->elements+1) ) 257 return 1 ; /* failure... */ 258 h->p[son].object = p ; 259 h->p[son].key = key1 ; 260 h->elements++ ; 261 } 262 while (son > 0) { /* bubble up */ 263 int father = HEAP_FATHER(son) ; 264 struct dn_heap_entry tmp ; 265 266 if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) 267 break ; /* found right position */ 268 /* son smaller than father, swap and repeat */ 269 HEAP_SWAP(h->p[son], h->p[father], tmp) ; 270 SET_OFFSET(h, son); 271 son = father ; 272 } 273 SET_OFFSET(h, son); 274 return 0 ; 275 } 276 277 /* 278 * remove top element from heap, or obj if obj != NULL 279 */ 280 static void 281 heap_extract(struct dn_heap *h, void *obj) 282 { 283 int child, father, max = h->elements - 1 ; 284 285 if (max < 0) { 286 printf("warning, extract from empty heap 0x%p\n", h); 287 return ; 288 } 289 father = 0 ; /* default: move up smallest child */ 290 if (obj != NULL) { /* extract specific element, index is at offset */ 291 if (h->offset <= 0) 292 panic("*** heap_extract from middle not supported on this heap!!!\n"); 293 father = *((int *)((char *)obj + h->offset)) ; 294 if (father < 0 || father >= h->elements) { 295 printf("dummynet: heap_extract, father %d out of bound 0..%d\n", 296 father, h->elements); 297 panic("heap_extract"); 298 } 299 } 300 RESET_OFFSET(h, father); 301 child = HEAP_LEFT(father) ; /* left child */ 302 while (child <= max) { /* valid entry */ 303 if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) 304 child = child+1 ; /* take right child, otherwise left */ 305 h->p[father] = h->p[child] ; 306 SET_OFFSET(h, father); 307 father = child ; 308 child = HEAP_LEFT(child) ; /* left child for next loop */ 309 } 310 h->elements-- ; 311 if (father != max) { 312 /* 313 * Fill hole with last entry and bubble up, reusing the insert code 314 */ 315 h->p[father] = h->p[max] ; 316 heap_insert(h, father, NULL); /* this one cannot fail */ 317 } 318 } 319 320 #if 0 321 /* 322 * change object position and update references 323 * XXX this one is never used! 324 */ 325 static void 326 heap_move(struct dn_heap *h, dn_key new_key, void *object) 327 { 328 int temp; 329 int i ; 330 int max = h->elements-1 ; 331 struct dn_heap_entry buf ; 332 333 if (h->offset <= 0) 334 panic("cannot move items on this heap"); 335 336 i = *((int *)((char *)object + h->offset)); 337 if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ 338 h->p[i].key = new_key ; 339 for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; 340 i = temp ) { /* bubble up */ 341 HEAP_SWAP(h->p[i], h->p[temp], buf) ; 342 SET_OFFSET(h, i); 343 } 344 } else { /* must move down */ 345 h->p[i].key = new_key ; 346 while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ 347 if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) 348 temp++ ; /* select child with min key */ 349 if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ 350 HEAP_SWAP(h->p[i], h->p[temp], buf) ; 351 SET_OFFSET(h, i); 352 } else 353 break ; 354 i = temp ; 355 } 356 } 357 SET_OFFSET(h, i); 358 } 359 #endif /* heap_move, unused */ 360 361 /* 362 * heapify() will reorganize data inside an array to maintain the 363 * heap property. It is needed when we delete a bunch of entries. 364 */ 365 static void 366 heapify(struct dn_heap *h) 367 { 368 int i ; 369 370 for (i = 0 ; i < h->elements ; i++ ) 371 heap_insert(h, i , NULL) ; 372 } 373 374 /* 375 * cleanup the heap and free data structure 376 */ 377 static void 378 heap_free(struct dn_heap *h) 379 { 380 if (h->size >0 ) 381 free(h->p, M_DUMMYNET); 382 bzero(h, sizeof(*h) ); 383 } 384 385 /* 386 * --- end of heap management functions --- 387 */ 388 389 /* 390 * Scheduler functions: 391 * 392 * transmit_event() is called when the delay-line needs to enter 393 * the scheduler, either because of existing pkts getting ready, 394 * or new packets entering the queue. The event handled is the delivery 395 * time of the packet. 396 * 397 * ready_event() does something similar with fixed-rate queues, and the 398 * event handled is the finish time of the head pkt. 399 * 400 * wfq_ready_event() does something similar with WF2Q queues, and the 401 * event handled is the start time of the head pkt. 402 * 403 * In all cases, we make sure that the data structures are consistent 404 * before passing pkts out, because this might trigger recursive 405 * invocations of the procedures. 406 */ 407 static void 408 transmit_event(struct dn_pipe *pipe) 409 { 410 struct dn_pkt *pkt ; 411 412 while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { 413 /* 414 * first unlink, then call procedures, since ip_input() can invoke 415 * ip_output() and viceversa, thus causing nested calls 416 */ 417 pipe->head = DN_NEXT(pkt) ; 418 419 /* 420 * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf 421 * (NOT A REAL one, just a small block of malloc'ed memory) with 422 * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET 423 * dn_m (m_next) = actual mbuf to be processed by ip_input/output 424 * and some other fields. 425 * The block IS FREED HERE because it contains parameters passed 426 * to the called routine. 427 */ 428 switch (pkt->dn_dir) { 429 case DN_TO_IP_OUT: 430 (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL); 431 rt_unref (pkt->ro.ro_rt) ; 432 break ; 433 434 case DN_TO_IP_IN : 435 ip_input((struct mbuf *)pkt) ; 436 break ; 437 438 case DN_TO_BDG_FWD : 439 if (!BDG_LOADED) { 440 /* somebody unloaded the bridge module. Drop pkt */ 441 printf("-- dropping bridged packet trapped in pipe--\n"); 442 m_freem(pkt->dn_m); 443 break; 444 } /* fallthrough */ 445 case DN_TO_ETH_DEMUX: 446 { 447 struct mbuf *m = (struct mbuf *)pkt ; 448 struct ether_header *eh; 449 450 if (pkt->dn_m->m_len < ETHER_HDR_LEN && 451 (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) { 452 printf("dummynet/bridge: pullup fail, dropping pkt\n"); 453 break; 454 } 455 /* 456 * same as ether_input, make eh be a pointer into the mbuf 457 */ 458 eh = mtod(pkt->dn_m, struct ether_header *); 459 m_adj(pkt->dn_m, ETHER_HDR_LEN); 460 /* 461 * bdg_forward() wants a pointer to the pseudo-mbuf-header, but 462 * on return it will supply the pointer to the actual packet 463 * (originally pkt->dn_m, but could be something else now) if 464 * it has not consumed it. 465 */ 466 if (pkt->dn_dir == DN_TO_BDG_FWD) { 467 m = bdg_forward_ptr(m, eh, pkt->ifp); 468 if (m) 469 m_freem(m); 470 } else 471 ether_demux(NULL, eh, m); /* which consumes the mbuf */ 472 } 473 break ; 474 case DN_TO_ETH_OUT: 475 ether_output_frame(pkt->ifp, (struct mbuf *)pkt); 476 break; 477 478 default: 479 printf("dummynet: bad switch %d!\n", pkt->dn_dir); 480 m_freem(pkt->dn_m); 481 break ; 482 } 483 free(pkt, M_DUMMYNET); 484 } 485 /* if there are leftover packets, put into the heap for next event */ 486 if ( (pkt = pipe->head) ) 487 heap_insert(&extract_heap, pkt->output_time, pipe ) ; 488 /* XXX should check errors on heap_insert, by draining the 489 * whole pipe p and hoping in the future we are more successful 490 */ 491 } 492 493 /* 494 * the following macro computes how many ticks we have to wait 495 * before being able to transmit a packet. The credit is taken from 496 * either a pipe (WF2Q) or a flow_queue (per-flow queueing) 497 */ 498 #define SET_TICKS(pkt, q, p) \ 499 (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ 500 p->bandwidth ; 501 502 /* 503 * extract pkt from queue, compute output time (could be now) 504 * and put into delay line (p_queue) 505 */ 506 static void 507 move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, 508 struct dn_pipe *p, int len) 509 { 510 q->head = DN_NEXT(pkt) ; 511 q->len-- ; 512 q->len_bytes -= len ; 513 514 pkt->output_time = curr_time + p->delay ; 515 516 if (p->head == NULL) 517 p->head = pkt; 518 else 519 DN_NEXT(p->tail) = pkt; 520 p->tail = pkt; 521 DN_NEXT(p->tail) = NULL; 522 } 523 524 /* 525 * ready_event() is invoked every time the queue must enter the 526 * scheduler, either because the first packet arrives, or because 527 * a previously scheduled event fired. 528 * On invokation, drain as many pkts as possible (could be 0) and then 529 * if there are leftover packets reinsert the pkt in the scheduler. 530 */ 531 static void 532 ready_event(struct dn_flow_queue *q) 533 { 534 struct dn_pkt *pkt; 535 struct dn_pipe *p = q->fs->pipe ; 536 int p_was_empty ; 537 538 if (p == NULL) { 539 printf("ready_event- pipe is gone\n"); 540 return ; 541 } 542 p_was_empty = (p->head == NULL) ; 543 544 /* 545 * schedule fixed-rate queues linked to this pipe: 546 * Account for the bw accumulated since last scheduling, then 547 * drain as many pkts as allowed by q->numbytes and move to 548 * the delay line (in p) computing output time. 549 * bandwidth==0 (no limit) means we can drain the whole queue, 550 * setting len_scaled = 0 does the job. 551 */ 552 q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; 553 while ( (pkt = q->head) != NULL ) { 554 int len = pkt->dn_m->m_pkthdr.len; 555 int len_scaled = p->bandwidth ? len*8*hz : 0 ; 556 if (len_scaled > q->numbytes ) 557 break ; 558 q->numbytes -= len_scaled ; 559 move_pkt(pkt, q, p, len); 560 } 561 /* 562 * If we have more packets queued, schedule next ready event 563 * (can only occur when bandwidth != 0, otherwise we would have 564 * flushed the whole queue in the previous loop). 565 * To this purpose we record the current time and compute how many 566 * ticks to go for the finish time of the packet. 567 */ 568 if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */ 569 dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */ 570 q->sched_time = curr_time ; 571 heap_insert(&ready_heap, curr_time + t, (void *)q ); 572 /* XXX should check errors on heap_insert, and drain the whole 573 * queue on error hoping next time we are luckier. 574 */ 575 } else { /* RED needs to know when the queue becomes empty */ 576 q->q_time = curr_time; 577 q->numbytes = 0; 578 } 579 /* 580 * If the delay line was empty call transmit_event(p) now. 581 * Otherwise, the scheduler will take care of it. 582 */ 583 if (p_was_empty) 584 transmit_event(p); 585 } 586 587 /* 588 * Called when we can transmit packets on WF2Q queues. Take pkts out of 589 * the queues at their start time, and enqueue into the delay line. 590 * Packets are drained until p->numbytes < 0. As long as 591 * len_scaled >= p->numbytes, the packet goes into the delay line 592 * with a deadline p->delay. For the last packet, if p->numbytes<0, 593 * there is an additional delay. 594 */ 595 static void 596 ready_event_wfq(struct dn_pipe *p) 597 { 598 int p_was_empty = (p->head == NULL) ; 599 struct dn_heap *sch = &(p->scheduler_heap); 600 struct dn_heap *neh = &(p->not_eligible_heap) ; 601 602 if (p->if_name[0] == 0) /* tx clock is simulated */ 603 p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth; 604 else { /* tx clock is for real, the ifq must be empty or this is a NOP */ 605 if (p->ifp && p->ifp->if_snd.ifq_head != NULL) 606 return ; 607 else { 608 DEB(printf("pipe %d ready from %s --\n", 609 p->pipe_nr, p->if_name);) 610 } 611 } 612 613 /* 614 * While we have backlogged traffic AND credit, we need to do 615 * something on the queue. 616 */ 617 while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { 618 if (sch->elements > 0) { /* have some eligible pkts to send out */ 619 struct dn_flow_queue *q = sch->p[0].object ; 620 struct dn_pkt *pkt = q->head; 621 struct dn_flow_set *fs = q->fs; 622 u_int64_t len = pkt->dn_m->m_pkthdr.len; 623 int len_scaled = p->bandwidth ? len*8*hz : 0 ; 624 625 heap_extract(sch, NULL); /* remove queue from heap */ 626 p->numbytes -= len_scaled ; 627 move_pkt(pkt, q, p, len); 628 629 p->V += (len<<MY_M) / p->sum ; /* update V */ 630 q->S = q->F ; /* update start time */ 631 if (q->len == 0) { /* Flow not backlogged any more */ 632 fs->backlogged-- ; 633 heap_insert(&(p->idle_heap), q->F, q); 634 } else { /* still backlogged */ 635 /* 636 * update F and position in backlogged queue, then 637 * put flow in not_eligible_heap (we will fix this later). 638 */ 639 len = (q->head)->dn_m->m_pkthdr.len; 640 q->F += (len<<MY_M)/(u_int64_t) fs->weight ; 641 if (DN_KEY_LEQ(q->S, p->V)) 642 heap_insert(neh, q->S, q); 643 else 644 heap_insert(sch, q->F, q); 645 } 646 } 647 /* 648 * now compute V = max(V, min(S_i)). Remember that all elements in sch 649 * have by definition S_i <= V so if sch is not empty, V is surely 650 * the max and we must not update it. Conversely, if sch is empty 651 * we only need to look at neh. 652 */ 653 if (sch->elements == 0 && neh->elements > 0) 654 p->V = MAX64 ( p->V, neh->p[0].key ); 655 /* move from neh to sch any packets that have become eligible */ 656 while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) { 657 struct dn_flow_queue *q = neh->p[0].object ; 658 heap_extract(neh, NULL); 659 heap_insert(sch, q->F, q); 660 } 661 662 if (p->if_name[0] != '\0') {/* tx clock is from a real thing */ 663 p->numbytes = -1 ; /* mark not ready for I/O */ 664 break ; 665 } 666 } 667 if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 668 && p->idle_heap.elements > 0) { 669 /* 670 * no traffic and no events scheduled. We can get rid of idle-heap. 671 */ 672 int i ; 673 674 for (i = 0 ; i < p->idle_heap.elements ; i++) { 675 struct dn_flow_queue *q = p->idle_heap.p[i].object ; 676 677 q->F = 0 ; 678 q->S = q->F + 1 ; 679 } 680 p->sum = 0 ; 681 p->V = 0 ; 682 p->idle_heap.elements = 0 ; 683 } 684 /* 685 * If we are getting clocks from dummynet (not a real interface) and 686 * If we are under credit, schedule the next ready event. 687 * Also fix the delivery time of the last packet. 688 */ 689 if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */ 690 dn_key t=0 ; /* number of ticks i have to wait */ 691 692 if (p->bandwidth > 0) 693 t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; 694 p->tail->output_time += t ; 695 p->sched_time = curr_time ; 696 heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); 697 /* XXX should check errors on heap_insert, and drain the whole 698 * queue on error hoping next time we are luckier. 699 */ 700 } 701 /* 702 * If the delay line was empty call transmit_event(p) now. 703 * Otherwise, the scheduler will take care of it. 704 */ 705 if (p_was_empty) 706 transmit_event(p); 707 } 708 709 /* 710 * This is called once per tick, or HZ times per second. It is used to 711 * increment the current tick counter and schedule expired events. 712 */ 713 static void 714 dummynet(void * __unused unused) 715 { 716 void *p ; /* generic parameter to handler */ 717 struct dn_heap *h ; 718 int s ; 719 struct dn_heap *heaps[3]; 720 int i; 721 struct dn_pipe *pe ; 722 723 heaps[0] = &ready_heap ; /* fixed-rate queues */ 724 heaps[1] = &wfq_ready_heap ; /* wfq queues */ 725 heaps[2] = &extract_heap ; /* delay line */ 726 s = splimp(); /* see note on top, splnet() is not enough */ 727 curr_time++ ; 728 for (i=0; i < 3 ; i++) { 729 h = heaps[i]; 730 while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) { 731 DDB(if (h->p[0].key > curr_time) 732 printf("-- dummynet: warning, heap %d is %d ticks late\n", 733 i, (int)(curr_time - h->p[0].key));) 734 p = h->p[0].object ; /* store a copy before heap_extract */ 735 heap_extract(h, NULL); /* need to extract before processing */ 736 if (i == 0) 737 ready_event(p) ; 738 else if (i == 1) { 739 struct dn_pipe *pipe = p; 740 if (pipe->if_name[0] != '\0') 741 printf("*** bad ready_event_wfq for pipe %s\n", 742 pipe->if_name); 743 else 744 ready_event_wfq(p) ; 745 } else 746 transmit_event(p); 747 } 748 } 749 /* sweep pipes trying to expire idle flow_queues */ 750 for (pe = all_pipes; pe ; pe = pe->next ) 751 if (pe->idle_heap.elements > 0 && 752 DN_KEY_LT(pe->idle_heap.p[0].key, pe->V) ) { 753 struct dn_flow_queue *q = pe->idle_heap.p[0].object ; 754 755 heap_extract(&(pe->idle_heap), NULL); 756 q->S = q->F + 1 ; /* mark timestamp as invalid */ 757 pe->sum -= q->fs->weight ; 758 } 759 splx(s); 760 dn_timeout = timeout(dummynet, NULL, 1); 761 } 762 763 /* 764 * called by an interface when tx_rdy occurs. 765 */ 766 int 767 if_tx_rdy(struct ifnet *ifp) 768 { 769 struct dn_pipe *p; 770 771 for (p = all_pipes; p ; p = p->next ) 772 if (p->ifp == ifp) 773 break ; 774 if (p == NULL) { 775 char buf[32]; 776 sprintf(buf, "%s%d",ifp->if_name, ifp->if_unit); 777 for (p = all_pipes; p ; p = p->next ) 778 if (!strcmp(p->if_name, buf) ) { 779 p->ifp = ifp ; 780 DEB(printf("++ tx rdy from %s (now found)\n", buf);) 781 break ; 782 } 783 } 784 if (p != NULL) { 785 DEB(printf("++ tx rdy from %s%d - qlen %d\n", ifp->if_name, 786 ifp->if_unit, ifp->if_snd.ifq_len);) 787 p->numbytes = 0 ; /* mark ready for I/O */ 788 ready_event_wfq(p); 789 } 790 return 0; 791 } 792 793 /* 794 * Unconditionally expire empty queues in case of shortage. 795 * Returns the number of queues freed. 796 */ 797 static int 798 expire_queues(struct dn_flow_set *fs) 799 { 800 struct dn_flow_queue *q, *prev ; 801 int i, initial_elements = fs->rq_elements ; 802 803 if (fs->last_expired == time_second) 804 return 0 ; 805 fs->last_expired = time_second ; 806 for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ 807 for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) 808 if (q->head != NULL || q->S != q->F+1) { 809 prev = q ; 810 q = q->next ; 811 } else { /* entry is idle, expire it */ 812 struct dn_flow_queue *old_q = q ; 813 814 if (prev != NULL) 815 prev->next = q = q->next ; 816 else 817 fs->rq[i] = q = q->next ; 818 fs->rq_elements-- ; 819 free(old_q, M_DUMMYNET); 820 } 821 return initial_elements - fs->rq_elements ; 822 } 823 824 /* 825 * If room, create a new queue and put at head of slot i; 826 * otherwise, create or use the default queue. 827 */ 828 static struct dn_flow_queue * 829 create_queue(struct dn_flow_set *fs, int i) 830 { 831 struct dn_flow_queue *q ; 832 833 if (fs->rq_elements > fs->rq_size * dn_max_ratio && 834 expire_queues(fs) == 0) { 835 /* 836 * No way to get room, use or create overflow queue. 837 */ 838 i = fs->rq_size ; 839 if ( fs->rq[i] != NULL ) 840 return fs->rq[i] ; 841 } 842 q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); 843 if (q == NULL) { 844 printf("sorry, cannot allocate queue for new flow\n"); 845 return NULL ; 846 } 847 q->fs = fs ; 848 q->hash_slot = i ; 849 q->next = fs->rq[i] ; 850 q->S = q->F + 1; /* hack - mark timestamp as invalid */ 851 fs->rq[i] = q ; 852 fs->rq_elements++ ; 853 return q ; 854 } 855 856 /* 857 * Given a flow_set and a pkt in last_pkt, find a matching queue 858 * after appropriate masking. The queue is moved to front 859 * so that further searches take less time. 860 */ 861 static struct dn_flow_queue * 862 find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) 863 { 864 int i = 0 ; /* we need i and q for new allocations */ 865 struct dn_flow_queue *q, *prev; 866 867 if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) 868 q = fs->rq[0] ; 869 else { 870 /* first, do the masking */ 871 id->dst_ip &= fs->flow_mask.dst_ip ; 872 id->src_ip &= fs->flow_mask.src_ip ; 873 id->dst_port &= fs->flow_mask.dst_port ; 874 id->src_port &= fs->flow_mask.src_port ; 875 id->proto &= fs->flow_mask.proto ; 876 id->flags = 0 ; /* we don't care about this one */ 877 /* then, hash function */ 878 i = ( (id->dst_ip) & 0xffff ) ^ 879 ( (id->dst_ip >> 15) & 0xffff ) ^ 880 ( (id->src_ip << 1) & 0xffff ) ^ 881 ( (id->src_ip >> 16 ) & 0xffff ) ^ 882 (id->dst_port << 1) ^ (id->src_port) ^ 883 (id->proto ); 884 i = i % fs->rq_size ; 885 /* finally, scan the current list for a match */ 886 searches++ ; 887 for (prev=NULL, q = fs->rq[i] ; q ; ) { 888 search_steps++; 889 if (id->dst_ip == q->id.dst_ip && 890 id->src_ip == q->id.src_ip && 891 id->dst_port == q->id.dst_port && 892 id->src_port == q->id.src_port && 893 id->proto == q->id.proto && 894 id->flags == q->id.flags) 895 break ; /* found */ 896 else if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { 897 /* entry is idle and not in any heap, expire it */ 898 struct dn_flow_queue *old_q = q ; 899 900 if (prev != NULL) 901 prev->next = q = q->next ; 902 else 903 fs->rq[i] = q = q->next ; 904 fs->rq_elements-- ; 905 free(old_q, M_DUMMYNET); 906 continue ; 907 } 908 prev = q ; 909 q = q->next ; 910 } 911 if (q && prev != NULL) { /* found and not in front */ 912 prev->next = q->next ; 913 q->next = fs->rq[i] ; 914 fs->rq[i] = q ; 915 } 916 } 917 if (q == NULL) { /* no match, need to allocate a new entry */ 918 q = create_queue(fs, i); 919 if (q != NULL) 920 q->id = *id ; 921 } 922 return q ; 923 } 924 925 static int 926 red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) 927 { 928 /* 929 * RED algorithm 930 * 931 * RED calculates the average queue size (avg) using a low-pass filter 932 * with an exponential weighted (w_q) moving average: 933 * avg <- (1-w_q) * avg + w_q * q_size 934 * where q_size is the queue length (measured in bytes or * packets). 935 * 936 * If q_size == 0, we compute the idle time for the link, and set 937 * avg = (1 - w_q)^(idle/s) 938 * where s is the time needed for transmitting a medium-sized packet. 939 * 940 * Now, if avg < min_th the packet is enqueued. 941 * If avg > max_th the packet is dropped. Otherwise, the packet is 942 * dropped with probability P function of avg. 943 * 944 */ 945 946 int64_t p_b = 0; 947 /* queue in bytes or packets ? */ 948 u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len; 949 950 DEB(printf("\n%d q: %2u ", (int) curr_time, q_size);) 951 952 /* average queue size estimation */ 953 if (q_size != 0) { 954 /* 955 * queue is not empty, avg <- avg + (q_size - avg) * w_q 956 */ 957 int diff = SCALE(q_size) - q->avg; 958 int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q); 959 960 q->avg += (int) v; 961 } else { 962 /* 963 * queue is empty, find for how long the queue has been 964 * empty and use a lookup table for computing 965 * (1 - * w_q)^(idle_time/s) where s is the time to send a 966 * (small) packet. 967 * XXX check wraps... 968 */ 969 if (q->avg) { 970 u_int t = (curr_time - q->q_time) / fs->lookup_step; 971 972 q->avg = (t < fs->lookup_depth) ? 973 SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; 974 } 975 } 976 DEB(printf("avg: %u ", SCALE_VAL(q->avg));) 977 978 /* should i drop ? */ 979 980 if (q->avg < fs->min_th) { 981 q->count = -1; 982 return 0; /* accept packet ; */ 983 } 984 if (q->avg >= fs->max_th) { /* average queue >= max threshold */ 985 if (fs->flags_fs & DN_IS_GENTLE_RED) { 986 /* 987 * According to Gentle-RED, if avg is greater than max_th the 988 * packet is dropped with a probability 989 * p_b = c_3 * avg - c_4 990 * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p 991 */ 992 p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4; 993 } else { 994 q->count = -1; 995 printf("- drop"); 996 return 1 ; 997 } 998 } else if (q->avg > fs->min_th) { 999 /* 1000 * we compute p_b using the linear dropping function p_b = c_1 * 1001 * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 = 1002 * max_p * min_th / (max_th - min_th) 1003 */ 1004 p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2; 1005 } 1006 if (fs->flags_fs & DN_QSIZE_IS_BYTES) 1007 p_b = (p_b * len) / fs->max_pkt_size; 1008 if (++q->count == 0) 1009 q->random = random() & 0xffff; 1010 else { 1011 /* 1012 * q->count counts packets arrived since last drop, so a greater 1013 * value of q->count means a greater packet drop probability. 1014 */ 1015 if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) { 1016 q->count = 0; 1017 DEB(printf("- red drop");) 1018 /* after a drop we calculate a new random value */ 1019 q->random = random() & 0xffff; 1020 return 1; /* drop */ 1021 } 1022 } 1023 /* end of RED algorithm */ 1024 return 0 ; /* accept */ 1025 } 1026 1027 static __inline 1028 struct dn_flow_set * 1029 locate_flowset(int pipe_nr, struct ip_fw *rule) 1030 { 1031 #if IPFW2 1032 struct dn_flow_set *fs; 1033 ipfw_insn *cmd = rule->cmd + rule->act_ofs; 1034 1035 if (cmd->opcode == O_LOG) 1036 cmd += F_LEN(cmd); 1037 fs = ((ipfw_insn_pipe *)cmd)->pipe_ptr; 1038 1039 if (fs != NULL) 1040 return fs; 1041 1042 if (cmd->opcode == O_QUEUE) 1043 #else /* !IPFW2 */ 1044 struct dn_flow_set *fs = NULL ; 1045 1046 if ( (rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_QUEUE ) 1047 #endif /* !IPFW2 */ 1048 for (fs=all_flow_sets; fs && fs->fs_nr != pipe_nr; fs=fs->next) 1049 ; 1050 else { 1051 struct dn_pipe *p1; 1052 for (p1 = all_pipes; p1 && p1->pipe_nr != pipe_nr; p1 = p1->next) 1053 ; 1054 if (p1 != NULL) 1055 fs = &(p1->fs) ; 1056 } 1057 /* record for the future */ 1058 #if IPFW2 1059 ((ipfw_insn_pipe *)cmd)->pipe_ptr = fs; 1060 #else 1061 if (fs != NULL) 1062 rule->pipe_ptr = fs; 1063 #endif 1064 return fs ; 1065 } 1066 1067 /* 1068 * dummynet hook for packets. Below 'pipe' is a pipe or a queue 1069 * depending on whether WF2Q or fixed bw is used. 1070 * 1071 * pipe_nr pipe or queue the packet is destined for. 1072 * dir where shall we send the packet after dummynet. 1073 * m the mbuf with the packet 1074 * ifp the 'ifp' parameter from the caller. 1075 * NULL in ip_input, destination interface in ip_output, 1076 * real_dst in bdg_forward 1077 * ro route parameter (only used in ip_output, NULL otherwise) 1078 * dst destination address, only used by ip_output 1079 * rule matching rule, in case of multiple passes 1080 * flags flags from the caller, only used in ip_output 1081 * 1082 */ 1083 static int 1084 dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) 1085 { 1086 struct dn_pkt *pkt; 1087 struct dn_flow_set *fs; 1088 struct dn_pipe *pipe ; 1089 u_int64_t len = m->m_pkthdr.len ; 1090 struct dn_flow_queue *q = NULL ; 1091 int s = splimp(); 1092 int is_pipe; 1093 #if IPFW2 1094 ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; 1095 1096 if (cmd->opcode == O_LOG) 1097 cmd += F_LEN(cmd); 1098 is_pipe = (cmd->opcode == O_PIPE); 1099 #else 1100 is_pipe = (fwa->rule->fw_flg & IP_FW_F_COMMAND) == IP_FW_F_PIPE; 1101 #endif 1102 1103 pipe_nr &= 0xffff ; 1104 1105 /* 1106 * this is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule 1107 */ 1108 fs = locate_flowset(pipe_nr, fwa->rule); 1109 if (fs == NULL) 1110 goto dropit ; /* this queue/pipe does not exist! */ 1111 pipe = fs->pipe ; 1112 if (pipe == NULL) { /* must be a queue, try find a matching pipe */ 1113 for (pipe = all_pipes; pipe && pipe->pipe_nr != fs->parent_nr; 1114 pipe = pipe->next) 1115 ; 1116 if (pipe != NULL) 1117 fs->pipe = pipe ; 1118 else { 1119 printf("No pipe %d for queue %d, drop pkt\n", 1120 fs->parent_nr, fs->fs_nr); 1121 goto dropit ; 1122 } 1123 } 1124 q = find_queue(fs, &(fwa->f_id)); 1125 if ( q == NULL ) 1126 goto dropit ; /* cannot allocate queue */ 1127 /* 1128 * update statistics, then check reasons to drop pkt 1129 */ 1130 q->tot_bytes += len ; 1131 q->tot_pkts++ ; 1132 if ( fs->plr && random() < fs->plr ) 1133 goto dropit ; /* random pkt drop */ 1134 if ( fs->flags_fs & DN_QSIZE_IS_BYTES) { 1135 if (q->len_bytes > fs->qsize) 1136 goto dropit ; /* queue size overflow */ 1137 } else { 1138 if (q->len >= fs->qsize) 1139 goto dropit ; /* queue count overflow */ 1140 } 1141 if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) ) 1142 goto dropit ; 1143 1144 /* XXX expensive to zero, see if we can remove it*/ 1145 pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO); 1146 if ( pkt == NULL ) 1147 goto dropit ; /* cannot allocate packet header */ 1148 /* ok, i can handle the pkt now... */ 1149 /* build and enqueue packet + parameters */ 1150 pkt->hdr.mh_type = MT_TAG; 1151 pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET; 1152 pkt->rule = fwa->rule ; 1153 DN_NEXT(pkt) = NULL; 1154 pkt->dn_m = m; 1155 pkt->dn_dir = dir ; 1156 1157 pkt->ifp = fwa->oif; 1158 if (dir == DN_TO_IP_OUT) { 1159 /* 1160 * We need to copy *ro because for ICMP pkts (and maybe others) 1161 * the caller passed a pointer into the stack; dst might also be 1162 * a pointer into *ro so it needs to be updated. 1163 */ 1164 pkt->ro = *(fwa->ro); 1165 if (fwa->ro->ro_rt) 1166 fwa->ro->ro_rt->rt_refcnt++ ; 1167 if (fwa->dst == (struct sockaddr_in *)&fwa->ro->ro_dst) /* dst points into ro */ 1168 fwa->dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ; 1169 1170 pkt->dn_dst = fwa->dst; 1171 pkt->flags = fwa->flags; 1172 } 1173 if (q->head == NULL) 1174 q->head = pkt; 1175 else 1176 DN_NEXT(q->tail) = pkt; 1177 q->tail = pkt; 1178 q->len++; 1179 q->len_bytes += len ; 1180 1181 if ( q->head != pkt ) /* flow was not idle, we are done */ 1182 goto done; 1183 /* 1184 * If we reach this point the flow was previously idle, so we need 1185 * to schedule it. This involves different actions for fixed-rate or 1186 * WF2Q queues. 1187 */ 1188 if (is_pipe) { 1189 /* 1190 * Fixed-rate queue: just insert into the ready_heap. 1191 */ 1192 dn_key t = 0 ; 1193 if (pipe->bandwidth) 1194 t = SET_TICKS(pkt, q, pipe); 1195 q->sched_time = curr_time ; 1196 if (t == 0) /* must process it now */ 1197 ready_event( q ); 1198 else 1199 heap_insert(&ready_heap, curr_time + t , q ); 1200 } else { 1201 /* 1202 * WF2Q. First, compute start time S: if the flow was idle (S=F+1) 1203 * set S to the virtual time V for the controlling pipe, and update 1204 * the sum of weights for the pipe; otherwise, remove flow from 1205 * idle_heap and set S to max(F,V). 1206 * Second, compute finish time F = S + len/weight. 1207 * Third, if pipe was idle, update V=max(S, V). 1208 * Fourth, count one more backlogged flow. 1209 */ 1210 if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */ 1211 q->S = pipe->V ; 1212 pipe->sum += fs->weight ; /* add weight of new queue */ 1213 } else { 1214 heap_extract(&(pipe->idle_heap), q); 1215 q->S = MAX64(q->F, pipe->V ) ; 1216 } 1217 q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight; 1218 1219 if (pipe->not_eligible_heap.elements == 0 && 1220 pipe->scheduler_heap.elements == 0) 1221 pipe->V = MAX64 ( q->S, pipe->V ); 1222 fs->backlogged++ ; 1223 /* 1224 * Look at eligibility. A flow is not eligibile if S>V (when 1225 * this happens, it means that there is some other flow already 1226 * scheduled for the same pipe, so the scheduler_heap cannot be 1227 * empty). If the flow is not eligible we just store it in the 1228 * not_eligible_heap. Otherwise, we store in the scheduler_heap 1229 * and possibly invoke ready_event_wfq() right now if there is 1230 * leftover credit. 1231 * Note that for all flows in scheduler_heap (SCH), S_i <= V, 1232 * and for all flows in not_eligible_heap (NEH), S_i > V . 1233 * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH, 1234 * we only need to look into NEH. 1235 */ 1236 if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */ 1237 if (pipe->scheduler_heap.elements == 0) 1238 printf("++ ouch! not eligible but empty scheduler!\n"); 1239 heap_insert(&(pipe->not_eligible_heap), q->S, q); 1240 } else { 1241 heap_insert(&(pipe->scheduler_heap), q->F, q); 1242 if (pipe->numbytes >= 0) { /* pipe is idle */ 1243 if (pipe->scheduler_heap.elements != 1) 1244 printf("*** OUCH! pipe should have been idle!\n"); 1245 DEB(printf("Waking up pipe %d at %d\n", 1246 pipe->pipe_nr, (int)(q->F >> MY_M)); ) 1247 pipe->sched_time = curr_time ; 1248 ready_event_wfq(pipe); 1249 } 1250 } 1251 } 1252 done: 1253 splx(s); 1254 return 0; 1255 1256 dropit: 1257 splx(s); 1258 if (q) 1259 q->drops++ ; 1260 m_freem(m); 1261 return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); 1262 } 1263 1264 /* 1265 * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) 1266 * Doing this would probably save us the initial bzero of dn_pkt 1267 */ 1268 #define DN_FREE_PKT(pkt) { \ 1269 struct dn_pkt *n = pkt ; \ 1270 rt_unref ( n->ro.ro_rt ) ; \ 1271 m_freem(n->dn_m); \ 1272 pkt = DN_NEXT(n) ; \ 1273 free(n, M_DUMMYNET) ; } 1274 1275 /* 1276 * Dispose all packets and flow_queues on a flow_set. 1277 * If all=1, also remove red lookup table and other storage, 1278 * including the descriptor itself. 1279 * For the one in dn_pipe MUST also cleanup ready_heap... 1280 */ 1281 static void 1282 purge_flow_set(struct dn_flow_set *fs, int all) 1283 { 1284 struct dn_pkt *pkt ; 1285 struct dn_flow_queue *q, *qn ; 1286 int i ; 1287 1288 for (i = 0 ; i <= fs->rq_size ; i++ ) { 1289 for (q = fs->rq[i] ; q ; q = qn ) { 1290 for (pkt = q->head ; pkt ; ) 1291 DN_FREE_PKT(pkt) ; 1292 qn = q->next ; 1293 free(q, M_DUMMYNET); 1294 } 1295 fs->rq[i] = NULL ; 1296 } 1297 fs->rq_elements = 0 ; 1298 if (all) { 1299 /* RED - free lookup table */ 1300 if (fs->w_q_lookup) 1301 free(fs->w_q_lookup, M_DUMMYNET); 1302 if (fs->rq) 1303 free(fs->rq, M_DUMMYNET); 1304 /* if this fs is not part of a pipe, free it */ 1305 if (fs->pipe && fs != &(fs->pipe->fs) ) 1306 free(fs, M_DUMMYNET); 1307 } 1308 } 1309 1310 /* 1311 * Dispose all packets queued on a pipe (not a flow_set). 1312 * Also free all resources associated to a pipe, which is about 1313 * to be deleted. 1314 */ 1315 static void 1316 purge_pipe(struct dn_pipe *pipe) 1317 { 1318 struct dn_pkt *pkt ; 1319 1320 purge_flow_set( &(pipe->fs), 1 ); 1321 1322 for (pkt = pipe->head ; pkt ; ) 1323 DN_FREE_PKT(pkt) ; 1324 1325 heap_free( &(pipe->scheduler_heap) ); 1326 heap_free( &(pipe->not_eligible_heap) ); 1327 heap_free( &(pipe->idle_heap) ); 1328 } 1329 1330 /* 1331 * Delete all pipes and heaps returning memory. Must also 1332 * remove references from all ipfw rules to all pipes. 1333 */ 1334 static void 1335 dummynet_flush() 1336 { 1337 struct dn_pipe *curr_p, *p ; 1338 struct dn_flow_set *fs, *curr_fs; 1339 int s ; 1340 1341 s = splimp() ; 1342 1343 /* remove all references to pipes ...*/ 1344 flush_pipe_ptrs(NULL); 1345 /* prevent future matches... */ 1346 p = all_pipes ; 1347 all_pipes = NULL ; 1348 fs = all_flow_sets ; 1349 all_flow_sets = NULL ; 1350 /* and free heaps so we don't have unwanted events */ 1351 heap_free(&ready_heap); 1352 heap_free(&wfq_ready_heap); 1353 heap_free(&extract_heap); 1354 splx(s) ; 1355 /* 1356 * Now purge all queued pkts and delete all pipes 1357 */ 1358 /* scan and purge all flow_sets. */ 1359 for ( ; fs ; ) { 1360 curr_fs = fs ; 1361 fs = fs->next ; 1362 purge_flow_set(curr_fs, 1); 1363 } 1364 for ( ; p ; ) { 1365 purge_pipe(p); 1366 curr_p = p ; 1367 p = p->next ; 1368 free(curr_p, M_DUMMYNET); 1369 } 1370 } 1371 1372 1373 extern struct ip_fw *ip_fw_default_rule ; 1374 static void 1375 dn_rule_delete_fs(struct dn_flow_set *fs, void *r) 1376 { 1377 int i ; 1378 struct dn_flow_queue *q ; 1379 struct dn_pkt *pkt ; 1380 1381 for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ 1382 for (q = fs->rq[i] ; q ; q = q->next ) 1383 for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) ) 1384 if (pkt->rule == r) 1385 pkt->rule = ip_fw_default_rule ; 1386 } 1387 /* 1388 * when a firewall rule is deleted, scan all queues and remove the flow-id 1389 * from packets matching this rule. 1390 */ 1391 void 1392 dn_rule_delete(void *r) 1393 { 1394 struct dn_pipe *p ; 1395 struct dn_pkt *pkt ; 1396 struct dn_flow_set *fs ; 1397 1398 /* 1399 * If the rule references a queue (dn_flow_set), then scan 1400 * the flow set, otherwise scan pipes. Should do either, but doing 1401 * both does not harm. 1402 */ 1403 for ( fs = all_flow_sets ; fs ; fs = fs->next ) 1404 dn_rule_delete_fs(fs, r); 1405 for ( p = all_pipes ; p ; p = p->next ) { 1406 fs = &(p->fs) ; 1407 dn_rule_delete_fs(fs, r); 1408 for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) ) 1409 if (pkt->rule == r) 1410 pkt->rule = ip_fw_default_rule ; 1411 } 1412 } 1413 1414 /* 1415 * setup RED parameters 1416 */ 1417 static int 1418 config_red(struct dn_flow_set *p, struct dn_flow_set * x) 1419 { 1420 int i; 1421 1422 x->w_q = p->w_q; 1423 x->min_th = SCALE(p->min_th); 1424 x->max_th = SCALE(p->max_th); 1425 x->max_p = p->max_p; 1426 1427 x->c_1 = p->max_p / (p->max_th - p->min_th); 1428 x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); 1429 if (x->flags_fs & DN_IS_GENTLE_RED) { 1430 x->c_3 = (SCALE(1) - p->max_p) / p->max_th; 1431 x->c_4 = (SCALE(1) - 2 * p->max_p); 1432 } 1433 1434 /* if the lookup table already exist, free and create it again */ 1435 if (x->w_q_lookup) { 1436 free(x->w_q_lookup, M_DUMMYNET); 1437 x->w_q_lookup = NULL ; 1438 } 1439 if (red_lookup_depth == 0) { 1440 printf("\nnet.inet.ip.dummynet.red_lookup_depth must be > 0"); 1441 free(x, M_DUMMYNET); 1442 return EINVAL; 1443 } 1444 x->lookup_depth = red_lookup_depth; 1445 x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int), 1446 M_DUMMYNET, M_NOWAIT); 1447 if (x->w_q_lookup == NULL) { 1448 printf("sorry, cannot allocate red lookup table\n"); 1449 free(x, M_DUMMYNET); 1450 return ENOSPC; 1451 } 1452 1453 /* fill the lookup table with (1 - w_q)^x */ 1454 x->lookup_step = p->lookup_step ; 1455 x->lookup_weight = p->lookup_weight ; 1456 x->w_q_lookup[0] = SCALE(1) - x->w_q; 1457 for (i = 1; i < x->lookup_depth; i++) 1458 x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); 1459 if (red_avg_pkt_size < 1) 1460 red_avg_pkt_size = 512 ; 1461 x->avg_pkt_size = red_avg_pkt_size ; 1462 if (red_max_pkt_size < 1) 1463 red_max_pkt_size = 1500 ; 1464 x->max_pkt_size = red_max_pkt_size ; 1465 return 0 ; 1466 } 1467 1468 static int 1469 alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) 1470 { 1471 if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ 1472 int l = pfs->rq_size; 1473 1474 if (l == 0) 1475 l = dn_hash_size; 1476 if (l < 4) 1477 l = 4; 1478 else if (l > DN_MAX_HASH_SIZE) 1479 l = DN_MAX_HASH_SIZE; 1480 x->rq_size = l; 1481 } else /* one is enough for null mask */ 1482 x->rq_size = 1; 1483 x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), 1484 M_DUMMYNET, M_NOWAIT | M_ZERO); 1485 if (x->rq == NULL) { 1486 printf("sorry, cannot allocate queue\n"); 1487 return ENOSPC; 1488 } 1489 x->rq_elements = 0; 1490 return 0 ; 1491 } 1492 1493 static void 1494 set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) 1495 { 1496 x->flags_fs = src->flags_fs; 1497 x->qsize = src->qsize; 1498 x->plr = src->plr; 1499 x->flow_mask = src->flow_mask; 1500 if (x->flags_fs & DN_QSIZE_IS_BYTES) { 1501 if (x->qsize > 1024*1024) 1502 x->qsize = 1024*1024 ; 1503 } else { 1504 if (x->qsize == 0) 1505 x->qsize = 50 ; 1506 if (x->qsize > 100) 1507 x->qsize = 50 ; 1508 } 1509 /* configuring RED */ 1510 if ( x->flags_fs & DN_IS_RED ) 1511 config_red(src, x) ; /* XXX should check errors */ 1512 } 1513 1514 /* 1515 * setup pipe or queue parameters. 1516 */ 1517 1518 static int 1519 config_pipe(struct dn_pipe *p) 1520 { 1521 int i, s; 1522 struct dn_flow_set *pfs = &(p->fs); 1523 struct dn_flow_queue *q; 1524 1525 /* 1526 * The config program passes parameters as follows: 1527 * bw = bits/second (0 means no limits), 1528 * delay = ms, must be translated into ticks. 1529 * qsize = slots/bytes 1530 */ 1531 p->delay = ( p->delay * hz ) / 1000 ; 1532 /* We need either a pipe number or a flow_set number */ 1533 if (p->pipe_nr == 0 && pfs->fs_nr == 0) 1534 return EINVAL ; 1535 if (p->pipe_nr != 0 && pfs->fs_nr != 0) 1536 return EINVAL ; 1537 if (p->pipe_nr != 0) { /* this is a pipe */ 1538 struct dn_pipe *x, *a, *b; 1539 /* locate pipe */ 1540 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; 1541 a = b , b = b->next) ; 1542 1543 if (b == NULL || b->pipe_nr != p->pipe_nr) { /* new pipe */ 1544 x = malloc(sizeof(struct dn_pipe), M_DUMMYNET, M_NOWAIT | M_ZERO); 1545 if (x == NULL) { 1546 printf("ip_dummynet.c: no memory for new pipe\n"); 1547 return ENOSPC; 1548 } 1549 x->pipe_nr = p->pipe_nr; 1550 x->fs.pipe = x ; 1551 /* idle_heap is the only one from which we extract from the middle. 1552 */ 1553 x->idle_heap.size = x->idle_heap.elements = 0 ; 1554 x->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos); 1555 } else { 1556 x = b; 1557 s = splimp(); 1558 /* Flush accumulated credit for all queues */ 1559 for (i = 0; i <= x->fs.rq_size; i++) 1560 for (q = x->fs.rq[i]; q; q = q->next) 1561 q->numbytes = 0; 1562 splx(s); 1563 } 1564 1565 s = splimp(); 1566 x->bandwidth = p->bandwidth ; 1567 x->numbytes = 0; /* just in case... */ 1568 bcopy(p->if_name, x->if_name, sizeof(p->if_name) ); 1569 x->ifp = NULL ; /* reset interface ptr */ 1570 x->delay = p->delay ; 1571 set_fs_parms(&(x->fs), pfs); 1572 1573 1574 if ( x->fs.rq == NULL ) { /* a new pipe */ 1575 s = alloc_hash(&(x->fs), pfs) ; 1576 if (s) { 1577 free(x, M_DUMMYNET); 1578 return s ; 1579 } 1580 x->next = b ; 1581 if (a == NULL) 1582 all_pipes = x ; 1583 else 1584 a->next = x ; 1585 } 1586 splx(s); 1587 } else { /* config queue */ 1588 struct dn_flow_set *x, *a, *b ; 1589 1590 /* locate flow_set */ 1591 for (a=NULL, b=all_flow_sets ; b && b->fs_nr < pfs->fs_nr ; 1592 a = b , b = b->next) ; 1593 1594 if (b == NULL || b->fs_nr != pfs->fs_nr) { /* new */ 1595 if (pfs->parent_nr == 0) /* need link to a pipe */ 1596 return EINVAL ; 1597 x = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, M_NOWAIT|M_ZERO); 1598 if (x == NULL) { 1599 printf("ip_dummynet.c: no memory for new flow_set\n"); 1600 return ENOSPC; 1601 } 1602 x->fs_nr = pfs->fs_nr; 1603 x->parent_nr = pfs->parent_nr; 1604 x->weight = pfs->weight ; 1605 if (x->weight == 0) 1606 x->weight = 1 ; 1607 else if (x->weight > 100) 1608 x->weight = 100 ; 1609 } else { 1610 /* Change parent pipe not allowed; must delete and recreate */ 1611 if (pfs->parent_nr != 0 && b->parent_nr != pfs->parent_nr) 1612 return EINVAL ; 1613 x = b; 1614 } 1615 s = splimp(); 1616 set_fs_parms(x, pfs); 1617 1618 if ( x->rq == NULL ) { /* a new flow_set */ 1619 s = alloc_hash(x, pfs) ; 1620 if (s) { 1621 free(x, M_DUMMYNET); 1622 return s ; 1623 } 1624 x->next = b; 1625 if (a == NULL) 1626 all_flow_sets = x; 1627 else 1628 a->next = x; 1629 } 1630 splx(s); 1631 } 1632 return 0 ; 1633 } 1634 1635 /* 1636 * Helper function to remove from a heap queues which are linked to 1637 * a flow_set about to be deleted. 1638 */ 1639 static void 1640 fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) 1641 { 1642 int i = 0, found = 0 ; 1643 for (; i < h->elements ;) 1644 if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) { 1645 h->elements-- ; 1646 h->p[i] = h->p[h->elements] ; 1647 found++ ; 1648 } else 1649 i++ ; 1650 if (found) 1651 heapify(h); 1652 } 1653 1654 /* 1655 * helper function to remove a pipe from a heap (can be there at most once) 1656 */ 1657 static void 1658 pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) 1659 { 1660 if (h->elements > 0) { 1661 int i = 0 ; 1662 for (i=0; i < h->elements ; i++ ) { 1663 if (h->p[i].object == p) { /* found it */ 1664 h->elements-- ; 1665 h->p[i] = h->p[h->elements] ; 1666 heapify(h); 1667 break ; 1668 } 1669 } 1670 } 1671 } 1672 1673 /* 1674 * drain all queues. Called in case of severe mbuf shortage. 1675 */ 1676 void 1677 dummynet_drain() 1678 { 1679 struct dn_flow_set *fs; 1680 struct dn_pipe *p; 1681 struct dn_pkt *pkt; 1682 1683 heap_free(&ready_heap); 1684 heap_free(&wfq_ready_heap); 1685 heap_free(&extract_heap); 1686 /* remove all references to this pipe from flow_sets */ 1687 for (fs = all_flow_sets; fs; fs= fs->next ) 1688 purge_flow_set(fs, 0); 1689 1690 for (p = all_pipes; p; p= p->next ) { 1691 purge_flow_set(&(p->fs), 0); 1692 for (pkt = p->head ; pkt ; ) 1693 DN_FREE_PKT(pkt) ; 1694 p->head = p->tail = NULL ; 1695 } 1696 } 1697 1698 /* 1699 * Fully delete a pipe or a queue, cleaning up associated info. 1700 */ 1701 static int 1702 delete_pipe(struct dn_pipe *p) 1703 { 1704 int s ; 1705 1706 if (p->pipe_nr == 0 && p->fs.fs_nr == 0) 1707 return EINVAL ; 1708 if (p->pipe_nr != 0 && p->fs.fs_nr != 0) 1709 return EINVAL ; 1710 if (p->pipe_nr != 0) { /* this is an old-style pipe */ 1711 struct dn_pipe *a, *b; 1712 struct dn_flow_set *fs; 1713 1714 /* locate pipe */ 1715 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ; 1716 a = b , b = b->next) ; 1717 if (b == NULL || (b->pipe_nr != p->pipe_nr) ) 1718 return EINVAL ; /* not found */ 1719 1720 s = splimp() ; 1721 1722 /* unlink from list of pipes */ 1723 if (a == NULL) 1724 all_pipes = b->next ; 1725 else 1726 a->next = b->next ; 1727 /* remove references to this pipe from the ip_fw rules. */ 1728 flush_pipe_ptrs(&(b->fs)); 1729 1730 /* remove all references to this pipe from flow_sets */ 1731 for (fs = all_flow_sets; fs; fs= fs->next ) 1732 if (fs->pipe == b) { 1733 printf("++ ref to pipe %d from fs %d\n", 1734 p->pipe_nr, fs->fs_nr); 1735 fs->pipe = NULL ; 1736 purge_flow_set(fs, 0); 1737 } 1738 fs_remove_from_heap(&ready_heap, &(b->fs)); 1739 purge_pipe(b); /* remove all data associated to this pipe */ 1740 /* remove reference to here from extract_heap and wfq_ready_heap */ 1741 pipe_remove_from_heap(&extract_heap, b); 1742 pipe_remove_from_heap(&wfq_ready_heap, b); 1743 splx(s); 1744 free(b, M_DUMMYNET); 1745 } else { /* this is a WF2Q queue (dn_flow_set) */ 1746 struct dn_flow_set *a, *b; 1747 1748 /* locate set */ 1749 for (a = NULL, b = all_flow_sets ; b && b->fs_nr < p->fs.fs_nr ; 1750 a = b , b = b->next) ; 1751 if (b == NULL || (b->fs_nr != p->fs.fs_nr) ) 1752 return EINVAL ; /* not found */ 1753 1754 s = splimp() ; 1755 if (a == NULL) 1756 all_flow_sets = b->next ; 1757 else 1758 a->next = b->next ; 1759 /* remove references to this flow_set from the ip_fw rules. */ 1760 flush_pipe_ptrs(b); 1761 1762 if (b->pipe != NULL) { 1763 /* Update total weight on parent pipe and cleanup parent heaps */ 1764 b->pipe->sum -= b->weight * b->backlogged ; 1765 fs_remove_from_heap(&(b->pipe->not_eligible_heap), b); 1766 fs_remove_from_heap(&(b->pipe->scheduler_heap), b); 1767 #if 1 /* XXX should i remove from idle_heap as well ? */ 1768 fs_remove_from_heap(&(b->pipe->idle_heap), b); 1769 #endif 1770 } 1771 purge_flow_set(b, 1); 1772 splx(s); 1773 } 1774 return 0 ; 1775 } 1776 1777 /* 1778 * helper function used to copy data from kernel in DUMMYNET_GET 1779 */ 1780 static char * 1781 dn_copy_set(struct dn_flow_set *set, char *bp) 1782 { 1783 int i, copied = 0 ; 1784 struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; 1785 1786 for (i = 0 ; i <= set->rq_size ; i++) 1787 for (q = set->rq[i] ; q ; q = q->next, qp++ ) { 1788 if (q->hash_slot != i) 1789 printf("++ at %d: wrong slot (have %d, " 1790 "should be %d)\n", copied, q->hash_slot, i); 1791 if (q->fs != set) 1792 printf("++ at %d: wrong fs ptr (have %p, should be %p)\n", 1793 i, q->fs, set); 1794 copied++ ; 1795 bcopy(q, qp, sizeof( *q ) ); 1796 /* cleanup pointers */ 1797 qp->next = NULL ; 1798 qp->head = qp->tail = NULL ; 1799 qp->fs = NULL ; 1800 } 1801 if (copied != set->rq_elements) 1802 printf("++ wrong count, have %d should be %d\n", 1803 copied, set->rq_elements); 1804 return (char *)qp ; 1805 } 1806 1807 static int 1808 dummynet_get(struct sockopt *sopt) 1809 { 1810 char *buf, *bp ; /* bp is the "copy-pointer" */ 1811 size_t size ; 1812 struct dn_flow_set *set ; 1813 struct dn_pipe *p ; 1814 int s, error=0 ; 1815 1816 s = splimp(); 1817 /* 1818 * compute size of data structures: list of pipes and flow_sets. 1819 */ 1820 for (p = all_pipes, size = 0 ; p ; p = p->next ) 1821 size += sizeof( *p ) + 1822 p->fs.rq_elements * sizeof(struct dn_flow_queue); 1823 for (set = all_flow_sets ; set ; set = set->next ) 1824 size += sizeof ( *set ) + 1825 set->rq_elements * sizeof(struct dn_flow_queue); 1826 buf = malloc(size, M_TEMP, M_NOWAIT); 1827 if (buf == 0) { 1828 splx(s); 1829 return ENOBUFS ; 1830 } 1831 for (p = all_pipes, bp = buf ; p ; p = p->next ) { 1832 struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ; 1833 1834 /* 1835 * copy pipe descriptor into *bp, convert delay back to ms, 1836 * then copy the flow_set descriptor(s) one at a time. 1837 * After each flow_set, copy the queue descriptor it owns. 1838 */ 1839 bcopy(p, bp, sizeof( *p ) ); 1840 pipe_bp->delay = (pipe_bp->delay * 1000) / hz ; 1841 /* 1842 * XXX the following is a hack based on ->next being the 1843 * first field in dn_pipe and dn_flow_set. The correct 1844 * solution would be to move the dn_flow_set to the beginning 1845 * of struct dn_pipe. 1846 */ 1847 pipe_bp->next = (struct dn_pipe *)DN_IS_PIPE ; 1848 /* clean pointers */ 1849 pipe_bp->head = pipe_bp->tail = NULL ; 1850 pipe_bp->fs.next = NULL ; 1851 pipe_bp->fs.pipe = NULL ; 1852 pipe_bp->fs.rq = NULL ; 1853 1854 bp += sizeof( *p ) ; 1855 bp = dn_copy_set( &(p->fs), bp ); 1856 } 1857 for (set = all_flow_sets ; set ; set = set->next ) { 1858 struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp ; 1859 bcopy(set, bp, sizeof( *set ) ); 1860 /* XXX same hack as above */ 1861 fs_bp->next = (struct dn_flow_set *)DN_IS_QUEUE ; 1862 fs_bp->pipe = NULL ; 1863 fs_bp->rq = NULL ; 1864 bp += sizeof( *set ) ; 1865 bp = dn_copy_set( set, bp ); 1866 } 1867 splx(s); 1868 error = sooptcopyout(sopt, buf, size); 1869 free(buf, M_TEMP); 1870 return error ; 1871 } 1872 1873 /* 1874 * Handler for the various dummynet socket options (get, flush, config, del) 1875 */ 1876 static int 1877 ip_dn_ctl(struct sockopt *sopt) 1878 { 1879 int error = 0 ; 1880 struct dn_pipe *p, tmp_pipe; 1881 1882 /* Disallow sets in really-really secure mode. */ 1883 if (sopt->sopt_dir == SOPT_SET) { 1884 #if __FreeBSD_version >= 500034 1885 error = securelevel_ge(sopt->sopt_td->td_ucred, 3); 1886 if (error) 1887 return (error); 1888 #else 1889 if (securelevel >= 3) 1890 return (EPERM); 1891 #endif 1892 } 1893 1894 switch (sopt->sopt_name) { 1895 default : 1896 printf("ip_dn_ctl -- unknown option %d", sopt->sopt_name); 1897 return EINVAL ; 1898 1899 case IP_DUMMYNET_GET : 1900 error = dummynet_get(sopt); 1901 break ; 1902 1903 case IP_DUMMYNET_FLUSH : 1904 dummynet_flush() ; 1905 break ; 1906 1907 case IP_DUMMYNET_CONFIGURE : 1908 p = &tmp_pipe ; 1909 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); 1910 if (error) 1911 break ; 1912 error = config_pipe(p); 1913 break ; 1914 1915 case IP_DUMMYNET_DEL : /* remove a pipe or queue */ 1916 p = &tmp_pipe ; 1917 error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); 1918 if (error) 1919 break ; 1920 1921 error = delete_pipe(p); 1922 break ; 1923 } 1924 return error ; 1925 } 1926 1927 static void 1928 ip_dn_init(void) 1929 { 1930 printf("DUMMYNET initialized (011031)\n"); 1931 all_pipes = NULL ; 1932 all_flow_sets = NULL ; 1933 ready_heap.size = ready_heap.elements = 0 ; 1934 ready_heap.offset = 0 ; 1935 1936 wfq_ready_heap.size = wfq_ready_heap.elements = 0 ; 1937 wfq_ready_heap.offset = 0 ; 1938 1939 extract_heap.size = extract_heap.elements = 0 ; 1940 extract_heap.offset = 0 ; 1941 ip_dn_ctl_ptr = ip_dn_ctl; 1942 ip_dn_io_ptr = dummynet_io; 1943 ip_dn_ruledel_ptr = dn_rule_delete; 1944 bzero(&dn_timeout, sizeof(struct callout_handle)); 1945 dn_timeout = timeout(dummynet, NULL, 1); 1946 } 1947 1948 static int 1949 dummynet_modevent(module_t mod, int type, void *data) 1950 { 1951 int s; 1952 switch (type) { 1953 case MOD_LOAD: 1954 s = splimp(); 1955 if (DUMMYNET_LOADED) { 1956 splx(s); 1957 printf("DUMMYNET already loaded\n"); 1958 return EEXIST ; 1959 } 1960 ip_dn_init(); 1961 splx(s); 1962 break; 1963 1964 case MOD_UNLOAD: 1965 #if !defined(KLD_MODULE) 1966 printf("dummynet statically compiled, cannot unload\n"); 1967 return EINVAL ; 1968 #else 1969 s = splimp(); 1970 untimeout(dummynet, NULL, dn_timeout); 1971 dummynet_flush(); 1972 ip_dn_ctl_ptr = NULL; 1973 ip_dn_io_ptr = NULL; 1974 ip_dn_ruledel_ptr = NULL; 1975 splx(s); 1976 #endif 1977 break ; 1978 default: 1979 break ; 1980 } 1981 return 0 ; 1982 } 1983 1984 static moduledata_t dummynet_mod = { 1985 "dummynet", 1986 dummynet_modevent, 1987 NULL 1988 }; 1989 DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1990 MODULE_DEPEND(dummynet, ipfw, 1, 1, 1); 1991 MODULE_VERSION(dummynet, 1); 1992