/*
 * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.24.2.22 2003/05/13 09:31:06 maxim Exp $
 */

#include "opt_ipdn.h"

/*
 * This module implements IP dummynet, a bandwidth limiter/delay emulator.
 * Description of the data structures used is in ip_dummynet.h
 * Here you mainly find the following blocks of code:
 *  + variable declarations;
 *  + heap management functions;
 *  + scheduler and dummynet functions;
 *  + configuration and initialization.
 *
 * Most important Changes:
 *
 * 011004: KLDable
 * 010124: Fixed WF2Q behaviour
 * 010122: Fixed spl protection.
 * 000601: WF2Q support
 * 000106: Large rewrite, use heaps to handle very many pipes.
 * 980513: Initial release
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systimer.h>
#include <sys/thread2.h>

#include <net/ethernet.h>
#include <net/netmsg2.h>
#include <net/route.h>

#include <netinet/in_var.h>
#include <netinet/ip_var.h>

#include <net/dummynet/ip_dummynet.h>

#ifdef DUMMYNET_DEBUG
#define DPRINTF(fmt, ...)	kprintf(fmt, __VA_ARGS__)
#else
#define DPRINTF(fmt, ...)	((void)0)
#endif

#ifndef DN_CALLOUT_FREQ_MAX
#define DN_CALLOUT_FREQ_MAX	10000
#endif

/*
 * The maximum/minimum hash table size for queues.
 * These values must be a power of 2.
 */
#define DN_MIN_HASH_SIZE	4
#define DN_MAX_HASH_SIZE	65536

/*
 * Some macros are used to compare key values and handle wraparounds.
 * MAX64 returns the largest of two key values.
 */
#define DN_KEY_LT(a, b)		((int64_t)((a) - (b)) < 0)
#define DN_KEY_LEQ(a, b)	((int64_t)((a) - (b)) <= 0)
#define DN_KEY_GT(a, b)		((int64_t)((a) - (b)) > 0)
#define DN_KEY_GEQ(a, b)	((int64_t)((a) - (b)) >= 0)
#define MAX64(x, y)		((((int64_t)((y) - (x))) > 0) ? (y) : (x))
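
/*
 * Illustrative note on the wraparound-safe comparisons above: keys are
 * compared through a signed 64-bit difference, so after the key space
 * wraps, a = 0xfffffffffffffffe and b = 0x0000000000000002 still give
 * (int64_t)(a - b) == -4 and DN_KEY_LT(a, b) holds.  Ordering is thus
 * preserved as long as any two live keys are less than 2^63 apart.
 */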

#define DN_NR_HASH_MAX		16
#define DN_NR_HASH_MASK		(DN_NR_HASH_MAX - 1)
#define DN_NR_HASH(nr)		\
	((((nr) >> 12) ^ ((nr) >> 8) ^ ((nr) >> 4) ^ (nr)) & DN_NR_HASH_MASK)

MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");

extern int	ip_dn_cpu;

static dn_key curr_time = 0;		/* current simulation time */
static int dn_hash_size = 64;		/* default hash size */
static int pipe_expire = 1;		/* expire queue if empty */
static int dn_max_ratio = 16;		/* max queues/buckets ratio */

/*
 * Statistics on number of queue searches and search steps
 */
static int searches;
static int search_steps;

/*
 * RED parameters
 */
static int red_lookup_depth = 256;	/* default lookup table depth */
static int red_avg_pkt_size = 512;	/* default medium packet size */
static int red_max_pkt_size = 1500;	/* default max packet size */

/*
 * Three heaps contain queues and pipes that the scheduler handles:
 *
 *  + ready_heap contains all dn_flow_queue related to fixed-rate pipes.
 *  + wfq_ready_heap contains the pipes associated with WF2Q flows.
 *  + extract_heap contains pipes associated with delay lines.
 */
static struct dn_heap ready_heap;
static struct dn_heap extract_heap;
static struct dn_heap wfq_ready_heap;

static struct dn_pipe_head pipe_table[DN_NR_HASH_MAX];
static struct dn_flowset_head flowset_table[DN_NR_HASH_MAX];

/*
 * Variables for dummynet systimer
 */
static struct netmsg_base dn_netmsg;
static struct systimer dn_clock;
static int dn_hz = 1000;

static int	sysctl_dn_hz(SYSCTL_HANDLER_ARGS);

SYSCTL_DECL(_net_inet_ip_dummynet);

SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, CTLFLAG_RW,
	   &dn_hash_size, 0, "Default hash table size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time, CTLFLAG_RD,
	   &curr_time, 0, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, CTLFLAG_RW,
	   &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, CTLFLAG_RW,
	   &dn_max_ratio, 0, "Max ratio between dynamic queues and buckets");

SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, CTLFLAG_RD,
	   &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, CTLFLAG_RD,
	   &extract_heap.size, 0, "Size of extract heap");

SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches, CTLFLAG_RD,
	   &searches, 0, "Number of queue searches");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps, CTLFLAG_RD,
	   &search_steps, 0, "Number of queue search steps");

SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, CTLFLAG_RD,
	   &red_lookup_depth, 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, CTLFLAG_RD,
	   &red_avg_pkt_size, 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, CTLFLAG_RD,
	   &red_max_pkt_size, 0, "RED Max packet size");

SYSCTL_PROC(_net_inet_ip_dummynet, OID_AUTO, hz, CTLTYPE_INT | CTLFLAG_RW,
	    0, 0, sysctl_dn_hz, "I", "Dummynet callout frequency");

static int	heap_init(struct dn_heap *, int);
static int	heap_insert(struct dn_heap *, dn_key, void *);
static void	heap_extract(struct dn_heap *, void *);

static void	transmit_event(struct dn_pipe *);
static void	ready_event(struct dn_flow_queue *);
static void	ready_event_wfq(struct dn_pipe *);

static int	config_pipe(struct dn_ioc_pipe *);
static void	dummynet_flush(void);

static void	dummynet_clock(systimer_t, int, struct intrframe *);
static void	dummynet(netmsg_t);

static struct dn_pipe *dn_find_pipe(int);
static struct dn_flow_set *dn_locate_flowset(int, int);

typedef void	(*dn_pipe_iter_t)(struct dn_pipe *, void *);
static void	dn_iterate_pipe(dn_pipe_iter_t, void *);

typedef void	(*dn_flowset_iter_t)(struct dn_flow_set *, void *);
static void	dn_iterate_flowset(dn_flowset_iter_t, void *);

static ip_dn_io_t	dummynet_io;
static ip_dn_ctl_t	dummynet_ctl;

/*
 * Heap management functions.
 *
 * In the heap, the first node is element 0.  Children of i are 2i+1 and
 * 2i+2.  Some macros help find parent/children so we can optimize them.
 *
 * heap_init() is called to expand the heap when needed.
 * Increment size in blocks of 16 entries.
 * XXX failure to allocate a new element is a pretty bad failure
 * as we basically stall a whole queue forever!!
 * Returns 1 on error, 0 on success
 */
#define HEAP_FATHER(x)		(((x) - 1) / 2)
#define HEAP_LEFT(x)		(2*(x) + 1)
#define HEAP_IS_LEFT(x)		((x) & 1)
#define HEAP_RIGHT(x)		(2*(x) + 2)
#define HEAP_SWAP(a, b, buffer)	{ buffer = a; a = b; b = buffer; }
#define HEAP_INCREMENT		15
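
/*
 * Index arithmetic example for the macros above: node 2 has children
 * HEAP_LEFT(2) == 5 and HEAP_RIGHT(2) == 6, both of which satisfy
 * HEAP_FATHER() == 2; HEAP_IS_LEFT(5) is true because left children
 * always sit at odd indices in this 0-based layout.
 */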

static int
heap_init(struct dn_heap *h, int new_size)
{
	struct dn_heap_entry *p;

	if (h->size >= new_size) {
		kprintf("%s, Bogus call, have %d want %d\n", __func__,
			h->size, new_size);
		return 0;
	}

	new_size = (new_size + HEAP_INCREMENT) & ~HEAP_INCREMENT;
	p = kmalloc(new_size * sizeof(*p), M_DUMMYNET, M_WAITOK | M_ZERO);
	if (h->size > 0) {
		bcopy(h->p, p, h->size * sizeof(*p));
		kfree(h->p, M_DUMMYNET);
	}
	h->p = p;
	h->size = new_size;
	return 0;
}

/*
 * Insert element in heap.  Normally, p != NULL, we insert p in
 * a new position and bubble up.  If p == NULL, then the element is
 * already in place, and key is the position where to start the
 * bubble-up.
 * Returns 1 on failure (cannot allocate new heap entry)
 *
 * If offset > 0 the position (index, int) of the element in the heap is
 * also stored in the element itself at the given offset in bytes.
 */
#define SET_OFFSET(heap, node) \
	if (heap->offset > 0) \
		*((int *)((char *)(heap->p[node].object) + heap->offset)) = node;

/*
 * RESET_OFFSET is used for sanity checks.  It sets offset to an invalid value.
 */
#define RESET_OFFSET(heap, node) \
	if (heap->offset > 0) \
		*((int *)((char *)(heap->p[node].object) + heap->offset)) = -1;

static int
heap_insert(struct dn_heap *h, dn_key key1, void *p)
{
	int son;

	if (p == NULL) {	/* Data already there, set starting point */
		son = key1;
	} else {		/* Insert new element at the end, possibly resize */
		son = h->elements;
		if (son == h->size) {	/* Need resize... */
			if (heap_init(h, h->elements + 1))
				return 1;	/* Failure... */
		}
		h->p[son].object = p;
		h->p[son].key = key1;
		h->elements++;
	}

	while (son > 0) {	/* Bubble up */
		int father = HEAP_FATHER(son);
		struct dn_heap_entry tmp;

		if (DN_KEY_LT(h->p[father].key, h->p[son].key))
			break;	/* Found right position */

		/* 'son' smaller than 'father', swap and repeat */
		HEAP_SWAP(h->p[son], h->p[father], tmp);
		SET_OFFSET(h, son);
		son = father;
	}
	SET_OFFSET(h, son);
	return 0;
}

/*
 * Remove top element from heap, or obj if obj != NULL
 */
static void
heap_extract(struct dn_heap *h, void *obj)
{
	int child, father, max = h->elements - 1;

	if (max < 0) {
		kprintf("warning, extract from empty heap 0x%p\n", h);
		return;
	}

	father = 0;	/* Default: move up smallest child */
	if (obj != NULL) {	/* Extract specific element, index is at offset */
		if (h->offset <= 0)
			panic("%s from middle not supported on this heap!!!", __func__);

		father = *((int *)((char *)obj + h->offset));
		if (father < 0 || father >= h->elements) {
			panic("%s father %d out of bound 0..%d", __func__,
			      father, h->elements);
		}
	}
	RESET_OFFSET(h, father);

	child = HEAP_LEFT(father);		/* Left child */
	while (child <= max) {			/* Valid entry */
		if (child != max && DN_KEY_LT(h->p[child + 1].key, h->p[child].key))
			child = child + 1;	/* Take right child, otherwise left */
		h->p[father] = h->p[child];
		SET_OFFSET(h, father);
		father = child;
		child = HEAP_LEFT(child);	/* Left child for next loop */
	}
	h->elements--;
	if (father != max) {
		/*
		 * Fill hole with last entry and bubble up, reusing the insert code
		 */
		h->p[father] = h->p[max];
		heap_insert(h, father, NULL);	/* This one cannot fail */
	}
}

/*
 * heapify() will reorganize data inside an array to maintain the
 * heap property.  It is needed when we delete a bunch of entries.
 */
static void
heapify(struct dn_heap *h)
{
	int i;

	for (i = 0; i < h->elements; i++)
		heap_insert(h, i, NULL);
}

/*
 * Cleanup the heap and free data structure
 */
static void
heap_free(struct dn_heap *h)
{
	if (h->size > 0)
		kfree(h->p, M_DUMMYNET);
	bzero(h, sizeof(*h));
}

/*
 * --- End of heap management functions ---
 */
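
#if 0
/*
 * Usage sketch for the heap API above, not compiled in: extraction from
 * the middle requires the heap's 'offset' to point at an int field of
 * the stored object, where SET_OFFSET() keeps the current index.  The
 * struct and field names below are hypothetical.
 */
struct example_obj {
	dn_key	deadline;	/* heap key */
	int	heap_pos;	/* maintained by SET_OFFSET() */
};

static struct dn_heap example_heap = {
	.offset = __offsetof(struct example_obj, heap_pos)
};

static void
example_use(struct example_obj *o)
{
	heap_insert(&example_heap, o->deadline, o);	/* bubble-up */
	heap_extract(&example_heap, o);	/* remove from the middle */
}
#endif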

/*
 * Scheduler functions:
 *
 * transmit_event() is called when the delay-line needs to enter
 * the scheduler, either because of existing pkts getting ready,
 * or new packets entering the queue.  The event handled is the delivery
 * time of the packet.
 *
 * ready_event() does something similar with fixed-rate queues, and the
 * event handled is the finish time of the head pkt.
 *
 * ready_event_wfq() does something similar with WF2Q queues, and the
 * event handled is the start time of the head pkt.
 *
 * In all cases, we make sure that the data structures are consistent
 * before passing pkts out, because this might trigger recursive
 * invocations of the procedures.
 */
static void
transmit_event(struct dn_pipe *pipe)
{
	struct dn_pkt *pkt;

	while ((pkt = TAILQ_FIRST(&pipe->p_queue)) &&
	       DN_KEY_LEQ(pkt->output_time, curr_time)) {
		TAILQ_REMOVE(&pipe->p_queue, pkt, dn_next);
		ip_dn_packet_redispatch(pkt);
	}

	/*
	 * If there are leftover packets, put into the heap for next event
	 */
	if ((pkt = TAILQ_FIRST(&pipe->p_queue)) != NULL) {
		/*
		 * XXX should check errors on heap_insert, by draining the
		 * whole pipe and hoping in the future we are more successful
		 */
		heap_insert(&extract_heap, pkt->output_time, pipe);
	}
}

/*
 * The following macro computes how many ticks we have to wait
 * before being able to transmit a packet.  The credit is taken from
 * either a pipe (WF2Q) or a flow_queue (per-flow queueing).
 */
#define SET_TICKS(pkt, q, p) \
	((pkt->dn_m->m_pkthdr.len*8*dn_hz - (q)->numbytes + p->bandwidth - 1) / \
	 p->bandwidth)
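
/*
 * Worked example of the arithmetic above: credit (numbytes) grows by
 * 'bandwidth' units per tick and a packet costs len*8*dn_hz units, so
 * with bandwidth = 1000000 bit/s, dn_hz = 1000 and a 1500-byte packet
 * starting from zero credit, SET_TICKS yields
 * 1500*8*1000 / 1000000 = 12 ticks, i.e. the 12 ms serialization delay
 * of 12000 bits at 1 Mbit/s.
 */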

/*
 * Extract pkt from queue, compute output time (could be now)
 * and put into delay line (p_queue)
 */
static void
move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
	 struct dn_pipe *p, int len)
{
	TAILQ_REMOVE(&q->queue, pkt, dn_next);
	q->len--;
	q->len_bytes -= len;

	pkt->output_time = curr_time + p->delay;

	TAILQ_INSERT_TAIL(&p->p_queue, pkt, dn_next);
}

/*
 * ready_event() is invoked every time the queue must enter the
 * scheduler, either because the first packet arrives, or because
 * a previously scheduled event fired.
 * On invocation, drain as many pkts as possible (could be 0) and then
 * if there are leftover packets reinsert the pkt in the scheduler.
 */
static void
ready_event(struct dn_flow_queue *q)
{
	struct dn_pkt *pkt;
	struct dn_pipe *p = q->fs->pipe;
	int p_was_empty;

	if (p == NULL) {
		kprintf("ready_event - pipe is gone\n");
		return;
	}
	p_was_empty = TAILQ_EMPTY(&p->p_queue);

	/*
	 * Schedule fixed-rate queues linked to this pipe:
	 * Account for the bw accumulated since last scheduling, then
	 * drain as many pkts as allowed by q->numbytes and move to
	 * the delay line (in p) computing output time.
	 * bandwidth==0 (no limit) means we can drain the whole queue,
	 * setting len_scaled = 0 does the job.
	 */
	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
	while ((pkt = TAILQ_FIRST(&q->queue)) != NULL) {
		int len = pkt->dn_m->m_pkthdr.len;
		int len_scaled = p->bandwidth ? len*8*dn_hz : 0;

		if (len_scaled > q->numbytes)
			break;
		q->numbytes -= len_scaled;
		move_pkt(pkt, q, p, len);
	}

	/*
	 * If we have more packets queued, schedule next ready event
	 * (can only occur when bandwidth != 0, otherwise we would have
	 * flushed the whole queue in the previous loop).
	 * To this purpose we record the current time and compute how many
	 * ticks to go for the finish time of the packet.
	 */
	if ((pkt = TAILQ_FIRST(&q->queue)) != NULL) {
		/* This implies bandwidth != 0 */
		dn_key t = SET_TICKS(pkt, q, p);	/* ticks we have to wait */

		q->sched_time = curr_time;

		/*
		 * XXX should check errors on heap_insert, and drain the whole
		 * queue on error hoping next time we are luckier.
		 */
		heap_insert(&ready_heap, curr_time + t, q);
	} else {	/* RED needs to know when the queue becomes empty */
		q->q_time = curr_time;
		q->numbytes = 0;
	}

	/*
	 * If the delay line was empty call transmit_event(p) now.
	 * Otherwise, the scheduler will take care of it.
	 */
	if (p_was_empty)
		transmit_event(p);
}
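
/*
 * Worked example of the WF2Q+ bookkeeping used below: a flow's start
 * and finish times evolve as S = max(F_prev, V) and
 * F = S + (len << MY_M) / weight.  With two backlogged flows of weight
 * 2 and 1 sending equal-sized packets, the first flow's F advances
 * half as fast per packet, so the scheduler (which always serves the
 * smallest eligible F) picks it twice as often, sharing the link in
 * proportion to the weights.
 */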

/*
 * Called when we can transmit packets on WF2Q queues.  Take pkts out of
 * the queues at their start time, and enqueue into the delay line.
 * Packets are drained until p->numbytes < 0.  As long as
 * len_scaled >= p->numbytes, the packet goes into the delay line
 * with a deadline p->delay.  For the last packet, if p->numbytes < 0,
 * there is an additional delay.
 */
static void
ready_event_wfq(struct dn_pipe *p)
{
	int p_was_empty = TAILQ_EMPTY(&p->p_queue);
	struct dn_heap *sch = &p->scheduler_heap;
	struct dn_heap *neh = &p->not_eligible_heap;

	p->numbytes += (curr_time - p->sched_time) * p->bandwidth;

	/*
	 * While we have backlogged traffic AND credit, we need to do
	 * something on the queue.
	 */
	while (p->numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
		if (sch->elements > 0) {	/* Have some eligible pkts to send out */
			struct dn_flow_queue *q = sch->p[0].object;
			struct dn_pkt *pkt = TAILQ_FIRST(&q->queue);
			struct dn_flow_set *fs = q->fs;
			uint64_t len = pkt->dn_m->m_pkthdr.len;
			int len_scaled = p->bandwidth ? len*8*dn_hz : 0;

			heap_extract(sch, NULL);	/* Remove queue from heap */
			p->numbytes -= len_scaled;
			move_pkt(pkt, q, p, len);

			p->V += (len << MY_M) / p->sum;	/* Update V */
			q->S = q->F;			/* Update start time */

			if (q->len == 0) {	/* Flow not backlogged any more */
				fs->backlogged--;
				heap_insert(&p->idle_heap, q->F, q);
			} else {		/* Still backlogged */
				/*
				 * Update F and position in backlogged queue, then
				 * put flow in not_eligible_heap (we will fix this later).
				 */
				len = TAILQ_FIRST(&q->queue)->dn_m->m_pkthdr.len;
				q->F += (len << MY_M) / (uint64_t)fs->weight;
				if (DN_KEY_LEQ(q->S, p->V))
					heap_insert(neh, q->S, q);
				else
					heap_insert(sch, q->F, q);
			}
		}

		/*
		 * Now compute V = max(V, min(S_i)).  Remember that all elements in
		 * sch have by definition S_i <= V so if sch is not empty, V is surely
		 * the max and we must not update it.  Conversely, if sch is empty
		 * we only need to look at neh.
		 */
		if (sch->elements == 0 && neh->elements > 0)
			p->V = MAX64(p->V, neh->p[0].key);

		/*
		 * Move from neh to sch any packets that have become eligible
		 */
		while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
			struct dn_flow_queue *q = neh->p[0].object;

			heap_extract(neh, NULL);
			heap_insert(sch, q->F, q);
		}
	}

	if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0 &&
	    p->idle_heap.elements > 0) {
		/*
		 * No traffic and no events scheduled.  We can get rid of idle-heap.
		 */
		int i;

		for (i = 0; i < p->idle_heap.elements; i++) {
			struct dn_flow_queue *q = p->idle_heap.p[i].object;

			q->F = 0;
			q->S = q->F + 1;
		}
		p->sum = 0;
		p->V = 0;
		p->idle_heap.elements = 0;
	}

	/*
	 * If we are getting clocks from dummynet and if we are under credit,
	 * schedule the next ready event.
	 * Also fix the delivery time of the last packet.
	 */
	if (p->numbytes < 0) {	/* This implies bandwidth > 0 */
		dn_key t = 0;	/* Number of ticks we have to wait */

		if (p->bandwidth > 0)
			t = (p->bandwidth - 1 - p->numbytes) / p->bandwidth;
		TAILQ_LAST(&p->p_queue, dn_pkt_queue)->output_time += t;
		p->sched_time = curr_time;

		/*
		 * XXX should check errors on heap_insert, and drain the whole
		 * queue on error hoping next time we are luckier.
		 */
		heap_insert(&wfq_ready_heap, curr_time + t, p);
	}

	/*
	 * If the delay line was empty call transmit_event(p) now.
	 * Otherwise, the scheduler will take care of it.
	 */
	if (p_was_empty)
		transmit_event(p);
}

static void
dn_expire_pipe_cb(struct dn_pipe *pipe, void *dummy __unused)
{
	if (pipe->idle_heap.elements > 0 &&
	    DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
		struct dn_flow_queue *q = pipe->idle_heap.p[0].object;

		heap_extract(&pipe->idle_heap, NULL);
		q->S = q->F + 1;	/* Mark timestamp as invalid */
		pipe->sum -= q->fs->weight;
	}
}

/*
 * This is called once per tick, or dn_hz times per second.  It is used to
 * increment the current tick counter and schedule expired events.
 */
static void
dummynet(netmsg_t msg)
{
	void *p;
	struct dn_heap *h;
	struct dn_heap *heaps[3];
	int i;

	heaps[0] = &ready_heap;		/* Fixed-rate queues */
	heaps[1] = &wfq_ready_heap;	/* WF2Q queues */
	heaps[2] = &extract_heap;	/* Delay line */

	/* Reply ASAP */
	crit_enter();
	lwkt_replymsg(&msg->lmsg, 0);
	crit_exit();

	curr_time++;
	for (i = 0; i < 3; i++) {
		h = heaps[i];
		while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
			if (h->p[0].key > curr_time) {
				kprintf("-- dummynet: warning, heap %d is %d ticks late\n",
					i, (int)(curr_time - h->p[0].key));
			}

			p = h->p[0].object;	/* Store a copy before heap_extract */
			heap_extract(h, NULL);	/* Need to extract before processing */

			if (i == 0)
				ready_event(p);
			else if (i == 1)
				ready_event_wfq(p);
			else
				transmit_event(p);
		}
	}

	/* Sweep pipes trying to expire idle flow_queues */
	dn_iterate_pipe(dn_expire_pipe_cb, NULL);
}

/*
 * Unconditionally expire empty queues in case of shortage.
 * Returns the number of queues freed.
 */
static int
expire_queues(struct dn_flow_set *fs)
{
	int i, initial_elements = fs->rq_elements;

	if (fs->last_expired == time_second)
		return 0;

	fs->last_expired = time_second;

	for (i = 0; i <= fs->rq_size; i++) {	/* Last one is overflow */
		struct dn_flow_queue *q, *qn;

		LIST_FOREACH_MUTABLE(q, &fs->rq[i], q_link, qn) {
			if (!TAILQ_EMPTY(&q->queue) || q->S != q->F + 1)
				continue;

			/*
			 * Entry is idle, expire it
			 */
			LIST_REMOVE(q, q_link);
			kfree(q, M_DUMMYNET);

			KASSERT(fs->rq_elements > 0,
				("invalid rq_elements %d", fs->rq_elements));
			fs->rq_elements--;
		}
	}
	return initial_elements - fs->rq_elements;
}

/*
 * If room, create a new queue and put at head of slot i;
 * otherwise, create or use the default queue.
 */
static struct dn_flow_queue *
create_queue(struct dn_flow_set *fs, int i)
{
	struct dn_flow_queue *q;

	if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
	    expire_queues(fs) == 0) {
		/*
		 * No way to get room, use or create overflow queue.
		 */
		i = fs->rq_size;
		if (!LIST_EMPTY(&fs->rq[i]))
			return LIST_FIRST(&fs->rq[i]);
	}

	q = kmalloc(sizeof(*q), M_DUMMYNET, M_INTWAIT | M_NULLOK | M_ZERO);
	if (q == NULL)
		return NULL;

	q->fs = fs;
	q->hash_slot = i;
	q->S = q->F + 1;	/* hack - mark timestamp as invalid */
	TAILQ_INIT(&q->queue);

	LIST_INSERT_HEAD(&fs->rq[i], q, q_link);
	fs->rq_elements++;

	return q;
}

/*
 * Given a flow_set and a pkt in last_pkt, find a matching queue
 * after appropriate masking.  The queue is moved to front
 * so that further searches take less time.
 */
static struct dn_flow_queue *
find_queue(struct dn_flow_set *fs, struct dn_flow_id *id)
{
	struct dn_flow_queue *q;
	int i = 0;

	if (!(fs->flags_fs & DN_HAVE_FLOW_MASK)) {
		q = LIST_FIRST(&fs->rq[0]);
	} else {
		struct dn_flow_queue *qn;

		/* First, do the masking */
		id->fid_dst_ip &= fs->flow_mask.fid_dst_ip;
		id->fid_src_ip &= fs->flow_mask.fid_src_ip;
		id->fid_dst_port &= fs->flow_mask.fid_dst_port;
		id->fid_src_port &= fs->flow_mask.fid_src_port;
		id->fid_proto &= fs->flow_mask.fid_proto;
		id->fid_flags = 0;	/* we don't care about this one */

		/* Then, hash function */
		i = ((id->fid_dst_ip) & 0xffff) ^
		    ((id->fid_dst_ip >> 15) & 0xffff) ^
		    ((id->fid_src_ip << 1) & 0xffff) ^
		    ((id->fid_src_ip >> 16) & 0xffff) ^
		    (id->fid_dst_port << 1) ^ (id->fid_src_port) ^
		    (id->fid_proto);
		i = i % fs->rq_size;

		/*
		 * Finally, scan the current list for a match and
		 * expire idle flow queues
		 */
		searches++;
		LIST_FOREACH_MUTABLE(q, &fs->rq[i], q_link, qn) {
			search_steps++;
			if (id->fid_dst_ip == q->id.fid_dst_ip &&
			    id->fid_src_ip == q->id.fid_src_ip &&
			    id->fid_dst_port == q->id.fid_dst_port &&
			    id->fid_src_port == q->id.fid_src_port &&
			    id->fid_proto == q->id.fid_proto &&
			    id->fid_flags == q->id.fid_flags) {
				break;	/* Found */
			} else if (pipe_expire && TAILQ_EMPTY(&q->queue) &&
				   q->S == q->F + 1) {
				/*
				 * Entry is idle and not in any heap, expire it
				 */
				LIST_REMOVE(q, q_link);
				kfree(q, M_DUMMYNET);

				KASSERT(fs->rq_elements > 0,
					("invalid rq_elements %d", fs->rq_elements));
				fs->rq_elements--;
			}
		}
		if (q && LIST_FIRST(&fs->rq[i]) != q) {	/* Found and not in front */
			LIST_REMOVE(q, q_link);
			LIST_INSERT_HEAD(&fs->rq[i], q, q_link);
		}
	}
	if (q == NULL) {	/* No match, need to allocate a new entry */
		q = create_queue(fs, i);
		if (q != NULL)
			q->id = *id;
	}
	return q;
}

static int
red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
{
	/*
	 * RED algorithm
	 *
	 * RED calculates the average queue size (avg) using a low-pass filter
	 * with an exponential weighted (w_q) moving average:
	 *	avg <- (1-w_q) * avg + w_q * q_size
	 * where q_size is the queue length (measured in bytes or packets).
	 *
	 * If q_size == 0, we compute the idle time for the link, and set
	 *	avg = (1 - w_q)^(idle/s)
	 * where s is the time needed for transmitting a medium-sized packet.
	 *
	 * Now, if avg < min_th the packet is enqueued.
	 * If avg > max_th the packet is dropped.  Otherwise, the packet is
	 * dropped with probability P function of avg.
	 */
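
	/*
	 * Illustrative numbers, using the fixed-point SCALE()/SCALE_MUL()
	 * helpers from ip_dummynet.h: with w_q = 0.002 (a typical RED
	 * default) the filter below adds (SCALE(q_size) - avg) * 0.002 to
	 * avg on each arrival, so a jump from an empty queue to a steady
	 * 100-packet backlog drags avg up to ~63 packets only after about
	 * 500 arrivals (1 - 0.998^500 ~= 0.63); the slow filter is what
	 * lets RED ignore short bursts.
	 */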

	int64_t p_b = 0;
	u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;

	DPRINTF("\n%d q: %2u ", (int)curr_time, q_size);

	/* Average queue size estimation */
	if (q_size != 0) {
		/*
		 * Queue is not empty, avg <- avg + (q_size - avg) * w_q
		 */
		int diff = SCALE(q_size) - q->avg;
		int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);

		q->avg += (int)v;
	} else {
		/*
		 * Queue is empty, find for how long the queue has been
		 * empty and use a lookup table for computing
		 * (1 - w_q)^(idle_time/s) where s is the time to send a
		 * (small) packet.
		 * XXX check wraps...
		 */
		if (q->avg) {
			u_int t = (curr_time - q->q_time) / fs->lookup_step;

			q->avg = (t < fs->lookup_depth) ?
				 SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
		}
	}
	DPRINTF("avg: %u ", SCALE_VAL(q->avg));

	/* Should we drop? */

	if (q->avg < fs->min_th) {
		/* Accept packet */
		q->count = -1;
		return 0;
	}

	if (q->avg >= fs->max_th) {	/* Average queue >= Max threshold */
		if (fs->flags_fs & DN_IS_GENTLE_RED) {
			/*
			 * According to Gentle-RED, if avg is greater than max_th the
			 * packet is dropped with a probability
			 *	p_b = c_3 * avg - c_4
			 * where c_3 = (1 - max_p) / max_th and c_4 = 1 - 2 * max_p.
			 */
			p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) - fs->c_4;
		} else {
			q->count = -1;
			kprintf("- drop\n");
			return 1;
		}
	} else if (q->avg > fs->min_th) {
		/*
		 * We compute p_b using the linear dropping function
		 *	p_b = c_1 * avg - c_2
		 * where c_1 = max_p / (max_th - min_th), and
		 * c_2 = max_p * min_th / (max_th - min_th).
		 */
		p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
	}
	if (fs->flags_fs & DN_QSIZE_IS_BYTES)
		p_b = (p_b * len) / fs->max_pkt_size;

	if (++q->count == 0) {
		q->random = krandom() & 0xffff;
	} else {
		/*
		 * q->count counts packets arrived since last drop, so a greater
		 * value of q->count means a greater packet drop probability.
		 */
		if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
			q->count = 0;
			DPRINTF("%s", "- red drop");
			/* After a drop we calculate a new random value */
			q->random = krandom() & 0xffff;
			return 1;	/* Drop */
		}
	}
	/* End of RED algorithm */
	return 0;	/* Accept */
}

static void
dn_iterate_pipe(dn_pipe_iter_t func, void *arg)
{
	int i;

	for (i = 0; i < DN_NR_HASH_MAX; ++i) {
		struct dn_pipe_head *pipe_hdr = &pipe_table[i];
		struct dn_pipe *pipe, *pipe_next;

		LIST_FOREACH_MUTABLE(pipe, pipe_hdr, p_link, pipe_next)
			func(pipe, arg);
	}
}

static void
dn_iterate_flowset(dn_flowset_iter_t func, void *arg)
{
	int i;

	for (i = 0; i < DN_NR_HASH_MAX; ++i) {
		struct dn_flowset_head *fs_hdr = &flowset_table[i];
		struct dn_flow_set *fs, *fs_next;

		LIST_FOREACH_MUTABLE(fs, fs_hdr, fs_link, fs_next)
			func(fs, arg);
	}
}

static struct dn_pipe *
dn_find_pipe(int pipe_nr)
{
	struct dn_pipe_head *pipe_hdr;
	struct dn_pipe *p;

	pipe_hdr = &pipe_table[DN_NR_HASH(pipe_nr)];
	LIST_FOREACH(p, pipe_hdr, p_link) {
		if (p->pipe_nr == pipe_nr)
			break;
	}
	return p;
}

static struct dn_flow_set *
dn_find_flowset(int fs_nr)
{
	struct dn_flowset_head *fs_hdr;
	struct dn_flow_set *fs;

	fs_hdr = &flowset_table[DN_NR_HASH(fs_nr)];
	LIST_FOREACH(fs, fs_hdr, fs_link) {
		if (fs->fs_nr == fs_nr)
			break;
	}
	return fs;
}

static struct dn_flow_set *
dn_locate_flowset(int pipe_nr, int is_pipe)
{
	struct dn_flow_set *fs = NULL;

	if (!is_pipe) {
		fs = dn_find_flowset(pipe_nr);
	} else {
		struct dn_pipe *p;

		p = dn_find_pipe(pipe_nr);
		if (p != NULL)
			fs = &p->fs;
	}
	return fs;
}

/*
 * Dummynet hook for packets.  Below 'pipe' is a pipe or a queue
 * depending on whether WF2Q or fixed bw is used.
 *
 * All parameters (the target pipe/queue number, the IS_PIPE flag and
 * the flow id used for the queue lookup) travel with the packet in
 * its PACKET_TAG_DUMMYNET mbuf tag, filled in by the caller.
 */
static int
dummynet_io(struct mbuf *m)
{
	struct dn_pkt *pkt;
	struct m_tag *tag;
	struct dn_flow_set *fs;
	struct dn_pipe *pipe;
	uint64_t len = m->m_pkthdr.len;
	struct dn_flow_queue *q = NULL;
	int is_pipe, pipe_nr;

	tag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
	pkt = m_tag_data(tag);

	is_pipe = pkt->dn_flags & DN_FLAGS_IS_PIPE;
	pipe_nr = pkt->pipe_nr;

	/*
	 * This is a dummynet rule, so we expect a O_PIPE or O_QUEUE rule
	 */
	fs = dn_locate_flowset(pipe_nr, is_pipe);
	if (fs == NULL)
		goto dropit;	/* This queue/pipe does not exist! */

	pipe = fs->pipe;
	if (pipe == NULL) {	/* Must be a queue, try to find a matching pipe */
		pipe = dn_find_pipe(fs->parent_nr);
		if (pipe != NULL) {
			fs->pipe = pipe;
		} else {
			kprintf("No pipe %d for queue %d, drop pkt\n",
				fs->parent_nr, fs->fs_nr);
			goto dropit;
		}
	}

	q = find_queue(fs, &pkt->id);
	if (q == NULL)
		goto dropit;	/* Cannot allocate queue */

	/*
	 * Update statistics, then check reasons to drop pkt
	 */
	q->tot_bytes += len;
	q->tot_pkts++;

	if (fs->plr && krandom() < fs->plr)
		goto dropit;	/* Random pkt drop */

	if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
		if (q->len_bytes > fs->qsize)
			goto dropit;	/* Queue size overflow */
	} else {
		if (q->len >= fs->qsize)
			goto dropit;	/* Queue count overflow */
	}

	if ((fs->flags_fs & DN_IS_RED) && red_drops(fs, q, len))
		goto dropit;

	TAILQ_INSERT_TAIL(&q->queue, pkt, dn_next);
	q->len++;
	q->len_bytes += len;

	if (TAILQ_FIRST(&q->queue) != pkt)	/* Flow was not idle, we are done */
		goto done;

	/*
	 * If we reach this point the flow was previously idle, so we need
	 * to schedule it.  This involves different actions for fixed-rate
	 * or WF2Q queues.
	 */
	if (is_pipe) {
		/*
		 * Fixed-rate queue: just insert into the ready_heap.
		 */
		dn_key t = 0;

		if (pipe->bandwidth)
			t = SET_TICKS(pkt, q, pipe);

		q->sched_time = curr_time;
		if (t == 0)	/* Must process it now */
			ready_event(q);
		else
			heap_insert(&ready_heap, curr_time + t, q);
	} else {
		/*
		 * WF2Q:
		 * First, compute start time S: if the flow was idle (S=F+1)
		 * set S to the virtual time V for the controlling pipe, and update
		 * the sum of weights for the pipe; otherwise, remove flow from
		 * idle_heap and set S to max(F, V).
		 * Second, compute finish time F = S + len/weight.
		 * Third, if pipe was idle, update V = max(S, V).
		 * Fourth, count one more backlogged flow.
		 */
		if (DN_KEY_GT(q->S, q->F)) {	/* Means timestamps are invalid */
			q->S = pipe->V;
			pipe->sum += fs->weight;	/* Add weight of new queue */
		} else {
			heap_extract(&pipe->idle_heap, q);
			q->S = MAX64(q->F, pipe->V);
		}
		q->F = q->S + (len << MY_M) / (uint64_t)fs->weight;

		if (pipe->not_eligible_heap.elements == 0 &&
		    pipe->scheduler_heap.elements == 0)
			pipe->V = MAX64(q->S, pipe->V);

		fs->backlogged++;

		/*
		 * Look at eligibility.  A flow is not eligible if S > V (when
		 * this happens, it means that there is some other flow already
		 * scheduled for the same pipe, so the scheduler_heap cannot be
		 * empty).  If the flow is not eligible we just store it in the
		 * not_eligible_heap.  Otherwise, we store in the scheduler_heap
		 * and possibly invoke ready_event_wfq() right now if there is
		 * leftover credit.
		 * Note that for all flows in scheduler_heap (SCH), S_i <= V,
		 * and for all flows in not_eligible_heap (NEH), S_i > V.
		 * So when we need to compute max(V, min(S_i)) for all i in
		 * SCH+NEH, we only need to look into NEH.
		 */
		if (DN_KEY_GT(q->S, pipe->V)) {	/* Not eligible */
			if (pipe->scheduler_heap.elements == 0)
				kprintf("++ ouch! not eligible but empty scheduler!\n");
			heap_insert(&pipe->not_eligible_heap, q->S, q);
		} else {
			heap_insert(&pipe->scheduler_heap, q->F, q);
			if (pipe->numbytes >= 0) {	/* Pipe is idle */
				if (pipe->scheduler_heap.elements != 1)
					kprintf("*** OUCH! pipe should have been idle!\n");
				DPRINTF("Waking up pipe %d at %d\n",
					pipe->pipe_nr, (int)(q->F >> MY_M));
				pipe->sched_time = curr_time;
				ready_event_wfq(pipe);
			}
		}
	}
done:
	return 0;

dropit:
	if (q)
		q->drops++;
	return ENOBUFS;
}

/*
 * Dispose of all packets and flow_queues on a flow_set.
 * If all=1, also remove the RED lookup table and other storage,
 * including the descriptor itself.
 * For the one in dn_pipe MUST also cleanup ready_heap...
 */
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
	int i;
#ifdef INVARIANTS
	int rq_elements = 0;
#endif

	for (i = 0; i <= fs->rq_size; i++) {
		struct dn_flow_queue *q;

		while ((q = LIST_FIRST(&fs->rq[i])) != NULL) {
			struct dn_pkt *pkt;

			while ((pkt = TAILQ_FIRST(&q->queue)) != NULL) {
				TAILQ_REMOVE(&q->queue, pkt, dn_next);
				ip_dn_packet_free(pkt);
			}

			LIST_REMOVE(q, q_link);
			kfree(q, M_DUMMYNET);

#ifdef INVARIANTS
			rq_elements++;
#endif
		}
	}
	KASSERT(rq_elements == fs->rq_elements,
		("# rq elements mismatch, freed %d, total %d",
		 rq_elements, fs->rq_elements));
	fs->rq_elements = 0;

	if (all) {
		/* RED - free lookup table */
		if (fs->w_q_lookup)
			kfree(fs->w_q_lookup, M_DUMMYNET);

		if (fs->rq)
			kfree(fs->rq, M_DUMMYNET);

		/*
		 * If this fs is not part of a pipe, free it.
		 *
		 * fs->pipe == NULL could happen, if 'fs' is a WF2Q and
		 * - no packet belonging to that flow set has been delivered by
		 *   dummynet_io(), i.e. the parent pipe is not installed yet;
		 * - the parent pipe is deleted.
		 */
		if (fs->pipe == NULL || (fs->pipe && fs != &fs->pipe->fs))
			kfree(fs, M_DUMMYNET);
	}
}

/*
 * Dispose of all packets queued on a pipe (not a flow_set).
 * Also free all resources associated to a pipe, which is about
 * to be deleted.
 */
static void
purge_pipe(struct dn_pipe *pipe)
{
	struct dn_pkt *pkt;

	purge_flow_set(&pipe->fs, 1);

	while ((pkt = TAILQ_FIRST(&pipe->p_queue)) != NULL) {
		TAILQ_REMOVE(&pipe->p_queue, pkt, dn_next);
		ip_dn_packet_free(pkt);
	}

	heap_free(&pipe->scheduler_heap);
	heap_free(&pipe->not_eligible_heap);
	heap_free(&pipe->idle_heap);
}

/*
 * Delete all pipes and heaps, returning memory.
 */
static void
dummynet_flush(void)
{
	struct dn_pipe_head pipe_list;
	struct dn_flowset_head fs_list;
	struct dn_pipe *p;
	struct dn_flow_set *fs;
	int i;

	/*
	 * Prevent future matches...
	 */
	LIST_INIT(&pipe_list);
	for (i = 0; i < DN_NR_HASH_MAX; ++i) {
		struct dn_pipe_head *pipe_hdr = &pipe_table[i];

		while ((p = LIST_FIRST(pipe_hdr)) != NULL) {
			LIST_REMOVE(p, p_link);
			LIST_INSERT_HEAD(&pipe_list, p, p_link);
		}
	}

	LIST_INIT(&fs_list);
	for (i = 0; i < DN_NR_HASH_MAX; ++i) {
		struct dn_flowset_head *fs_hdr = &flowset_table[i];

		while ((fs = LIST_FIRST(fs_hdr)) != NULL) {
			LIST_REMOVE(fs, fs_link);
			LIST_INSERT_HEAD(&fs_list, fs, fs_link);
		}
	}

	/* Free heaps so we don't have unwanted events */
	heap_free(&ready_heap);
	heap_free(&wfq_ready_heap);
	heap_free(&extract_heap);

	/*
	 * Now purge all queued pkts and delete all pipes.
	 */
	/* Scan and purge all flow_sets. */
	while ((fs = LIST_FIRST(&fs_list)) != NULL) {
		LIST_REMOVE(fs, fs_link);
		purge_flow_set(fs, 1);
	}

	while ((p = LIST_FIRST(&pipe_list)) != NULL) {
		LIST_REMOVE(p, p_link);
		purge_pipe(p);
		kfree(p, M_DUMMYNET);
	}
}

/*
 * Setup RED parameters.
 */
static int
config_red(const struct dn_ioc_flowset *ioc_fs, struct dn_flow_set *x)
{
	int i;

	x->w_q = ioc_fs->w_q;
	x->min_th = SCALE(ioc_fs->min_th);
	x->max_th = SCALE(ioc_fs->max_th);
	x->max_p = ioc_fs->max_p;

	x->c_1 = ioc_fs->max_p / (ioc_fs->max_th - ioc_fs->min_th);
	x->c_2 = SCALE_MUL(x->c_1, SCALE(ioc_fs->min_th));
	if (x->flags_fs & DN_IS_GENTLE_RED) {
		x->c_3 = (SCALE(1) - ioc_fs->max_p) / ioc_fs->max_th;
		x->c_4 = (SCALE(1) - 2 * ioc_fs->max_p);
	}

	/* If the lookup table already exists, free and create it again */
	if (x->w_q_lookup) {
		kfree(x->w_q_lookup, M_DUMMYNET);
		x->w_q_lookup = NULL;
	}

	if (red_lookup_depth == 0) {
		kprintf("net.inet.ip.dummynet.red_lookup_depth must be > 0\n");
		kfree(x, M_DUMMYNET);
		return EINVAL;
	}
	x->lookup_depth = red_lookup_depth;
	x->w_q_lookup = kmalloc(x->lookup_depth * sizeof(int),
				M_DUMMYNET, M_WAITOK);

	/* Fill the lookup table with (1 - w_q)^x */
	x->lookup_step = ioc_fs->lookup_step;
	x->lookup_weight = ioc_fs->lookup_weight;

	x->w_q_lookup[0] = SCALE(1) - x->w_q;
	for (i = 1; i < x->lookup_depth; i++)
		x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);

	if (red_avg_pkt_size < 1)
		red_avg_pkt_size = 512;
	x->avg_pkt_size = red_avg_pkt_size;

	if (red_max_pkt_size < 1)
		red_max_pkt_size = 1500;
	x->max_pkt_size = red_max_pkt_size;

	return 0;
}

static void
alloc_hash(struct dn_flow_set *x, const struct dn_ioc_flowset *ioc_fs)
{
	int i, alloc_size;

	if (x->flags_fs & DN_HAVE_FLOW_MASK) {
		int l = ioc_fs->rq_size;

		/* Allocate some slots */
		if (l == 0)
			l = dn_hash_size;

		if (l < DN_MIN_HASH_SIZE)
			l = DN_MIN_HASH_SIZE;
		else if (l > DN_MAX_HASH_SIZE)
			l = DN_MAX_HASH_SIZE;

		x->rq_size = l;
	} else {
		/* One is enough for null mask */
		x->rq_size = 1;
	}
	alloc_size = x->rq_size + 1;

	x->rq = kmalloc(alloc_size * sizeof(struct dn_flowqueue_head),
			M_DUMMYNET, M_WAITOK | M_ZERO);
	x->rq_elements = 0;

	for (i = 0; i < alloc_size; ++i)
		LIST_INIT(&x->rq[i]);
}

static void
set_flowid_parms(struct dn_flow_id *id, const struct dn_ioc_flowid *ioc_id)
{
	id->fid_dst_ip = ioc_id->u.ip.dst_ip;
	id->fid_src_ip = ioc_id->u.ip.src_ip;
	id->fid_dst_port = ioc_id->u.ip.dst_port;
	id->fid_src_port = ioc_id->u.ip.src_port;
	id->fid_proto = ioc_id->u.ip.proto;
	id->fid_flags = ioc_id->u.ip.flags;
}

static void
set_fs_parms(struct dn_flow_set *x, const struct dn_ioc_flowset *ioc_fs)
{
	x->flags_fs = ioc_fs->flags_fs;
	x->qsize = ioc_fs->qsize;
	x->plr = ioc_fs->plr;
	set_flowid_parms(&x->flow_mask, &ioc_fs->flow_mask);
	if (x->flags_fs & DN_QSIZE_IS_BYTES) {
		if (x->qsize > 1024 * 1024)
			x->qsize = 1024 * 1024;
	} else {
		if (x->qsize == 0 || x->qsize > 100)
			x->qsize = 50;
	}

	/* Configuring RED */
	if (x->flags_fs & DN_IS_RED)
		config_red(ioc_fs, x);	/* XXX should check errors */
}

/*
 * Setup pipe or queue parameters.
 */
static int
config_pipe(struct dn_ioc_pipe *ioc_pipe)
{
	struct dn_ioc_flowset *ioc_fs = &ioc_pipe->fs;
	int error;

	/*
	 * The config program passes parameters as follows:
	 * bw	 bits/second (0 means no limits)
	 * delay ms (must be translated into ticks)
	 * qsize slots or bytes
	 */
	ioc_pipe->delay = (ioc_pipe->delay * dn_hz) / 1000;
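
	/*
	 * E.g. with the default dn_hz of 1000 a configured delay of 25 ms
	 * becomes 25 ticks, while with dn_hz lowered to 100 it becomes
	 * 2 ticks: integer division makes a coarse clock round the
	 * emulated delay down.
	 */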

	/*
	 * We need either a pipe number or a flow_set number
	 */
	if (ioc_pipe->pipe_nr == 0 && ioc_fs->fs_nr == 0)
		return EINVAL;
	if (ioc_pipe->pipe_nr != 0 && ioc_fs->fs_nr != 0)
		return EINVAL;

	/*
	 * Validate pipe number
	 */
	if (ioc_pipe->pipe_nr > DN_PIPE_NR_MAX || ioc_pipe->pipe_nr < 0)
		return EINVAL;

	error = EINVAL;
	if (ioc_pipe->pipe_nr != 0) {	/* This is a pipe */
		struct dn_pipe *x, *p;

		/* Locate pipe */
		p = dn_find_pipe(ioc_pipe->pipe_nr);

		if (p == NULL) {	/* New pipe */
			x = kmalloc(sizeof(struct dn_pipe), M_DUMMYNET, M_WAITOK | M_ZERO);
			x->pipe_nr = ioc_pipe->pipe_nr;
			x->fs.pipe = x;
			TAILQ_INIT(&x->p_queue);

			/*
			 * idle_heap is the only one from which we extract from the middle.
			 */
			x->idle_heap.size = x->idle_heap.elements = 0;
			x->idle_heap.offset = __offsetof(struct dn_flow_queue, heap_pos);
		} else {
			int i;

			x = p;

			/* Flush accumulated credit for all queues */
			for (i = 0; i <= x->fs.rq_size; i++) {
				struct dn_flow_queue *q;

				LIST_FOREACH(q, &x->fs.rq[i], q_link)
					q->numbytes = 0;
			}
		}

		x->bandwidth = ioc_pipe->bandwidth;
		x->numbytes = 0;	/* Just in case... */
		x->delay = ioc_pipe->delay;

		set_fs_parms(&x->fs, ioc_fs);

		if (x->fs.rq == NULL) {	/* A new pipe */
			struct dn_pipe_head *pipe_hdr;

			alloc_hash(&x->fs, ioc_fs);

			pipe_hdr = &pipe_table[DN_NR_HASH(x->pipe_nr)];
			LIST_INSERT_HEAD(pipe_hdr, x, p_link);
		}
	} else {	/* Config flow_set */
		struct dn_flow_set *x, *fs;

		/* Locate flow_set */
		fs = dn_find_flowset(ioc_fs->fs_nr);

		if (fs == NULL) {	/* New flow_set */
			if (ioc_fs->parent_nr == 0)	/* Need link to a pipe */
				goto back;

			x = kmalloc(sizeof(struct dn_flow_set), M_DUMMYNET,
				    M_WAITOK | M_ZERO);
			x->fs_nr = ioc_fs->fs_nr;
			x->parent_nr = ioc_fs->parent_nr;
			x->weight = ioc_fs->weight;
			if (x->weight == 0)
				x->weight = 1;
			else if (x->weight > 100)
				x->weight = 100;
		} else {
			/* Change parent pipe not allowed; must delete and recreate */
			if (ioc_fs->parent_nr != 0 && fs->parent_nr != ioc_fs->parent_nr)
				goto back;
			x = fs;
		}

		set_fs_parms(x, ioc_fs);

		if (x->rq == NULL) {	/* A new flow_set */
			struct dn_flowset_head *fs_hdr;

			alloc_hash(x, ioc_fs);

			fs_hdr = &flowset_table[DN_NR_HASH(x->fs_nr)];
			LIST_INSERT_HEAD(fs_hdr, x, fs_link);
		}
	}
	error = 0;

back:
	return error;
}

/*
 * Helper function to remove from a heap queues which are linked to
 * a flow_set about to be deleted.
 */
static void
fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
{
	int i = 0, found = 0;

	while (i < h->elements) {
		if (((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
			h->elements--;
			h->p[i] = h->p[h->elements];
			found++;
		} else {
			i++;
		}
	}
	if (found)
		heapify(h);
}

/*
 * Helper function to remove a pipe from a heap (can be there at most once)
 */
static void
pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
{
	if (h->elements > 0) {
		int i;

		for (i = 0; i < h->elements; i++) {
			if (h->p[i].object == p) {	/* found it */
				h->elements--;
				h->p[i] = h->p[h->elements];
				heapify(h);
				break;
			}
		}
	}
}

static void
dn_unref_pipe_cb(struct dn_flow_set *fs, void *pipe0)
{
	struct dn_pipe *pipe = pipe0;

	if (fs->pipe == pipe) {
		kprintf("++ ref to pipe %d from fs %d\n",
			pipe->pipe_nr, fs->fs_nr);
		fs->pipe = NULL;
		purge_flow_set(fs, 0);
	}
}

/*
 * Fully delete a pipe or a queue, cleaning up associated info.
 */
static int
delete_pipe(const struct dn_ioc_pipe *ioc_pipe)
{
	struct dn_pipe *p;
	int error;

	if (ioc_pipe->pipe_nr == 0 && ioc_pipe->fs.fs_nr == 0)
		return EINVAL;
	if (ioc_pipe->pipe_nr != 0 && ioc_pipe->fs.fs_nr != 0)
		return EINVAL;

	if (ioc_pipe->pipe_nr > DN_PIPE_NR_MAX || ioc_pipe->pipe_nr < 0)
		return EINVAL;

	error = EINVAL;
	if (ioc_pipe->pipe_nr != 0) {	/* This is an old-style pipe */
		/* Locate pipe */
		p = dn_find_pipe(ioc_pipe->pipe_nr);
		if (p == NULL)
			goto back;	/* Not found */

		/* Unlink from pipe hash table */
		LIST_REMOVE(p, p_link);

		/* Remove all references to this pipe from flow_sets */
		dn_iterate_flowset(dn_unref_pipe_cb, p);

		fs_remove_from_heap(&ready_heap, &p->fs);
		purge_pipe(p);	/* Remove all data associated to this pipe */

		/* Remove reference to here from extract_heap and wfq_ready_heap */
		pipe_remove_from_heap(&extract_heap, p);
		pipe_remove_from_heap(&wfq_ready_heap, p);

		kfree(p, M_DUMMYNET);
	} else {	/* This is a WF2Q queue (dn_flow_set) */
		struct dn_flow_set *fs;

		/* Locate flow_set */
		fs = dn_find_flowset(ioc_pipe->fs.fs_nr);
		if (fs == NULL)
			goto back;	/* Not found */

		LIST_REMOVE(fs, fs_link);

		if ((p = fs->pipe) != NULL) {
			/* Update total weight on parent pipe and cleanup parent heaps */
			p->sum -= fs->weight * fs->backlogged;
			fs_remove_from_heap(&p->not_eligible_heap, fs);
			fs_remove_from_heap(&p->scheduler_heap, fs);
#if 1	/* XXX should i remove from idle_heap as well ? */
			fs_remove_from_heap(&p->idle_heap, fs);
#endif
		}
		purge_flow_set(fs, 1);
	}
	error = 0;

back:
	return error;
}

/*
 * Helper function used to copy data from kernel in DUMMYNET_GET
 */
static void
dn_copy_flowid(const struct dn_flow_id *id, struct dn_ioc_flowid *ioc_id)
{
	ioc_id->type = ETHERTYPE_IP;
	ioc_id->u.ip.dst_ip = id->fid_dst_ip;
	ioc_id->u.ip.src_ip = id->fid_src_ip;
	ioc_id->u.ip.dst_port = id->fid_dst_port;
	ioc_id->u.ip.src_port = id->fid_src_port;
	ioc_id->u.ip.proto = id->fid_proto;
	ioc_id->u.ip.flags = id->fid_flags;
}

static void *
dn_copy_flowqueues(const struct dn_flow_set *fs, void *bp)
{
	struct dn_ioc_flowqueue *ioc_fq = bp;
	int i, copied = 0;

	for (i = 0; i <= fs->rq_size; i++) {
		const struct dn_flow_queue *q;

		LIST_FOREACH(q, &fs->rq[i], q_link) {
			if (q->hash_slot != i) {	/* XXX ASSERT */
				kprintf("++ at %d: wrong slot (have %d, "
					"should be %d)\n", copied, q->hash_slot, i);
			}
			if (q->fs != fs) {		/* XXX ASSERT */
				kprintf("++ at %d: wrong fs ptr (have %p, should be %p)\n",
					i, q->fs, fs);
			}

			copied++;

			ioc_fq->len = q->len;
			ioc_fq->len_bytes = q->len_bytes;
			ioc_fq->tot_pkts = q->tot_pkts;
			ioc_fq->tot_bytes = q->tot_bytes;
			ioc_fq->drops = q->drops;
			ioc_fq->hash_slot = q->hash_slot;
			ioc_fq->S = q->S;
			ioc_fq->F = q->F;
			dn_copy_flowid(&q->id, &ioc_fq->id);

			ioc_fq++;
		}
	}

	if (copied != fs->rq_elements) {	/* XXX ASSERT */
		kprintf("++ wrong count, have %d should be %d\n",
			copied, fs->rq_elements);
	}
	return ioc_fq;
}

static void
dn_copy_flowset(const struct dn_flow_set *fs, struct dn_ioc_flowset *ioc_fs,
		u_short fs_type)
{
	ioc_fs->fs_type = fs_type;

	ioc_fs->fs_nr = fs->fs_nr;
	ioc_fs->flags_fs = fs->flags_fs;
	ioc_fs->parent_nr = fs->parent_nr;

	ioc_fs->weight = fs->weight;
	ioc_fs->qsize = fs->qsize;
	ioc_fs->plr = fs->plr;

	ioc_fs->rq_size = fs->rq_size;
	ioc_fs->rq_elements = fs->rq_elements;

	ioc_fs->w_q = fs->w_q;
	ioc_fs->max_th = fs->max_th;
	ioc_fs->min_th = fs->min_th;
	ioc_fs->max_p = fs->max_p;

	dn_copy_flowid(&fs->flow_mask, &ioc_fs->flow_mask);
}

static void
dn_calc_pipe_size_cb(struct dn_pipe *pipe, void *sz)
{
	size_t *size = sz;

	*size += sizeof(struct dn_ioc_pipe) +
		 pipe->fs.rq_elements * sizeof(struct dn_ioc_flowqueue);
}

static void
dn_calc_fs_size_cb(struct dn_flow_set *fs, void *sz)
{
	size_t *size = sz;

	*size += sizeof(struct dn_ioc_flowset) +
		 fs->rq_elements * sizeof(struct dn_ioc_flowqueue);
}

static void
dn_copyout_pipe_cb(struct dn_pipe *pipe, void *bp0)
{
	char **bp = bp0;
	struct dn_ioc_pipe *ioc_pipe = (struct dn_ioc_pipe *)(*bp);

	/*
	 * Copy flow set descriptor associated with this pipe
	 */
	dn_copy_flowset(&pipe->fs, &ioc_pipe->fs, DN_IS_PIPE);

	/*
	 * Copy pipe descriptor
	 */
	ioc_pipe->bandwidth = pipe->bandwidth;
	ioc_pipe->pipe_nr = pipe->pipe_nr;
	ioc_pipe->V = pipe->V;
	/* Convert delay to milliseconds */
	ioc_pipe->delay = (pipe->delay * 1000) / dn_hz;

	/*
	 * Copy flow queue descriptors
	 */
	*bp += sizeof(*ioc_pipe);
	*bp = dn_copy_flowqueues(&pipe->fs, *bp);
}

static void
dn_copyout_fs_cb(struct dn_flow_set *fs, void *bp0)
{
	char **bp = bp0;
	struct dn_ioc_flowset *ioc_fs = (struct dn_ioc_flowset *)(*bp);

	/*
	 * Copy flow set descriptor
	 */
	dn_copy_flowset(fs, ioc_fs, DN_IS_QUEUE);

	/*
	 * Copy flow queue descriptors
	 */
	*bp += sizeof(*ioc_fs);
	*bp = dn_copy_flowqueues(fs, *bp);
}

static int
dummynet_get(struct dn_sopt *dn_sopt)
{
	char *buf, *bp;
	size_t size = 0;

	/*
	 * Compute size of data structures: list of pipes and flow_sets.
	 */
	dn_iterate_pipe(dn_calc_pipe_size_cb, &size);
	dn_iterate_flowset(dn_calc_fs_size_cb, &size);

	/*
	 * Copyout pipe/flow_set/flow_queue
	 */
	bp = buf = kmalloc(size, M_TEMP, M_WAITOK | M_ZERO);
	dn_iterate_pipe(dn_copyout_pipe_cb, &bp);
	dn_iterate_flowset(dn_copyout_fs_cb, &bp);

	/* Temp memory will be freed by caller */
	dn_sopt->dn_sopt_arg = buf;
	dn_sopt->dn_sopt_arglen = size;
	return 0;
}

/*
 * Handler for the various dummynet socket options (get, flush, config, del)
 */
static int
dummynet_ctl(struct dn_sopt *dn_sopt)
{
	int error = 0;

	switch (dn_sopt->dn_sopt_name) {
	case IP_DUMMYNET_GET:
		error = dummynet_get(dn_sopt);
		break;

	case IP_DUMMYNET_FLUSH:
		dummynet_flush();
		break;

	case IP_DUMMYNET_CONFIGURE:
		KKASSERT(dn_sopt->dn_sopt_arglen == sizeof(struct dn_ioc_pipe));
		error = config_pipe(dn_sopt->dn_sopt_arg);
		break;

	case IP_DUMMYNET_DEL:	/* Remove a pipe or flow_set */
		KKASSERT(dn_sopt->dn_sopt_arglen == sizeof(struct dn_ioc_pipe));
		error = delete_pipe(dn_sopt->dn_sopt_arg);
		break;

	default:
		kprintf("%s -- unknown option %d\n", __func__, dn_sopt->dn_sopt_name);
		error = EINVAL;
		break;
	}
	return error;
}

static void
dummynet_clock(systimer_t info __unused, int in_ipi __unused,
	       struct intrframe *frame __unused)
{
	KASSERT(mycpuid == ip_dn_cpu,
		("dummynet systimer comes on cpu%d, should be %d!",
		 mycpuid, ip_dn_cpu));

	crit_enter();
	if (DUMMYNET_LOADED && (dn_netmsg.lmsg.ms_flags & MSGF_DONE))
		lwkt_sendmsg(netisr_portfn(mycpuid), &dn_netmsg.lmsg);
	crit_exit();
}

static int
sysctl_dn_hz(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = dn_hz;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || req->newptr == NULL)
		return error;
	if (val <= 0)
		return EINVAL;
	else if (val > DN_CALLOUT_FREQ_MAX)
		val = DN_CALLOUT_FREQ_MAX;

	crit_enter();
	dn_hz = val;
	systimer_adjust_periodic(&dn_clock, val);
	crit_exit();

	return 0;
}

static void
ip_dn_init_dispatch(netmsg_t msg)
{
	int i, error = 0;

	KASSERT(mycpuid == ip_dn_cpu,
		("%s runs on cpu%d, instead of cpu%d", __func__,
		 mycpuid, ip_dn_cpu));

	crit_enter();

	if (DUMMYNET_LOADED) {
		kprintf("DUMMYNET already loaded\n");
		error = EEXIST;
		goto back;
	}

	kprintf("DUMMYNET initialized (011031)\n");

	for (i = 0; i < DN_NR_HASH_MAX; ++i)
		LIST_INIT(&pipe_table[i]);

	for (i = 0; i < DN_NR_HASH_MAX; ++i)
		LIST_INIT(&flowset_table[i]);

	ready_heap.size = ready_heap.elements = 0;
	ready_heap.offset = 0;

	wfq_ready_heap.size = wfq_ready_heap.elements = 0;
	wfq_ready_heap.offset = 0;

	extract_heap.size = extract_heap.elements = 0;
	extract_heap.offset = 0;

	ip_dn_ctl_ptr = dummynet_ctl;
	ip_dn_io_ptr = dummynet_io;

	netmsg_init(&dn_netmsg, NULL, &netisr_adone_rport,
		    0, dummynet);
	systimer_init_periodic_nq(&dn_clock, dummynet_clock, NULL, dn_hz);

back:
	crit_exit();
	lwkt_replymsg(&msg->lmsg, error);
}

static int
ip_dn_init(void)
{
	struct netmsg_base smsg;

	if (ip_dn_cpu >= ncpus) {
		kprintf("%s: CPU%d does not exist, switch to CPU0\n",
			__func__, ip_dn_cpu);
		ip_dn_cpu = 0;
	}

	netmsg_init(&smsg, NULL, &curthread->td_msgport,
		    0, ip_dn_init_dispatch);
	lwkt_domsg(netisr_portfn(ip_dn_cpu), &smsg.lmsg, 0);
	return smsg.lmsg.ms_error;
}

#ifdef KLD_MODULE

static void
ip_dn_stop_dispatch(netmsg_t msg)
{
	crit_enter();

	dummynet_flush();

	ip_dn_ctl_ptr = NULL;
	ip_dn_io_ptr = NULL;

	systimer_del(&dn_clock);

	crit_exit();
	lwkt_replymsg(&msg->lmsg, 0);
}

static void
ip_dn_stop(void)
{
	struct netmsg_base smsg;

	netmsg_init(&smsg, NULL, &curthread->td_msgport,
		    0, ip_dn_stop_dispatch);
	lwkt_domsg(netisr_portfn(ip_dn_cpu), &smsg.lmsg, 0);

	netmsg_service_sync();
}

#endif	/* KLD_MODULE */

static int
dummynet_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		return ip_dn_init();

	case MOD_UNLOAD:
#ifndef KLD_MODULE
		kprintf("dummynet statically compiled, cannot unload\n");
		return EINVAL;
#else
		ip_dn_stop();
#endif
		break;

	default:
		break;
	}
	return 0;
}

static moduledata_t dummynet_mod = {
	"dummynet",
	dummynet_modevent,
	NULL
};
DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_END, SI_ORDER_ANY);
MODULE_VERSION(dummynet, 1);