1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004-2010 University of Zagreb
5 * Copyright (c) 2007-2008 FreeBSD Foundation
6 *
7 * This software was developed by the University of Zagreb and the
8 * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
9 * FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * This node permits simple traffic shaping by emulating bandwidth
35 * and delay, as well as random packet losses.
36 * The node has two hooks, upper and lower. Traffic flowing from upper to
37 * lower hook is referenced as downstream, and vice versa. Parameters for
38 * both directions can be set separately, except for delay.
39 */
40
41 #include <sys/param.h>
42 #include <sys/errno.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/prng.h>
48 #include <sys/time.h>
49
50 #include <vm/uma.h>
51
52 #include <net/vnet.h>
53
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57
58 #include <netgraph/ng_message.h>
59 #include <netgraph/netgraph.h>
60 #include <netgraph/ng_parse.h>
61 #include <netgraph/ng_pipe.h>
62
63 static MALLOC_DEFINE(M_NG_PIPE, "ng_pipe", "ng_pipe");
64
/*
 * Per-packet bookkeeping header, allocated from ngp_zone for every
 * frame held inside the node (both bandwidth and delay queues).
 */
struct ngp_hdr {
	TAILQ_ENTRY(ngp_hdr)	ngp_link;	/* next pkt in queue */
	struct timeval		when;		/* this packet's due time */
	struct mbuf		*m;		/* ptr to the packet data */
};
TAILQ_HEAD(p_head, ngp_hdr);
72
/*
 * FIFO queue struct - one instance per active flow.  In plain FIFO
 * mode there is a single queue with hash 0; in WFQ/DRR mode one
 * queue exists per flow signature (see ip_hash()).
 */
struct ngp_fifo {
	TAILQ_ENTRY(ngp_fifo)	fifo_le;	/* list of active queues only */
	struct p_head		packet_head;	/* FIFO queue head */
	u_int32_t		hash;		/* flow signature */
	struct timeval		vtime;		/* virtual time, for WFQ */
	u_int32_t		rr_deficit;	/* for DRR */
	u_int32_t		packets;	/* # of packets in this queue */
};
82
/*
 * Per-hook (i.e. per-direction) state.  One instance is embedded in
 * the node private data for each of the "upper" and "lower" hooks.
 */
struct hookinfo {
	hook_p hook;
	int noqueue;		/* bypass any processing */
	TAILQ_HEAD(, ngp_fifo) fifo_head; /* FIFO queues */
	TAILQ_HEAD(, ngp_hdr) qout_head; /* delay queue head */
	struct timeval qin_utime;	/* inbound queue service deadline */
	struct ng_pipe_hookcfg cfg;	/* active configuration */
	struct ng_pipe_hookrun run;	/* runtime queue counters */
	struct ng_pipe_hookstat stats;	/* traffic statistics */
	uint64_t *ber_p;	/* loss_p(BER,psize) map */
};
95
/*
 * Per node info.  Parameters shared by both directions (delay,
 * overhead, header_offset) live here; per-direction settings live
 * in the embedded hookinfo structs.
 */
struct node_priv {
	u_int64_t	delay;		/* propagation delay, microseconds */
	u_int32_t	overhead;	/* per-frame size overhead, bytes */
	u_int32_t	header_offset;	/* offset of IP header for hashing */
	struct hookinfo	lower;
	struct hookinfo	upper;
	struct callout	timer;		/* tick-driven dequeue poller */
	int		timer_scheduled;
};
typedef struct node_priv *priv_p;
107
/*
 * Macro for calculating the virtual time for packet dequeueing in WFQ.
 *
 * Computes the flow's virtual finish time from the packet length plus
 * per-frame overhead, scaled by the number of active queues and the
 * configured bandwidth, then inserts ngp_f into the active-queue list
 * sorted by ascending virtual time.  Without WFQ (or with no bandwidth
 * limit) the queue simply goes to the tail of the list.
 *
 * NOTE(review): expands to a bare if/else (no do { } while (0)
 * wrapper) and implicitly captures hinfo, priv, now, ngp_f and
 * ngp_f1 from the caller's scope; use only where both are safe.
 */
#define FIFO_VTIME_SORT(plen)						\
	if (hinfo->cfg.wfq && hinfo->cfg.bandwidth) {			\
		ngp_f->vtime.tv_usec = now->tv_usec + ((uint64_t) (plen) \
		    + priv->overhead ) * hinfo->run.fifo_queues *	\
		    8000000 / hinfo->cfg.bandwidth;			\
		ngp_f->vtime.tv_sec = now->tv_sec +			\
		    ngp_f->vtime.tv_usec / 1000000;			\
		ngp_f->vtime.tv_usec = ngp_f->vtime.tv_usec % 1000000;	\
		TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)	\
			if (ngp_f1->vtime.tv_sec > ngp_f->vtime.tv_sec || \
			    (ngp_f1->vtime.tv_sec == ngp_f->vtime.tv_sec && \
			    ngp_f1->vtime.tv_usec > ngp_f->vtime.tv_usec)) \
				break;					\
		if (ngp_f1 == NULL)					\
			TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le); \
		else							\
			TAILQ_INSERT_BEFORE(ngp_f1, ngp_f, fifo_le);	\
	} else								\
		TAILQ_INSERT_TAIL(&hinfo->fifo_head, ngp_f, fifo_le);	\

129 static void parse_cfg(struct ng_pipe_hookcfg *, struct ng_pipe_hookcfg *,
130 struct hookinfo *, priv_p);
131 static void pipe_dequeue(struct hookinfo *, struct timeval *);
132 static void ngp_callout(node_p, hook_p, void *, int);
133 static int ngp_modevent(module_t, int, void *);
134
135 /* zone for storing ngp_hdr-s */
136 static uma_zone_t ngp_zone;
137
138 /* Netgraph methods */
139 static ng_constructor_t ngp_constructor;
140 static ng_rcvmsg_t ngp_rcvmsg;
141 static ng_shutdown_t ngp_shutdown;
142 static ng_newhook_t ngp_newhook;
143 static ng_rcvdata_t ngp_rcvdata;
144 static ng_disconnect_t ngp_disconnect;
145
/*
 * Parse types: describe the layout of the binary control message
 * structures so the generic netgraph parser can convert them to and
 * from ASCII (e.g. for ngctl(8)).  Field lists come from ng_pipe.h.
 */

/* Parse type for struct ng_pipe_hookstat */
static const struct ng_parse_struct_field
    ng_pipe_hookstat_type_fields[] = NG_PIPE_HOOKSTAT_INFO;
static const struct ng_parse_type ng_pipe_hookstat_type = {
	&ng_parse_struct_type,
	&ng_pipe_hookstat_type_fields
};

/* Parse type for struct ng_pipe_stats */
static const struct ng_parse_struct_field ng_pipe_stats_type_fields[] =
	NG_PIPE_STATS_INFO(&ng_pipe_hookstat_type);
static const struct ng_parse_type ng_pipe_stats_type = {
	&ng_parse_struct_type,
	&ng_pipe_stats_type_fields
};

/* Parse type for struct ng_pipe_hookrun */
static const struct ng_parse_struct_field
    ng_pipe_hookrun_type_fields[] = NG_PIPE_HOOKRUN_INFO;
static const struct ng_parse_type ng_pipe_hookrun_type = {
	&ng_parse_struct_type,
	&ng_pipe_hookrun_type_fields
};

/* Parse type for struct ng_pipe_run */
static const struct ng_parse_struct_field
    ng_pipe_run_type_fields[] = NG_PIPE_RUN_INFO(&ng_pipe_hookrun_type);
static const struct ng_parse_type ng_pipe_run_type = {
	&ng_parse_struct_type,
	&ng_pipe_run_type_fields
};

/* Parse type for struct ng_pipe_hookcfg */
static const struct ng_parse_struct_field
    ng_pipe_hookcfg_type_fields[] = NG_PIPE_HOOKCFG_INFO;
static const struct ng_parse_type ng_pipe_hookcfg_type = {
	&ng_parse_struct_type,
	&ng_pipe_hookcfg_type_fields
};

/* Parse type for struct ng_pipe_cfg */
static const struct ng_parse_struct_field
    ng_pipe_cfg_type_fields[] = NG_PIPE_CFG_INFO(&ng_pipe_hookcfg_type);
static const struct ng_parse_type ng_pipe_cfg_type = {
	&ng_parse_struct_type,
	&ng_pipe_cfg_type_fields
};
193
/*
 * List of commands and how to convert arguments to/from ASCII.
 * All commands live under NGM_PIPE_COOKIE; see ngp_rcvmsg() for the
 * handlers.
 */
static const struct ng_cmdlist ngp_cmds[] = {
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_GET_STATS,
		.name = 	"getstats",
		.respType =	 &ng_pipe_stats_type
	},
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_CLR_STATS,
		.name =		"clrstats"
	},
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_GETCLR_STATS,
		.name =		"getclrstats",
		.respType =	&ng_pipe_stats_type
	},
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_GET_RUN,
		.name =		"getrun",
		.respType =	&ng_pipe_run_type
	},
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_GET_CFG,
		.name =		"getcfg",
		.respType =	&ng_pipe_cfg_type
	},
	{
		.cookie =	NGM_PIPE_COOKIE,
		.cmd =		NGM_PIPE_SET_CFG,
		.name =		"setcfg",
		.mesgType =	&ng_pipe_cfg_type,
	},
	{ 0 }
};
233
/*
 * Netgraph type descriptor - wires the ngp_* methods into the
 * netgraph framework and registers the node type at module load.
 */
static struct ng_type ng_pipe_typestruct = {
	.version =	NG_ABI_VERSION,
	.name =		NG_PIPE_NODE_TYPE,
	.mod_event =	ngp_modevent,
	.constructor =	ngp_constructor,
	.shutdown =	ngp_shutdown,
	.rcvmsg =	ngp_rcvmsg,
	.newhook =	ngp_newhook,
	.rcvdata =	ngp_rcvdata,
	.disconnect =	ngp_disconnect,
	.cmdlist =	ngp_cmds
};
NETGRAPH_INIT(pipe, &ng_pipe_typestruct);
248
249 /* Node constructor */
250 static int
ngp_constructor(node_p node)251 ngp_constructor(node_p node)
252 {
253 priv_p priv;
254
255 priv = malloc(sizeof(*priv), M_NG_PIPE, M_ZERO | M_WAITOK);
256 NG_NODE_SET_PRIVATE(node, priv);
257
258 /* Mark node as single-threaded */
259 NG_NODE_FORCE_WRITER(node);
260
261 ng_callout_init(&priv->timer);
262
263 return (0);
264 }
265
266 /* Add a hook */
267 static int
ngp_newhook(node_p node,hook_p hook,const char * name)268 ngp_newhook(node_p node, hook_p hook, const char *name)
269 {
270 const priv_p priv = NG_NODE_PRIVATE(node);
271 struct hookinfo *hinfo;
272
273 if (strcmp(name, NG_PIPE_HOOK_UPPER) == 0) {
274 bzero(&priv->upper, sizeof(priv->upper));
275 priv->upper.hook = hook;
276 NG_HOOK_SET_PRIVATE(hook, &priv->upper);
277 } else if (strcmp(name, NG_PIPE_HOOK_LOWER) == 0) {
278 bzero(&priv->lower, sizeof(priv->lower));
279 priv->lower.hook = hook;
280 NG_HOOK_SET_PRIVATE(hook, &priv->lower);
281 } else
282 return (EINVAL);
283
284 /* Load non-zero initial cfg values */
285 hinfo = NG_HOOK_PRIVATE(hook);
286 hinfo->cfg.qin_size_limit = 50;
287 hinfo->cfg.fifo = 1;
288 hinfo->cfg.droptail = 1;
289 TAILQ_INIT(&hinfo->fifo_head);
290 TAILQ_INIT(&hinfo->qout_head);
291 return (0);
292 }
293
/*
 * Receive a control message.  Handles statistics / runtime-state
 * queries, configuration retrieval, and NGM_PIPE_SET_CFG, which
 * (re)configures both directions and emits link-state notifications
 * to our peers when the forced-down state (ber == 1) toggles.
 */
static int
ngp_rcvmsg(node_p node, item_p item, hook_p lasthook)
{
	const priv_p priv = NG_NODE_PRIVATE(node);
	struct ng_mesg *resp = NULL;
	struct ng_mesg *msg, *flow_msg;
	struct ng_pipe_stats *stats;
	struct ng_pipe_run *run;
	struct ng_pipe_cfg *cfg;
	int error = 0;
	int prev_down, now_down, cmd;

	NGI_GET_MSG(item, msg);
	switch (msg->header.typecookie) {
	case NGM_PIPE_COOKIE:
		switch (msg->header.cmd) {
		case NGM_PIPE_GET_STATS:
		case NGM_PIPE_CLR_STATS:
		case NGM_PIPE_GETCLR_STATS:
			/* Report the counters unless this is a pure clear. */
			if (msg->header.cmd != NGM_PIPE_CLR_STATS) {
				NG_MKRESPONSE(resp, msg,
				    sizeof(*stats), M_NOWAIT);
				if (resp == NULL) {
					error = ENOMEM;
					break;
				}
				stats = (struct ng_pipe_stats *) resp->data;
				/*
				 * "upper" carries the downstream direction,
				 * "lower" the upstream one (see file header).
				 */
				bcopy(&priv->upper.stats, &stats->downstream,
				    sizeof(stats->downstream));
				bcopy(&priv->lower.stats, &stats->upstream,
				    sizeof(stats->upstream));
			}
			/* Clear the counters unless this is a pure get. */
			if (msg->header.cmd != NGM_PIPE_GET_STATS) {
				bzero(&priv->upper.stats,
				    sizeof(priv->upper.stats));
				bzero(&priv->lower.stats,
				    sizeof(priv->lower.stats));
			}
			break;
		case NGM_PIPE_GET_RUN:
			NG_MKRESPONSE(resp, msg, sizeof(*run), M_NOWAIT);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			run = (struct ng_pipe_run *) resp->data;
			bcopy(&priv->upper.run, &run->downstream,
			    sizeof(run->downstream));
			bcopy(&priv->lower.run, &run->upstream,
			    sizeof(run->upstream));
			break;
		case NGM_PIPE_GET_CFG:
			NG_MKRESPONSE(resp, msg, sizeof(*cfg), M_NOWAIT);
			if (resp == NULL) {
				error = ENOMEM;
				break;
			}
			cfg = (struct ng_pipe_cfg *) resp->data;
			bcopy(&priv->upper.cfg, &cfg->downstream,
			    sizeof(cfg->downstream));
			bcopy(&priv->lower.cfg, &cfg->upstream,
			    sizeof(cfg->upstream));
			cfg->delay = priv->delay;
			cfg->overhead = priv->overhead;
			cfg->header_offset = priv->header_offset;
			/*
			 * When both directions share the same bandwidth,
			 * report it in the common field only.
			 */
			if (cfg->upstream.bandwidth ==
			    cfg->downstream.bandwidth) {
				cfg->bandwidth = cfg->upstream.bandwidth;
				cfg->upstream.bandwidth = 0;
				cfg->downstream.bandwidth = 0;
			} else
				cfg->bandwidth = 0;
			break;
		case NGM_PIPE_SET_CFG:
			cfg = (struct ng_pipe_cfg *) msg->data;
			if (msg->header.arglen != sizeof(*cfg)) {
				error = EINVAL;
				break;
			}

			/*
			 * Shared-parameter convention: -1 resets to the
			 * default, in-range values are applied, anything
			 * else leaves the current setting untouched.
			 */
			if (cfg->delay == -1)
				priv->delay = 0;
			else if (cfg->delay > 0 && cfg->delay < 10000000)
				priv->delay = cfg->delay;

			if (cfg->bandwidth == -1) {
				priv->upper.cfg.bandwidth = 0;
				priv->lower.cfg.bandwidth = 0;
				priv->overhead = 0;
			} else if (cfg->bandwidth >= 100 &&
			    cfg->bandwidth <= 1000000000) {
				priv->upper.cfg.bandwidth = cfg->bandwidth;
				priv->lower.cfg.bandwidth = cfg->bandwidth;
				/*
				 * Guess a per-frame L2 overhead from the
				 * rate; may be overridden just below.
				 */
				if (cfg->bandwidth >= 10000000)
					priv->overhead = 8+4+12; /* Ethernet */
				else
					priv->overhead = 10; /* HDLC */
			}

			if (cfg->overhead == -1)
				priv->overhead = 0;
			else if (cfg->overhead > 0 &&
			    cfg->overhead < MAX_OHSIZE)
				priv->overhead = cfg->overhead;

			if (cfg->header_offset == -1)
				priv->header_offset = 0;
			else if (cfg->header_offset > 0 &&
			    cfg->header_offset < 64)
				priv->header_offset = cfg->header_offset;

			/*
			 * ber == 1 acts as a forced link-down state; track
			 * its transitions across parse_cfg() so we can
			 * notify both peers of the change.
			 */
			prev_down = priv->upper.cfg.ber == 1 ||
			    priv->lower.cfg.ber == 1;
			parse_cfg(&priv->upper.cfg, &cfg->downstream,
			    &priv->upper, priv);
			parse_cfg(&priv->lower.cfg, &cfg->upstream,
			    &priv->lower, priv);
			now_down = priv->upper.cfg.ber == 1 ||
			    priv->lower.cfg.ber == 1;

			if (prev_down != now_down) {
				if (now_down)
					cmd = NGM_LINK_IS_DOWN;
				else
					cmd = NGM_LINK_IS_UP;

				/* Best effort: skip peer on alloc failure. */
				if (priv->lower.hook != NULL) {
					NG_MKMESSAGE(flow_msg, NGM_FLOW_COOKIE,
					    cmd, 0, M_NOWAIT);
					if (flow_msg != NULL)
						NG_SEND_MSG_HOOK(error, node,
						    flow_msg, priv->lower.hook,
						    0);
				}
				if (priv->upper.hook != NULL) {
					NG_MKMESSAGE(flow_msg, NGM_FLOW_COOKIE,
					    cmd, 0, M_NOWAIT);
					if (flow_msg != NULL)
						NG_SEND_MSG_HOOK(error, node,
						    flow_msg, priv->upper.hook,
						    0);
				}
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	NG_RESPOND_MSG(error, node, item, resp);
	NG_FREE_MSG(msg);

	return (error);
}
453
/*
 * Apply a new per-hook configuration ("new") onto the active one
 * ("current"), validating each field.  Convention: -1 resets a
 * parameter to its default, in-range values are applied, anything
 * else leaves the current value untouched.  Also (re)builds the BER
 * lookup table and recomputes the hook's fast-path bypass flag.
 */
static void
parse_cfg(struct ng_pipe_hookcfg *current, struct ng_pipe_hookcfg *new,
	struct hookinfo *hinfo, priv_p priv)
{

	if (new->ber == -1) {
		current->ber = 0;
		if (hinfo->ber_p) {
			free(hinfo->ber_p, M_NG_PIPE);
			hinfo->ber_p = NULL;
		}
	} else if (new->ber >= 1 && new->ber <= 1000000000000) {
		static const uint64_t one = 0x1000000000000; /* = 2^48 */
		uint64_t p0, p;
		uint32_t fsize, i;

		if (hinfo->ber_p == NULL)
			hinfo->ber_p =
			    malloc((MAX_FSIZE + MAX_OHSIZE) * sizeof(uint64_t),
				M_NG_PIPE, M_WAITOK);
		current->ber = new->ber;

		/*
		 * For given BER and each frame size N (in bytes) calculate
		 * the probability P_OK that the frame is clean:
		 *
		 * P_OK(BER,N) = (1 - 1/BER)^(N*8)
		 *
		 * We use a 64-bit fixed-point format with decimal point
		 * positioned between bits 47 and 48.
		 */
		p0 = one - one / new->ber;
		p = one;
		for (fsize = 0; fsize < MAX_FSIZE + MAX_OHSIZE; fsize++) {
			hinfo->ber_p[fsize] = p;
			/*
			 * One multiply by p0 per bit of the byte; p0 is
			 * split into 16-bit limbs so the 64-bit partial
			 * products cannot overflow.
			 */
			for (i = 0; i < 8; i++)
				p = (p * (p0 & 0xffff) >> 48) +
				    (p * ((p0 >> 16) & 0xffff) >> 32) +
				    (p * (p0 >> 32) >> 16);
		}
	}

	if (new->qin_size_limit == -1)
		current->qin_size_limit = 0;
	else if (new->qin_size_limit >= 5)
		current->qin_size_limit = new->qin_size_limit;

	if (new->qout_size_limit == -1)
		current->qout_size_limit = 0;
	else if (new->qout_size_limit >= 5)
		current->qout_size_limit = new->qout_size_limit;

	if (new->duplicate == -1)
		current->duplicate = 0;
	else if (new->duplicate > 0 && new->duplicate <= 50)
		current->duplicate = new->duplicate;

	/* The three queueing disciplines are mutually exclusive. */
	if (new->fifo) {
		current->fifo = 1;
		current->wfq = 0;
		current->drr = 0;
	}

	if (new->wfq) {
		current->fifo = 0;
		current->wfq = 1;
		current->drr = 0;
	}

	if (new->drr) {
		current->fifo = 0;
		current->wfq = 0;
		/* DRR quantum */
		if (new->drr >= 32)
			current->drr = new->drr;
		else
			current->drr = 2048; /* default quantum */
	}

	/* So are the two drop policies. */
	if (new->droptail) {
		current->droptail = 1;
		current->drophead = 0;
	}

	if (new->drophead) {
		current->droptail = 0;
		current->drophead = 1;
	}

	/* Disabling shaping reverts the discipline to a plain FIFO. */
	if (new->bandwidth == -1) {
		current->bandwidth = 0;
		current->fifo = 1;
		current->wfq = 0;
		current->drr = 0;
	} else if (new->bandwidth >= 100 && new->bandwidth <= 1000000000)
		current->bandwidth = new->bandwidth;

	/*
	 * With no shaping, delay, duplication or BER configured at all,
	 * ngp_rcvdata() may bypass queueing entirely for this hook.
	 */
	if (current->bandwidth | priv->delay |
	    current->duplicate | current->ber)
		hinfo->noqueue = 0;
	else
		hinfo->noqueue = 1;
}
557
558 /*
559 * Compute a hash signature for a packet. This function suffers from the
560 * NIH sindrome, so probably it would be wise to look around what other
561 * folks have found out to be a good and efficient IP hash function...
562 */
563 static int
ip_hash(struct mbuf * m,int offset)564 ip_hash(struct mbuf *m, int offset)
565 {
566 u_int64_t i;
567 struct ip *ip = (struct ip *)(mtod(m, u_char *) + offset);
568
569 if (m->m_len < sizeof(struct ip) + offset ||
570 ip->ip_v != 4 || ip->ip_hl << 2 != sizeof(struct ip))
571 return 0;
572
573 i = ((u_int64_t) ip->ip_src.s_addr ^
574 ((u_int64_t) ip->ip_src.s_addr << 13) ^
575 ((u_int64_t) ip->ip_dst.s_addr << 7) ^
576 ((u_int64_t) ip->ip_dst.s_addr << 19));
577 return (i ^ (i >> 32));
578 }
579
580 /*
581 * Receive data on a hook - both in upstream and downstream direction.
582 * We put the frame on the inbound queue, and try to initiate dequeuing
583 * sequence immediately. If inbound queue is full, discard one frame
584 * depending on dropping policy (from the head or from the tail of the
585 * queue).
586 */
587 static int
ngp_rcvdata(hook_p hook,item_p item)588 ngp_rcvdata(hook_p hook, item_p item)
589 {
590 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
591 const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
592 struct timeval uuptime;
593 struct timeval *now = &uuptime;
594 struct ngp_fifo *ngp_f = NULL, *ngp_f1;
595 struct ngp_hdr *ngp_h = NULL;
596 struct mbuf *m;
597 int hash, plen;
598 int error = 0;
599
600 /*
601 * Shortcut from inbound to outbound hook when neither of
602 * bandwidth, delay, BER or duplication probability is
603 * configured, nor we have queued frames to drain.
604 */
605 if (hinfo->run.qin_frames == 0 && hinfo->run.qout_frames == 0 &&
606 hinfo->noqueue) {
607 struct hookinfo *dest;
608 if (hinfo == &priv->lower)
609 dest = &priv->upper;
610 else
611 dest = &priv->lower;
612
613 /* Send the frame. */
614 plen = NGI_M(item)->m_pkthdr.len;
615 NG_FWD_ITEM_HOOK(error, item, dest->hook);
616
617 /* Update stats. */
618 if (error) {
619 hinfo->stats.out_disc_frames++;
620 hinfo->stats.out_disc_octets += plen;
621 } else {
622 hinfo->stats.fwd_frames++;
623 hinfo->stats.fwd_octets += plen;
624 }
625
626 return (error);
627 }
628
629 microuptime(now);
630
631 /*
632 * If this was an empty queue, update service deadline time.
633 */
634 if (hinfo->run.qin_frames == 0) {
635 struct timeval *when = &hinfo->qin_utime;
636 if (when->tv_sec < now->tv_sec || (when->tv_sec == now->tv_sec
637 && when->tv_usec < now->tv_usec)) {
638 when->tv_sec = now->tv_sec;
639 when->tv_usec = now->tv_usec;
640 }
641 }
642
643 /* Populate the packet header */
644 NGI_GET_M(item, m);
645 NG_FREE_ITEM(item);
646 ngp_h = uma_zalloc(ngp_zone, M_NOWAIT);
647 if (ngp_h == NULL) {
648 NG_FREE_M(m);
649 return (ENOMEM);
650 }
651 ngp_h->m = m;
652
653 if (hinfo->cfg.fifo)
654 hash = 0; /* all packets go into a single FIFO queue */
655 else
656 hash = ip_hash(m, priv->header_offset);
657
658 /* Find the appropriate FIFO queue for the packet and enqueue it*/
659 TAILQ_FOREACH(ngp_f, &hinfo->fifo_head, fifo_le)
660 if (hash == ngp_f->hash)
661 break;
662 if (ngp_f == NULL) {
663 ngp_f = uma_zalloc(ngp_zone, M_NOWAIT);
664 if (ngp_f == NULL) {
665 NG_FREE_M(ngp_h->m);
666 uma_zfree(ngp_zone, ngp_h);
667 return (ENOMEM);
668 }
669 TAILQ_INIT(&ngp_f->packet_head);
670 ngp_f->hash = hash;
671 ngp_f->packets = 1;
672 ngp_f->rr_deficit = hinfo->cfg.drr; /* DRR quantum */
673 hinfo->run.fifo_queues++;
674 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
675 FIFO_VTIME_SORT(m->m_pkthdr.len);
676 } else {
677 TAILQ_INSERT_TAIL(&ngp_f->packet_head, ngp_h, ngp_link);
678 ngp_f->packets++;
679 }
680 hinfo->run.qin_frames++;
681 hinfo->run.qin_octets += m->m_pkthdr.len;
682
683 /* Discard a frame if inbound queue limit has been reached */
684 if (hinfo->run.qin_frames > hinfo->cfg.qin_size_limit) {
685 struct mbuf *m1;
686 int longest = 0;
687
688 /* Find the longest queue */
689 TAILQ_FOREACH(ngp_f1, &hinfo->fifo_head, fifo_le)
690 if (ngp_f1->packets > longest) {
691 longest = ngp_f1->packets;
692 ngp_f = ngp_f1;
693 }
694
695 /* Drop a frame from the queue head/tail, depending on cfg */
696 if (hinfo->cfg.drophead)
697 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
698 else
699 ngp_h = TAILQ_LAST(&ngp_f->packet_head, p_head);
700 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
701 m1 = ngp_h->m;
702 uma_zfree(ngp_zone, ngp_h);
703 hinfo->run.qin_octets -= m1->m_pkthdr.len;
704 hinfo->stats.in_disc_octets += m1->m_pkthdr.len;
705 m_freem(m1);
706 if (--(ngp_f->packets) == 0) {
707 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
708 uma_zfree(ngp_zone, ngp_f);
709 hinfo->run.fifo_queues--;
710 }
711 hinfo->run.qin_frames--;
712 hinfo->stats.in_disc_frames++;
713 }
714
715 /*
716 * Try to start the dequeuing process immediately.
717 */
718 pipe_dequeue(hinfo, now);
719
720 return (0);
721 }
722
723 /*
724 * Dequeueing sequence - we basically do the following:
725 * 1) Try to extract the frame from the inbound (bandwidth) queue;
726 * 2) In accordance to BER specified, discard the frame randomly;
727 * 3) If the frame survives BER, prepend it with delay info and move it
728 * to outbound (delay) queue;
729 * 4) Loop to 2) until bandwidth quota for this timeslice is reached, or
730 * inbound queue is flushed completely;
731 * 5) Dequeue frames from the outbound queue and send them downstream until
732 * outbound queue is flushed completely, or the next frame in the queue
733 * is not due to be dequeued yet
734 */
735 static void
pipe_dequeue(struct hookinfo * hinfo,struct timeval * now)736 pipe_dequeue(struct hookinfo *hinfo, struct timeval *now) {
737 static uint64_t rand, oldrand;
738 const node_p node = NG_HOOK_NODE(hinfo->hook);
739 const priv_p priv = NG_NODE_PRIVATE(node);
740 struct hookinfo *dest;
741 struct ngp_fifo *ngp_f, *ngp_f1;
742 struct ngp_hdr *ngp_h, *ngp_dup;
743 struct timeval *when;
744 struct mbuf *m;
745 int plen, error = 0;
746
747 /* Which one is the destination hook? */
748 if (hinfo == &priv->lower)
749 dest = &priv->upper;
750 else
751 dest = &priv->lower;
752
753 /* Bandwidth queue processing */
754 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
755 when = &hinfo->qin_utime;
756 if (when->tv_sec > now->tv_sec || (when->tv_sec == now->tv_sec
757 && when->tv_usec > now->tv_usec))
758 break;
759
760 ngp_h = TAILQ_FIRST(&ngp_f->packet_head);
761 m = ngp_h->m;
762
763 /* Deficit Round Robin (DRR) processing */
764 if (hinfo->cfg.drr) {
765 if (ngp_f->rr_deficit >= m->m_pkthdr.len) {
766 ngp_f->rr_deficit -= m->m_pkthdr.len;
767 } else {
768 ngp_f->rr_deficit += hinfo->cfg.drr;
769 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
770 TAILQ_INSERT_TAIL(&hinfo->fifo_head,
771 ngp_f, fifo_le);
772 continue;
773 }
774 }
775
776 /*
777 * Either create a duplicate and pass it on, or
778 * dequeue the original packet. When any of the
779 * memory allocations for the duplicate package fail,
780 * simply do not duplicate it at all.
781 */
782 ngp_dup = NULL;
783 if (hinfo->cfg.duplicate &&
784 prng32_bounded(100) <= hinfo->cfg.duplicate) {
785 ngp_dup = uma_zalloc(ngp_zone, M_NOWAIT);
786 if (ngp_dup != NULL) {
787 ngp_dup->m = m_dup(m, M_NOWAIT);
788 if (ngp_dup->m == NULL) {
789 uma_zfree(ngp_zone, ngp_dup);
790 ngp_dup = NULL;
791 }
792 }
793 }
794
795 if (ngp_dup != NULL) {
796 ngp_h = ngp_dup;
797 m = ngp_h->m;
798 } else {
799 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
800 hinfo->run.qin_frames--;
801 hinfo->run.qin_octets -= m->m_pkthdr.len;
802 ngp_f->packets--;
803 }
804
805 /* Calculate the serialization delay */
806 if (hinfo->cfg.bandwidth) {
807 hinfo->qin_utime.tv_usec +=
808 ((uint64_t) m->m_pkthdr.len + priv->overhead ) *
809 8000000 / hinfo->cfg.bandwidth;
810 hinfo->qin_utime.tv_sec +=
811 hinfo->qin_utime.tv_usec / 1000000;
812 hinfo->qin_utime.tv_usec =
813 hinfo->qin_utime.tv_usec % 1000000;
814 }
815 when = &ngp_h->when;
816 when->tv_sec = hinfo->qin_utime.tv_sec;
817 when->tv_usec = hinfo->qin_utime.tv_usec;
818
819 /* Sort / rearrange inbound queues */
820 if (ngp_f->packets) {
821 if (hinfo->cfg.wfq) {
822 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
823 FIFO_VTIME_SORT(TAILQ_FIRST(
824 &ngp_f->packet_head)->m->m_pkthdr.len)
825 }
826 } else {
827 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
828 uma_zfree(ngp_zone, ngp_f);
829 hinfo->run.fifo_queues--;
830 }
831
832 /* Randomly discard the frame, according to BER setting */
833 if (hinfo->cfg.ber) {
834 oldrand = rand;
835 rand = prng32_bounded(1U << 31);
836 if (((oldrand ^ rand) << 17) >=
837 hinfo->ber_p[priv->overhead + m->m_pkthdr.len]) {
838 hinfo->stats.out_disc_frames++;
839 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
840 uma_zfree(ngp_zone, ngp_h);
841 m_freem(m);
842 continue;
843 }
844 }
845
846 /* Discard frame if outbound queue size limit exceeded */
847 if (hinfo->cfg.qout_size_limit &&
848 hinfo->run.qout_frames>=hinfo->cfg.qout_size_limit) {
849 hinfo->stats.out_disc_frames++;
850 hinfo->stats.out_disc_octets += m->m_pkthdr.len;
851 uma_zfree(ngp_zone, ngp_h);
852 m_freem(m);
853 continue;
854 }
855
856 /* Calculate the propagation delay */
857 when->tv_usec += priv->delay;
858 when->tv_sec += when->tv_usec / 1000000;
859 when->tv_usec = when->tv_usec % 1000000;
860
861 /* Put the frame into the delay queue */
862 TAILQ_INSERT_TAIL(&hinfo->qout_head, ngp_h, ngp_link);
863 hinfo->run.qout_frames++;
864 hinfo->run.qout_octets += m->m_pkthdr.len;
865 }
866
867 /* Delay queue processing */
868 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
869 when = &ngp_h->when;
870 m = ngp_h->m;
871 if (when->tv_sec > now->tv_sec ||
872 (when->tv_sec == now->tv_sec &&
873 when->tv_usec > now->tv_usec))
874 break;
875
876 /* Update outbound queue stats */
877 plen = m->m_pkthdr.len;
878 hinfo->run.qout_frames--;
879 hinfo->run.qout_octets -= plen;
880
881 /* Dequeue the packet from qout */
882 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
883 uma_zfree(ngp_zone, ngp_h);
884
885 NG_SEND_DATA(error, dest->hook, m, meta);
886 if (error) {
887 hinfo->stats.out_disc_frames++;
888 hinfo->stats.out_disc_octets += plen;
889 } else {
890 hinfo->stats.fwd_frames++;
891 hinfo->stats.fwd_octets += plen;
892 }
893 }
894
895 if ((hinfo->run.qin_frames != 0 || hinfo->run.qout_frames != 0) &&
896 !priv->timer_scheduled) {
897 ng_callout(&priv->timer, node, NULL, 1, ngp_callout, NULL, 0);
898 priv->timer_scheduled = 1;
899 }
900 }
901
902 /*
903 * This routine is called on every clock tick. We poll connected hooks
904 * for queued frames by calling pipe_dequeue().
905 */
906 static void
ngp_callout(node_p node,hook_p hook,void * arg1,int arg2)907 ngp_callout(node_p node, hook_p hook, void *arg1, int arg2)
908 {
909 const priv_p priv = NG_NODE_PRIVATE(node);
910 struct timeval now;
911
912 priv->timer_scheduled = 0;
913 microuptime(&now);
914 if (priv->upper.hook != NULL)
915 pipe_dequeue(&priv->upper, &now);
916 if (priv->lower.hook != NULL)
917 pipe_dequeue(&priv->lower, &now);
918 }
919
920 /*
921 * Shutdown processing
922 *
923 * This is tricky. If we have both a lower and upper hook, then we
924 * probably want to extricate ourselves and leave the two peers
925 * still linked to each other. Otherwise we should just shut down as
926 * a normal node would.
927 */
928 static int
ngp_shutdown(node_p node)929 ngp_shutdown(node_p node)
930 {
931 const priv_p priv = NG_NODE_PRIVATE(node);
932
933 if (priv->timer_scheduled)
934 ng_uncallout(&priv->timer, node);
935 if (priv->lower.hook && priv->upper.hook)
936 ng_bypass(priv->lower.hook, priv->upper.hook);
937 else {
938 if (priv->upper.hook != NULL)
939 ng_rmhook_self(priv->upper.hook);
940 if (priv->lower.hook != NULL)
941 ng_rmhook_self(priv->lower.hook);
942 }
943 NG_NODE_UNREF(node);
944 free(priv, M_NG_PIPE);
945 return (0);
946 }
947
948 /*
949 * Hook disconnection
950 */
951 static int
ngp_disconnect(hook_p hook)952 ngp_disconnect(hook_p hook)
953 {
954 struct hookinfo *const hinfo = NG_HOOK_PRIVATE(hook);
955 struct ngp_fifo *ngp_f;
956 struct ngp_hdr *ngp_h;
957 priv_p priv;
958
959 KASSERT(hinfo != NULL, ("%s: null info", __FUNCTION__));
960 hinfo->hook = NULL;
961
962 /* Flush all fifo queues associated with the hook */
963 while ((ngp_f = TAILQ_FIRST(&hinfo->fifo_head))) {
964 while ((ngp_h = TAILQ_FIRST(&ngp_f->packet_head))) {
965 TAILQ_REMOVE(&ngp_f->packet_head, ngp_h, ngp_link);
966 m_freem(ngp_h->m);
967 uma_zfree(ngp_zone, ngp_h);
968 }
969 TAILQ_REMOVE(&hinfo->fifo_head, ngp_f, fifo_le);
970 uma_zfree(ngp_zone, ngp_f);
971 }
972
973 /* Flush the delay queue */
974 while ((ngp_h = TAILQ_FIRST(&hinfo->qout_head))) {
975 TAILQ_REMOVE(&hinfo->qout_head, ngp_h, ngp_link);
976 m_freem(ngp_h->m);
977 uma_zfree(ngp_zone, ngp_h);
978 }
979
980 /* Release the packet loss probability table (BER) */
981 if (hinfo->ber_p)
982 free(hinfo->ber_p, M_NG_PIPE);
983
984 /* Destroy the node if all hooks are disconnected */
985 priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook));
986
987 if (priv->upper.hook == NULL && priv->lower.hook == NULL)
988 ng_rmnode_self(NG_HOOK_NODE(hook));
989
990 return (0);
991 }
992
993 static int
ngp_modevent(module_t mod,int type,void * unused)994 ngp_modevent(module_t mod, int type, void *unused)
995 {
996 int error = 0;
997
998 switch (type) {
999 case MOD_LOAD:
1000 ngp_zone = uma_zcreate("ng_pipe", max(sizeof(struct ngp_hdr),
1001 sizeof (struct ngp_fifo)), NULL, NULL, NULL, NULL,
1002 UMA_ALIGN_PTR, 0);
1003 break;
1004 case MOD_UNLOAD:
1005 uma_zdestroy(ngp_zone);
1006 break;
1007 default:
1008 error = EOPNOTSUPP;
1009 break;
1010 }
1011
1012 return (error);
1013 }
1014