1 /* $NetBSD: altq_subr.c,v 1.32 2016/06/20 08:30:58 knakahara Exp $ */
2 /* $KAME: altq_subr.c,v 1.24 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.32 2016/06/20 08:30:58 knakahara Exp $");
32
33 #ifdef _KERNEL_OPT
34 #include "opt_altq.h"
35 #include "opt_inet.h"
36 #include "pf.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/kernel.h>
47 #include <sys/errno.h>
48 #include <sys/syslog.h>
49 #include <sys/sysctl.h>
50 #include <sys/queue.h>
51
52 #include <net/if.h>
53 #include <net/if_dl.h>
54 #include <net/if_types.h>
55
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #ifdef INET6
60 #include <netinet/ip6.h>
61 #endif
62 #include <netinet/tcp.h>
63 #include <netinet/udp.h>
64
65 #if NPF > 0
66 #include <net/pfvar.h>
67 #endif
68 #include <altq/altq.h>
69 #ifdef ALTQ3_COMPAT
70 #include <altq/altq_conf.h>
71 #endif
72
73 /*
74 * internal function prototypes
75 */
76 static void tbr_timeout(void *);
77 int (*altq_input)(struct mbuf *, int) = NULL;
78 static int tbr_timer = 0; /* token bucket regulator timer */
79 static struct callout tbr_callout;
80
81 #ifdef ALTQ3_CLFIER_COMPAT
82 static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
83 #ifdef INET6
84 static int extract_ports6(struct mbuf *, struct ip6_hdr *,
85 struct flowinfo_in6 *);
86 #endif
87 static int apply_filter4(u_int32_t, struct flow_filter *,
88 struct flowinfo_in *);
89 static int apply_ppfilter4(u_int32_t, struct flow_filter *,
90 struct flowinfo_in *);
91 #ifdef INET6
92 static int apply_filter6(u_int32_t, struct flow_filter6 *,
93 struct flowinfo_in6 *);
94 #endif
95 static int apply_tosfilter4(u_int32_t, struct flow_filter *,
96 struct flowinfo_in *);
97 static u_long get_filt_handle(struct acc_classifier *, int);
98 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
99 static u_int32_t filt2fibmask(struct flow_filter *);
100
101 static void ip4f_cache(struct ip *, struct flowinfo_in *);
102 static int ip4f_lookup(struct ip *, struct flowinfo_in *);
103 static int ip4f_init(void);
104 static struct ip4_frag *ip4f_alloc(void);
105 static void ip4f_free(struct ip4_frag *);
106 #endif /* ALTQ3_CLFIER_COMPAT */
107
108 /*
109 * alternate queueing support routines
110 */
111
112 /* look up the queue state by the interface name and the queueing type. */
113 void *
altq_lookup(char * name,int type)114 altq_lookup(char *name, int type)
115 {
116 struct ifnet *ifp;
117
118 if ((ifp = ifunit(name)) != NULL) {
119 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
120 return (ifp->if_snd.altq_disc);
121 }
122
123 return NULL;
124 }
125
126 int
altq_attach(struct ifaltq * ifq,int type,void * discipline,int (* enqueue)(struct ifaltq *,struct mbuf *),struct mbuf * (* dequeue)(struct ifaltq *,int),int (* request)(struct ifaltq *,int,void *),void * clfier,void * (* classify)(void *,struct mbuf *,int))127 altq_attach(struct ifaltq *ifq, int type, void *discipline,
128 int (*enqueue)(struct ifaltq *, struct mbuf *),
129 struct mbuf *(*dequeue)(struct ifaltq *, int),
130 int (*request)(struct ifaltq *, int, void *),
131 void *clfier, void *(*classify)(void *, struct mbuf *, int))
132 {
133 if (!ALTQ_IS_READY(ifq))
134 return ENXIO;
135
136 #ifdef ALTQ3_COMPAT
137 /*
138 * pfaltq can override the existing discipline, but altq3 cannot.
139 * check these if clfier is not NULL (which implies altq3).
140 */
141 if (clfier != NULL) {
142 if (ALTQ_IS_ENABLED(ifq))
143 return EBUSY;
144 if (ALTQ_IS_ATTACHED(ifq))
145 return EEXIST;
146 }
147 #endif
148 ifq->altq_type = type;
149 ifq->altq_disc = discipline;
150 ifq->altq_enqueue = enqueue;
151 ifq->altq_dequeue = dequeue;
152 ifq->altq_request = request;
153 ifq->altq_clfier = clfier;
154 ifq->altq_classify = classify;
155 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
156 #ifdef ALTQ3_COMPAT
157 #ifdef ALTQ_KLD
158 altq_module_incref(type);
159 #endif
160 #endif
161 return 0;
162 }
163
164 int
altq_detach(struct ifaltq * ifq)165 altq_detach(struct ifaltq *ifq)
166 {
167 if (!ALTQ_IS_READY(ifq))
168 return ENXIO;
169 if (ALTQ_IS_ENABLED(ifq))
170 return EBUSY;
171 if (!ALTQ_IS_ATTACHED(ifq))
172 return (0);
173 #ifdef ALTQ3_COMPAT
174 #ifdef ALTQ_KLD
175 altq_module_declref(ifq->altq_type);
176 #endif
177 #endif
178
179 ifq->altq_type = ALTQT_NONE;
180 ifq->altq_disc = NULL;
181 ifq->altq_enqueue = NULL;
182 ifq->altq_dequeue = NULL;
183 ifq->altq_request = NULL;
184 ifq->altq_clfier = NULL;
185 ifq->altq_classify = NULL;
186 ifq->altq_flags &= ALTQF_CANTCHANGE;
187 return 0;
188 }
189
190 int
altq_enable(struct ifaltq * ifq)191 altq_enable(struct ifaltq *ifq)
192 {
193 int s;
194
195 if (!ALTQ_IS_READY(ifq))
196 return ENXIO;
197 if (ALTQ_IS_ENABLED(ifq))
198 return 0;
199
200 s = splnet();
201 IFQ_PURGE(ifq);
202 ASSERT(ifq->ifq_len == 0);
203 ifq->altq_flags |= ALTQF_ENABLED;
204 if (ifq->altq_clfier != NULL)
205 ifq->altq_flags |= ALTQF_CLASSIFY;
206 splx(s);
207
208 return 0;
209 }
210
211 int
altq_disable(struct ifaltq * ifq)212 altq_disable(struct ifaltq *ifq)
213 {
214 int s;
215
216 if (!ALTQ_IS_ENABLED(ifq))
217 return 0;
218
219 s = splnet();
220 IFQ_PURGE(ifq);
221 ASSERT(ifq->ifq_len == 0);
222 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
223 splx(s);
224 return 0;
225 }
226
#ifdef ALTQ_DEBUG
/*
 * Report a failed ALTQ assertion and panic.  Invoked through the
 * ASSERT() macro; never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif
237
238 /*
239 * internal representation of token bucket parameters
240 * rate: byte_per_unittime << 32
241 * (((bits_per_sec) / 8) << 32) / machclk_freq
242 * depth: byte << 32
243 *
244 */
245 #define TBR_SHIFT 32
246 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
247 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
248
/*
 * Dequeue (or poll) a packet through the token bucket regulator
 * attached to the ifaltq.  op is ALTDQ_POLL or ALTDQ_REMOVE.
 * Returns NULL when the bucket has no token left (rate limited) or
 * the underlying queue is empty.
 */
struct mbuf *
tbr_dequeue(struct ifaltq *ifq, int op)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				/* idle long enough: bucket refills fully */
				tbr->tbr_token = tbr->tbr_depth;
			else {
				/* refill proportionally to elapsed time,
				 * capped at the bucket depth */
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	/* use the discipline's dequeue when enabled, else the plain ifqueue */
	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	/* charge the bucket only for packets actually removed */
	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}
293
294 /*
295 * set a token bucket regulator.
296 * if the specified rate is zero, the token bucket regulator is deleted.
297 */
298 int
tbr_set(struct ifaltq * ifq,struct tb_profile * profile)299 tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
300 {
301 struct tb_regulator *tbr, *otbr;
302
303 if (machclk_freq == 0)
304 init_machclk();
305 if (machclk_freq == 0) {
306 printf("tbr_set: no CPU clock available!\n");
307 return (ENXIO);
308 }
309
310 if (profile->rate == 0) {
311 /* delete this tbr */
312 if ((tbr = ifq->altq_tbr) == NULL)
313 return (ENOENT);
314 ifq->altq_tbr = NULL;
315 free(tbr, M_DEVBUF);
316 return (0);
317 }
318
319 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
320 if (tbr == NULL)
321 return (ENOMEM);
322
323 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
324 tbr->tbr_depth = TBR_SCALE(profile->depth);
325 if (tbr->tbr_rate > 0)
326 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
327 else
328 tbr->tbr_filluptime = 0xffffffffffffffffLL;
329 tbr->tbr_token = tbr->tbr_depth;
330 tbr->tbr_last = read_machclk();
331 tbr->tbr_lastop = ALTDQ_REMOVE;
332
333 otbr = ifq->altq_tbr;
334 ifq->altq_tbr = tbr; /* set the new tbr */
335
336 if (otbr != NULL) {
337 free(otbr, M_DEVBUF);
338 } else {
339 if (tbr_timer == 0) {
340 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
341 tbr_timer = 1;
342 }
343 }
344 return (0);
345 }
346
347 /*
348 * tbr_timeout goes through the interface list, and kicks the drivers
349 * if necessary.
350 */
351 static void
tbr_timeout(void * arg)352 tbr_timeout(void *arg)
353 {
354 struct ifnet *ifp;
355 int active, s;
356
357 active = 0;
358 s = pserialize_read_enter();
359 IFNET_READER_FOREACH(ifp) {
360 struct psref psref;
361 if (!TBR_IS_ENABLED(&ifp->if_snd))
362 continue;
363 psref_acquire(&psref, &ifp->if_psref, ifnet_psref_class);
364 pserialize_read_exit(s);
365
366 active++;
367 if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL) {
368 int _s = splnet();
369 if_start_lock(ifp);
370 splx(_s);
371 }
372
373 s = pserialize_read_enter();
374 psref_release(&psref, &ifp->if_psref, ifnet_psref_class);
375 }
376 pserialize_read_exit(s);
377
378 if (active > 0)
379 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
380 else
381 tbr_timer = 0; /* don't need tbr_timer anymore */
382 }
383
384 /*
385 * get token bucket regulator profile
386 */
387 int
tbr_get(struct ifaltq * ifq,struct tb_profile * profile)388 tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
389 {
390 struct tb_regulator *tbr;
391
392 if ((tbr = ifq->altq_tbr) == NULL) {
393 profile->rate = 0;
394 profile->depth = 0;
395 } else {
396 profile->rate =
397 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
398 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
399 }
400 return (0);
401 }
402
403 #if NPF > 0
404 /*
405 * attach a discipline to the interface. if one already exists, it is
406 * overridden.
407 */
408 int
altq_pfattach(struct pf_altq * a)409 altq_pfattach(struct pf_altq *a)
410 {
411 int error = 0;
412
413 switch (a->scheduler) {
414 case ALTQT_NONE:
415 break;
416 #ifdef ALTQ_CBQ
417 case ALTQT_CBQ:
418 error = cbq_pfattach(a);
419 break;
420 #endif
421 #ifdef ALTQ_PRIQ
422 case ALTQT_PRIQ:
423 error = priq_pfattach(a);
424 break;
425 #endif
426 #ifdef ALTQ_HFSC
427 case ALTQT_HFSC:
428 error = hfsc_pfattach(a);
429 break;
430 #endif
431 default:
432 error = ENXIO;
433 }
434
435 return (error);
436 }
437
438 /*
439 * detach a discipline from the interface.
440 * it is possible that the discipline was already overridden by another
441 * discipline.
442 */
443 int
altq_pfdetach(struct pf_altq * a)444 altq_pfdetach(struct pf_altq *a)
445 {
446 struct ifnet *ifp;
447 int s, error = 0;
448
449 if ((ifp = ifunit(a->ifname)) == NULL)
450 return (EINVAL);
451
452 /* if this discipline is no longer referenced, just return */
453 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
454 return (0);
455
456 s = splnet();
457 if (ALTQ_IS_ENABLED(&ifp->if_snd))
458 error = altq_disable(&ifp->if_snd);
459 if (error == 0)
460 error = altq_detach(&ifp->if_snd);
461 splx(s);
462
463 return (error);
464 }
465
466 /*
467 * add a discipline or a queue
468 */
469 int
altq_add(struct pf_altq * a)470 altq_add(struct pf_altq *a)
471 {
472 int error = 0;
473
474 if (a->qname[0] != 0)
475 return (altq_add_queue(a));
476
477 if (machclk_freq == 0)
478 init_machclk();
479 if (machclk_freq == 0)
480 panic("altq_add: no CPU clock");
481
482 switch (a->scheduler) {
483 #ifdef ALTQ_CBQ
484 case ALTQT_CBQ:
485 error = cbq_add_altq(a);
486 break;
487 #endif
488 #ifdef ALTQ_PRIQ
489 case ALTQT_PRIQ:
490 error = priq_add_altq(a);
491 break;
492 #endif
493 #ifdef ALTQ_HFSC
494 case ALTQT_HFSC:
495 error = hfsc_add_altq(a);
496 break;
497 #endif
498 default:
499 error = ENXIO;
500 }
501
502 return (error);
503 }
504
505 /*
506 * remove a discipline or a queue
507 */
508 int
altq_remove(struct pf_altq * a)509 altq_remove(struct pf_altq *a)
510 {
511 int error = 0;
512
513 if (a->qname[0] != 0)
514 return (altq_remove_queue(a));
515
516 switch (a->scheduler) {
517 #ifdef ALTQ_CBQ
518 case ALTQT_CBQ:
519 error = cbq_remove_altq(a);
520 break;
521 #endif
522 #ifdef ALTQ_PRIQ
523 case ALTQT_PRIQ:
524 error = priq_remove_altq(a);
525 break;
526 #endif
527 #ifdef ALTQ_HFSC
528 case ALTQT_HFSC:
529 error = hfsc_remove_altq(a);
530 break;
531 #endif
532 default:
533 error = ENXIO;
534 }
535
536 return (error);
537 }
538
539 /*
540 * add a queue to the discipline
541 */
542 int
altq_add_queue(struct pf_altq * a)543 altq_add_queue(struct pf_altq *a)
544 {
545 int error = 0;
546
547 switch (a->scheduler) {
548 #ifdef ALTQ_CBQ
549 case ALTQT_CBQ:
550 error = cbq_add_queue(a);
551 break;
552 #endif
553 #ifdef ALTQ_PRIQ
554 case ALTQT_PRIQ:
555 error = priq_add_queue(a);
556 break;
557 #endif
558 #ifdef ALTQ_HFSC
559 case ALTQT_HFSC:
560 error = hfsc_add_queue(a);
561 break;
562 #endif
563 default:
564 error = ENXIO;
565 }
566
567 return (error);
568 }
569
570 /*
571 * remove a queue from the discipline
572 */
573 int
altq_remove_queue(struct pf_altq * a)574 altq_remove_queue(struct pf_altq *a)
575 {
576 int error = 0;
577
578 switch (a->scheduler) {
579 #ifdef ALTQ_CBQ
580 case ALTQT_CBQ:
581 error = cbq_remove_queue(a);
582 break;
583 #endif
584 #ifdef ALTQ_PRIQ
585 case ALTQT_PRIQ:
586 error = priq_remove_queue(a);
587 break;
588 #endif
589 #ifdef ALTQ_HFSC
590 case ALTQT_HFSC:
591 error = hfsc_remove_queue(a);
592 break;
593 #endif
594 default:
595 error = ENXIO;
596 }
597
598 return (error);
599 }
600
601 /*
602 * get queue statistics
603 */
604 int
altq_getqstats(struct pf_altq * a,void * ubuf,int * nbytes)605 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
606 {
607 int error = 0;
608
609 switch (a->scheduler) {
610 #ifdef ALTQ_CBQ
611 case ALTQT_CBQ:
612 error = cbq_getqstats(a, ubuf, nbytes);
613 break;
614 #endif
615 #ifdef ALTQ_PRIQ
616 case ALTQT_PRIQ:
617 error = priq_getqstats(a, ubuf, nbytes);
618 break;
619 #endif
620 #ifdef ALTQ_HFSC
621 case ALTQT_HFSC:
622 error = hfsc_getqstats(a, ubuf, nbytes);
623 break;
624 #endif
625 default:
626 error = ENXIO;
627 }
628
629 return (error);
630 }
631 #endif /* NPF > 0 */
632
633 /*
634 * read and write diffserv field in IPv4 or IPv6 header
635 */
636 u_int8_t
read_dsfield(struct mbuf * m,struct altq_pktattr * pktattr)637 read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
638 {
639 struct mbuf *m0;
640 u_int8_t ds_field = 0;
641
642 if (pktattr == NULL ||
643 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
644 return ((u_int8_t)0);
645
646 /* verify that pattr_hdr is within the mbuf data */
647 for (m0 = m; m0 != NULL; m0 = m0->m_next)
648 if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
649 ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
650 break;
651 if (m0 == NULL) {
652 /* ick, pattr_hdr is stale */
653 pktattr->pattr_af = AF_UNSPEC;
654 #ifdef ALTQ_DEBUG
655 printf("read_dsfield: can't locate header!\n");
656 #endif
657 return ((u_int8_t)0);
658 }
659
660 if (pktattr->pattr_af == AF_INET) {
661 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
662
663 if (ip->ip_v != 4)
664 return ((u_int8_t)0); /* version mismatch! */
665 ds_field = ip->ip_tos;
666 }
667 #ifdef INET6
668 else if (pktattr->pattr_af == AF_INET6) {
669 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
670 u_int32_t flowlabel;
671
672 flowlabel = ntohl(ip6->ip6_flow);
673 if ((flowlabel >> 28) != 6)
674 return ((u_int8_t)0); /* version mismatch! */
675 ds_field = (flowlabel >> 20) & 0xff;
676 }
677 #endif
678 return (ds_field);
679 }
680
/*
 * write_dsfield: store dsfield into the TOS byte (IPv4, preserving the
 * two low "currently unused" bits and incrementally fixing the header
 * checksum) or into the traffic class bits of the IPv6 flow word.
 * Silently does nothing when the recorded header is missing, stale,
 * or of the wrong version.
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)pktattr->pattr_hdr >= m0->m_data) &&
		    ((char *)pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;		/* unchanged; checksum stays valid */
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 * where m is the old 16-bit word containing TOS and m'
		 * the new one; 0xff00 + ~old models ~m for the low byte.
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/* replace bits 20-27 (traffic class) with the new value */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
741
/* scales the 2^32-based bintime fraction down to the 2^30 Hz emulated
 * clock used by read_machclk() */
#define	BINTIME_SHIFT	2

u_int32_t machclk_freq = 0;	/* emulated machine clock frequency, Hz */
u_int32_t machclk_per_tick = 0;	/* machclk ticks per hardclock tick */
746
/*
 * init_machclk: set up the TBR callout and the emulated machine clock
 * globals (machclk_freq, machclk_per_tick).  Called lazily from
 * tbr_set()/altq_add() when machclk_freq is still zero.
 */
void
init_machclk(void)
{

	callout_init(&tbr_callout, 0);

	/*
	 * Always emulate 1GiHz counter using bintime(9)
	 * since it has enough resolution via timecounter(9).
	 * Using machine dependent cpu_counter() is not MP safe
	 * and it won't work even on UP with Speedstep etc.
	 */
	machclk_freq = 1024 * 1024 * 1024; /* 2^30 to emulate ~1GHz */
	machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
	printf("altq: emulate %uHz CPU clock\n", machclk_freq);
#endif
}
765
766 u_int64_t
read_machclk(void)767 read_machclk(void)
768 {
769 struct bintime bt;
770 u_int64_t val;
771
772 binuptime(&bt);
773 val = (((u_int64_t)bt.sec << 32) + (bt.frac >> 32)) >> BINTIME_SHIFT;
774 return (val);
775 }
776
777 #ifdef ALTQ3_CLFIER_COMPAT
778
779 #ifndef IPPROTO_ESP
780 #define IPPROTO_ESP 50 /* encapsulating security payload */
781 #endif
782 #ifndef IPPROTO_AH
783 #define IPPROTO_AH 51 /* authentication header */
784 #endif
785
786 /*
787 * extract flow information from a given packet.
788 * filt_mask shows flowinfo fields required.
789 * we assume the ip header is in one mbuf, and addresses and ports are
790 * in network byte order.
791 */
792 int
altq_extractflow(struct mbuf * m,int af,struct flowinfo * flow,u_int32_t filt_bmask)793 altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
794 u_int32_t filt_bmask)
795 {
796
797 switch (af) {
798 case PF_INET: {
799 struct flowinfo_in *fin;
800 struct ip *ip;
801
802 ip = mtod(m, struct ip *);
803
804 if (ip->ip_v != 4)
805 break;
806
807 fin = (struct flowinfo_in *)flow;
808 fin->fi_len = sizeof(struct flowinfo_in);
809 fin->fi_family = AF_INET;
810
811 fin->fi_proto = ip->ip_p;
812 fin->fi_tos = ip->ip_tos;
813
814 fin->fi_src.s_addr = ip->ip_src.s_addr;
815 fin->fi_dst.s_addr = ip->ip_dst.s_addr;
816
817 if (filt_bmask & FIMB4_PORTS)
818 /* if port info is required, extract port numbers */
819 extract_ports4(m, ip, fin);
820 else {
821 fin->fi_sport = 0;
822 fin->fi_dport = 0;
823 fin->fi_gpi = 0;
824 }
825 return (1);
826 }
827
828 #ifdef INET6
829 case PF_INET6: {
830 struct flowinfo_in6 *fin6;
831 struct ip6_hdr *ip6;
832
833 ip6 = mtod(m, struct ip6_hdr *);
834 /* should we check the ip version? */
835
836 fin6 = (struct flowinfo_in6 *)flow;
837 fin6->fi6_len = sizeof(struct flowinfo_in6);
838 fin6->fi6_family = AF_INET6;
839
840 fin6->fi6_proto = ip6->ip6_nxt;
841 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
842
843 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
844 fin6->fi6_src = ip6->ip6_src;
845 fin6->fi6_dst = ip6->ip6_dst;
846
847 if ((filt_bmask & FIMB6_PORTS) ||
848 ((filt_bmask & FIMB6_PROTO)
849 && ip6->ip6_nxt > IPPROTO_IPV6))
850 /*
851 * if port info is required, or proto is required
852 * but there are option headers, extract port
853 * and protocol numbers.
854 */
855 extract_ports6(m, ip6, fin6);
856 else {
857 fin6->fi6_sport = 0;
858 fin6->fi6_dport = 0;
859 fin6->fi6_gpi = 0;
860 }
861 return (1);
862 }
863 #endif /* INET6 */
864
865 default:
866 break;
867 }
868
869 /* failed */
870 flow->fi_len = sizeof(struct flowinfo);
871 flow->fi_family = AF_UNSPEC;
872 return (0);
873 }
874
875 /*
876 * helper routine to extract port numbers
877 */
878 /* structure for ipsec and ipv6 option header template */
879 struct _opt6 {
880 u_int8_t opt6_nxt; /* next header */
881 u_int8_t opt6_hlen; /* header extension length */
882 u_int16_t _pad;
883 u_int32_t ah_spi; /* security parameter index
884 for authentication header */
885 };
886
887 /*
888 * extract port numbers from a ipv4 packet.
889 */
890 static int
extract_ports4(struct mbuf * m,struct ip * ip,struct flowinfo_in * fin)891 extract_ports4(struct mbuf *m, struct ip *ip, struct flowinfo_in *fin)
892 {
893 struct mbuf *m0;
894 u_short ip_off;
895 u_int8_t proto;
896 int off;
897
898 fin->fi_sport = 0;
899 fin->fi_dport = 0;
900 fin->fi_gpi = 0;
901
902 ip_off = ntohs(ip->ip_off);
903 /* if it is a fragment, try cached fragment info */
904 if (ip_off & IP_OFFMASK) {
905 ip4f_lookup(ip, fin);
906 return (1);
907 }
908
909 /* locate the mbuf containing the protocol header */
910 for (m0 = m; m0 != NULL; m0 = m0->m_next)
911 if (((char *)ip >= m0->m_data) &&
912 ((char *)ip < m0->m_data + m0->m_len))
913 break;
914 if (m0 == NULL) {
915 #ifdef ALTQ_DEBUG
916 printf("extract_ports4: can't locate header! ip=%p\n", ip);
917 #endif
918 return (0);
919 }
920 off = ((char *)ip - m0->m_data) + (ip->ip_hl << 2);
921 proto = ip->ip_p;
922
923 #ifdef ALTQ_IPSEC
924 again:
925 #endif
926 while (off >= m0->m_len) {
927 off -= m0->m_len;
928 m0 = m0->m_next;
929 if (m0 == NULL)
930 return (0); /* bogus ip_hl! */
931 }
932 if (m0->m_len < off + 4)
933 return (0);
934
935 switch (proto) {
936 case IPPROTO_TCP:
937 case IPPROTO_UDP: {
938 struct udphdr *udp;
939
940 udp = (struct udphdr *)(mtod(m0, char *) + off);
941 fin->fi_sport = udp->uh_sport;
942 fin->fi_dport = udp->uh_dport;
943 fin->fi_proto = proto;
944 }
945 break;
946
947 #ifdef ALTQ_IPSEC
948 case IPPROTO_ESP:
949 if (fin->fi_gpi == 0){
950 u_int32_t *gpi;
951
952 gpi = (u_int32_t *)(mtod(m0, char *) + off);
953 fin->fi_gpi = *gpi;
954 }
955 fin->fi_proto = proto;
956 break;
957
958 case IPPROTO_AH: {
959 /* get next header and header length */
960 struct _opt6 *opt6;
961
962 opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
963 proto = opt6->opt6_nxt;
964 off += 8 + (opt6->opt6_hlen * 4);
965 if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
966 fin->fi_gpi = opt6->ah_spi;
967 }
968 /* goto the next header */
969 goto again;
970 #endif /* ALTQ_IPSEC */
971
972 default:
973 fin->fi_proto = proto;
974 return (0);
975 }
976
977 /* if this is a first fragment, cache it. */
978 if (ip_off & IP_MF)
979 ip4f_cache(ip, fin);
980
981 return (1);
982 }
983
984 #ifdef INET6
/*
 * extract_ports6: walk the IPv6 extension-header chain to find the
 * transport header and fill fi6_sport/fi6_dport/fi6_gpi/fi6_proto.
 * Returns 1 on success, 0 when parsing fails (fragments included).
 */
static int
extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)ip6 >= m0->m_data) &&
		    ((char *)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((char *)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf holding offset 'off' */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need 4 contiguous bytes for the reads below */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* both keep the ports in the first 4 bytes */
			udp = (struct udphdr *)(mtod(m0, char *) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
		}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* SPI is the first word of the ESP header */
				gpi = (u_int32_t *)(mtod(m0, char *) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length is counted in 4-byte units past 8 bytes */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
		}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, char *) + off);
			proto = opt6->opt6_nxt;
			/* option header length is in 8-byte units, excl. first */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
		}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
1077 #endif /* INET6 */
1078
1079 /*
1080 * altq common classifier
1081 */
1082 int
acc_add_filter(struct acc_classifier * classifier,struct flow_filter * filter,void * class,u_long * phandle)1083 acc_add_filter(struct acc_classifier *classifier, struct flow_filter *filter,
1084 void *class, u_long *phandle)
1085 {
1086 struct acc_filter *afp, *prev, *tmp;
1087 int i, s;
1088
1089 #ifdef INET6
1090 if (filter->ff_flow.fi_family != AF_INET &&
1091 filter->ff_flow.fi_family != AF_INET6)
1092 return (EINVAL);
1093 #else
1094 if (filter->ff_flow.fi_family != AF_INET)
1095 return (EINVAL);
1096 #endif
1097
1098 afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
1099 if (afp == NULL)
1100 return (ENOMEM);
1101
1102 afp->f_filter = *filter;
1103 afp->f_class = class;
1104
1105 i = ACC_WILDCARD_INDEX;
1106 if (filter->ff_flow.fi_family == AF_INET) {
1107 struct flow_filter *filter4 = &afp->f_filter;
1108
1109 /*
1110 * if address is 0, it's a wildcard. if address mask
1111 * isn't set, use full mask.
1112 */
1113 if (filter4->ff_flow.fi_dst.s_addr == 0)
1114 filter4->ff_mask.mask_dst.s_addr = 0;
1115 else if (filter4->ff_mask.mask_dst.s_addr == 0)
1116 filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1117 if (filter4->ff_flow.fi_src.s_addr == 0)
1118 filter4->ff_mask.mask_src.s_addr = 0;
1119 else if (filter4->ff_mask.mask_src.s_addr == 0)
1120 filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1121
1122 /* clear extra bits in addresses */
1123 filter4->ff_flow.fi_dst.s_addr &=
1124 filter4->ff_mask.mask_dst.s_addr;
1125 filter4->ff_flow.fi_src.s_addr &=
1126 filter4->ff_mask.mask_src.s_addr;
1127
1128 /*
1129 * if dst address is a wildcard, use hash-entry
1130 * ACC_WILDCARD_INDEX.
1131 */
1132 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1133 i = ACC_WILDCARD_INDEX;
1134 else
1135 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1136 }
1137 #ifdef INET6
1138 else if (filter->ff_flow.fi_family == AF_INET6) {
1139 struct flow_filter6 *filter6 =
1140 (struct flow_filter6 *)&afp->f_filter;
1141 #ifndef IN6MASK0 /* taken from kame ipv6 */
1142 #define IN6MASK0 {{{ 0, 0, 0, 0 }}}
1143 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1144 const struct in6_addr in6mask0 = IN6MASK0;
1145 const struct in6_addr in6mask128 = IN6MASK128;
1146 #endif
1147
1148 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1149 filter6->ff_mask6.mask6_dst = in6mask0;
1150 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1151 filter6->ff_mask6.mask6_dst = in6mask128;
1152 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1153 filter6->ff_mask6.mask6_src = in6mask0;
1154 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1155 filter6->ff_mask6.mask6_src = in6mask128;
1156
1157 /* clear extra bits in addresses */
1158 for (i = 0; i < 16; i++)
1159 filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1160 filter6->ff_mask6.mask6_dst.s6_addr[i];
1161 for (i = 0; i < 16; i++)
1162 filter6->ff_flow6.fi6_src.s6_addr[i] &=
1163 filter6->ff_mask6.mask6_src.s6_addr[i];
1164
1165 if (filter6->ff_flow6.fi6_flowlabel == 0)
1166 i = ACC_WILDCARD_INDEX;
1167 else
1168 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1169 }
1170 #endif /* INET6 */
1171
1172 afp->f_handle = get_filt_handle(classifier, i);
1173
1174 /* update filter bitmask */
1175 afp->f_fbmask = filt2fibmask(filter);
1176 classifier->acc_fbmask |= afp->f_fbmask;
1177
1178 /*
1179 * add this filter to the filter list.
1180 * filters are ordered from the highest rule number.
1181 */
1182 s = splnet();
1183 prev = NULL;
1184 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1185 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1186 prev = tmp;
1187 else
1188 break;
1189 }
1190 if (prev == NULL)
1191 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1192 else
1193 LIST_INSERT_AFTER(prev, afp, f_chain);
1194 splx(s);
1195
1196 *phandle = afp->f_handle;
1197 return (0);
1198 }
1199
1200 int
acc_delete_filter(struct acc_classifier * classifier,u_long handle)1201 acc_delete_filter(struct acc_classifier *classifier, u_long handle)
1202 {
1203 struct acc_filter *afp;
1204 int s;
1205
1206 if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1207 return (EINVAL);
1208
1209 s = splnet();
1210 LIST_REMOVE(afp, f_chain);
1211 splx(s);
1212
1213 free(afp, M_DEVBUF);
1214
1215 /* todo: update filt_bmask */
1216
1217 return (0);
1218 }
1219
1220 /*
1221 * delete filters referencing to the specified class.
1222 * if the all flag is not 0, delete all the filters.
1223 */
1224 int
acc_discard_filters(struct acc_classifier * classifier,void * class,int all)1225 acc_discard_filters(struct acc_classifier *classifier, void *class, int all)
1226 {
1227 struct acc_filter *afp;
1228 int i, s;
1229
1230 s = splnet();
1231 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1232 do {
1233 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1234 if (all || afp->f_class == class) {
1235 LIST_REMOVE(afp, f_chain);
1236 free(afp, M_DEVBUF);
1237 /* start again from the head */
1238 break;
1239 }
1240 } while (afp != NULL);
1241 }
1242 splx(s);
1243
1244 if (all)
1245 classifier->acc_fbmask = 0;
1246
1247 return (0);
1248 }
1249
/*
 * Classify packet m: extract its flowinfo and scan the classifier's
 * filter hash buckets for the first matching filter.  The scan is
 * specialized according to which fields the installed filters actually
 * use (tos only, proto/ports only, or a full match).  Returns the
 * matched filter's class, or NULL when no filter matches.
 */
void *
acc_classify(void *clfier, struct mbuf *m, int af)
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int i;

	classifier = (struct acc_classifier *)clfier;
	/* extract only the fields some filter cares about (acc_fbmask) */
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					 f_chain)
					if (apply_filter4(afp->f_fbmask,
					    &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
				    (struct flow_filter6 *)&afp->f_filter,
				    fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1346
/*
 * Full IPv4 filter match.  Each field is compared only when its bit is
 * set in fbmask; destination/source addresses and tos apply the
 * filter's mask to the packet value before comparing.
 * Returns 1 on match, 0 on mismatch.
 */
static int
apply_filter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}
1375
1376 /*
1377 * filter matching function optimized for a common case that checks
1378 * only protocol and port numbers
1379 */
1380 static int
apply_ppfilter4(u_int32_t fbmask,struct flow_filter * filt,struct flowinfo_in * pkt)1381 apply_ppfilter4(u_int32_t fbmask, struct flow_filter *filt,
1382 struct flowinfo_in *pkt)
1383 {
1384 if (filt->ff_flow.fi_family != AF_INET)
1385 return (0);
1386 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1387 return (0);
1388 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1389 return (0);
1390 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1391 return (0);
1392 /* match */
1393 return (1);
1394 }
1395
1396 /*
1397 * filter matching function only for tos field.
1398 */
1399 static int
apply_tosfilter4(u_int32_t fbmask,struct flow_filter * filt,struct flowinfo_in * pkt)1400 apply_tosfilter4(u_int32_t fbmask, struct flow_filter *filt,
1401 struct flowinfo_in *pkt)
1402 {
1403 if (filt->ff_flow.fi_family != AF_INET)
1404 return (0);
1405 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1406 (pkt->fi_tos & filt->ff_mask.mask_tos))
1407 return (0);
1408 /* match */
1409 return (1);
1410 }
1411
#ifdef INET6
/*
 * Full IPv6 filter match, the counterpart of apply_filter4.  Fields are
 * compared only when enabled in fbmask; the 128-bit addresses are
 * compared 32 bits at a time after applying the filter's mask, and the
 * traffic class is also masked before comparison.
 * Returns 1 on match, 0 on mismatch.
 */
static int
apply_filter6(u_int32_t fbmask, struct flow_filter6 *filt,
    struct flowinfo_in6 *pkt)
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		/* compare the masked source address word by word */
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		/* compare the masked destination address word by word */
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */
1458
1459 /*
1460 * filter handle:
1461 * bit 20-28: index to the filter hash table
1462 * bit 0-19: unique id in the hash bucket.
1463 */
1464 static u_long
get_filt_handle(struct acc_classifier * classifier,int i)1465 get_filt_handle(struct acc_classifier *classifier, int i)
1466 {
1467 static u_long handle_number = 1;
1468 u_long handle;
1469 struct acc_filter *afp;
1470
1471 while (1) {
1472 handle = handle_number++ & 0x000fffff;
1473
1474 if (LIST_EMPTY(&classifier->acc_filters[i]))
1475 break;
1476
1477 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1478 if ((afp->f_handle & 0x000fffff) == handle)
1479 break;
1480 if (afp == NULL)
1481 break;
1482 /* this handle is already used, try again */
1483 }
1484
1485 return ((i << 20) | handle);
1486 }
1487
1488 /* convert filter handle to filter pointer */
1489 static struct acc_filter *
filth_to_filtp(struct acc_classifier * classifier,u_long handle)1490 filth_to_filtp(struct acc_classifier *classifier, u_long handle)
1491 {
1492 struct acc_filter *afp;
1493 int i;
1494
1495 i = ACC_GET_HINDEX(handle);
1496
1497 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1498 if (afp->f_handle == handle)
1499 return (afp);
1500
1501 return (NULL);
1502 }
1503
/*
 * Create the flowinfo bitmask for a filter: a zero-valued field acts as
 * a wildcard, so only non-zero fields contribute a bit.  An unknown
 * address family yields 0 (no fields checked).
 */
static u_int32_t
filt2fibmask(struct flow_filter *filt)
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		/* the v6 filter overlays the generic one */
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}
1555
1556
1557 /*
1558 * helper functions to handle IPv4 fragments.
1559 * currently only in-sequence fragments are handled.
1560 * - fragment info is cached in a LRU list.
1561 * - when a first fragment is found, cache its flow info.
1562 * - when a non-first fragment is found, lookup the cache.
1563 */
1564
struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
	char    ip4f_valid;	/* nonzero while the entry holds live data */
	u_short ip4f_id;	/* IP identification field of the datagram */
	struct flowinfo_in ip4f_info;	/* flow info cached from the first
					   fragment (ports, gpi, addresses) */
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1575
1576
1577 static void
ip4f_cache(struct ip * ip,struct flowinfo_in * fin)1578 ip4f_cache(struct ip *ip, struct flowinfo_in *fin)
1579 {
1580 struct ip4_frag *fp;
1581
1582 if (TAILQ_EMPTY(&ip4f_list)) {
1583 /* first time call, allocate fragment cache entries. */
1584 if (ip4f_init() < 0)
1585 /* allocation failed! */
1586 return;
1587 }
1588
1589 fp = ip4f_alloc();
1590 fp->ip4f_id = ip->ip_id;
1591 fp->ip4f_info.fi_proto = ip->ip_p;
1592 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1593 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1594
1595 /* save port numbers */
1596 fp->ip4f_info.fi_sport = fin->fi_sport;
1597 fp->ip4f_info.fi_dport = fin->fi_dport;
1598 fp->ip4f_info.fi_gpi = fin->fi_gpi;
1599 }
1600
/*
 * Look up the fragment cache for a non-first fragment and, on a hit,
 * copy the cached port/gpi info into fin.  The entry is released when
 * the last fragment (IP_MF clear) is seen.  Returns 1 on a cache hit,
 * 0 otherwise.
 */
static int
ip4f_lookup(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	/*
	 * valid entries are kept at the head of the LRU list, so the
	 * scan can stop at the first invalid entry.
	 */
	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}
1629
1630 static int
ip4f_init(void)1631 ip4f_init(void)
1632 {
1633 struct ip4_frag *fp;
1634 int i;
1635
1636 TAILQ_INIT(&ip4f_list);
1637 for (i=0; i<IP4F_TABSIZE; i++) {
1638 fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
1639 if (fp == NULL) {
1640 printf("ip4f_init: can't alloc %dth entry!\n", i);
1641 if (i == 0)
1642 return (-1);
1643 return (0);
1644 }
1645 fp->ip4f_valid = 0;
1646 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1647 }
1648 return (0);
1649 }
1650
1651 static struct ip4_frag *
ip4f_alloc(void)1652 ip4f_alloc(void)
1653 {
1654 struct ip4_frag *fp;
1655
1656 /* reclaim an entry at the tail, put it at the head */
1657 fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1658 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1659 fp->ip4f_valid = 1;
1660 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1661 return (fp);
1662 }
1663
1664 static void
ip4f_free(struct ip4_frag * fp)1665 ip4f_free(struct ip4_frag *fp)
1666 {
1667 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1668 fp->ip4f_valid = 0;
1669 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1670 }
1671
1672 #endif /* ALTQ3_CLFIER_COMPAT */
1673