/*	$KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $	*/
/*	$DragonFly: src/sys/net/altq/altq_subr.c,v 1.12 2008/05/14 11:59:23 sephe Exp $ */

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/thread2.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/ifq_var.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <machine/clock.h>		/* for tsc_frequency */
#include <machine/md_var.h>		/* for cpu_feature */
#include <machine/specialreg.h>		/* for CPUID_TSC */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
static void	tbr_timeout_dispatch(netmsg_t);
static int	altq_enable_locked(struct ifaltq *);
static int	altq_disable_locked(struct ifaltq *);
static int	altq_detach_locked(struct ifaltq *);
static int	tbr_set_locked(struct ifaltq *, struct tb_profile *);

int (*altq_input)(struct mbuf *, int) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;
static struct netmsg_base tbr_timeout_netmsg;

int pfaltq_running;	/* keep track of running state */

MALLOC_DEFINE(M_ALTQ, "altq", "ALTQ structures");

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(const char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return (NULL);
}

int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    altq_mapsubq_t mapsubq,
    ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request,
    void *clfier,
    void *(*classify)(struct ifaltq *, struct mbuf *, struct altq_pktattr *))
{
	if (!ifq_is_ready(ifq))
		return ENXIO;

	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	ifq_set_methods(ifq, mapsubq, enqueue, dequeue, request);
	return 0;
}
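
/*
 * Illustrative sketch (not from this file): a discipline implementation
 * would typically attach itself along these lines.  The "foo_*" names
 * are hypothetical placeholders for a discipline's method callbacks:
 *
 *	error = altq_attach(ifq, ALTQT_CBQ, foo_disc, foo_mapsubq,
 *	    foo_enqueue, foo_dequeue, foo_request, NULL, NULL);
 */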

static int
altq_detach_locked(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return EBUSY;
	if (!ifq_is_attached(ifq))
		return (0);

	ifq_set_classic(ifq);
	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_detach(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_detach_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

static int
altq_enable_locked(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return 0;

	ifq_purge_all_locked(ifq);

	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	return 0;
}

int
altq_enable(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_enable_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

static int
altq_disable_locked(struct ifaltq *ifq)
{
	if (!ifq_is_enabled(ifq))
		return 0;

	ifq_purge_all_locked(ifq);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	return 0;
}

int
altq_disable(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_disable_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
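
/*
 * Worked example (illustrative numbers, not from this file): for a
 * profile->rate of 10Mbps on a 2GHz machine clock,
 *	tbr_rate = TBR_SCALE(10000000 / 8) / 2000000000
 *		 = (1250000 << 32) / 2000000000 ~= 2684354
 * i.e. roughly 2684354/2^32 ~= 0.000625 bytes are credited per machine
 * clock tick, which works out to 1.25MB/s (10Mbps) as expected.
 */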

struct mbuf *
tbr_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	uint64_t now;

	if (ifsq_get_index(ifsq) != ALTQ_SUBQ_INDEX_DEFAULT) {
		/*
		 * A race occurred: an unrelated subqueue was picked
		 * during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	crit_enter();
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update the token only when it is depleted (non-positive) */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if the token is still non-positive, don't allow dequeue */
		if (tbr->tbr_token <= 0) {
			crit_exit();
			return (NULL);
		}
	}

	if (ifq_is_enabled(ifq))
		m = (*ifsq->ifsq_dequeue)(ifsq, op);
	else
		m = ifsq_classic_dequeue(ifsq, op);

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	crit_exit();
	return (m);
}
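
/*
 * Illustrative sketch: the poll/remove pattern the tbr_lastop check above
 * caters to.  A poll peeks at a packet without deducting tokens; the
 * immediately following remove then bypasses the token check, so a packet
 * already seen by the poll cannot be refused (has_room() stands in for a
 * hypothetical driver-side test):
 *
 *	m = tbr_dequeue(ifsq, ALTDQ_POLL);
 *	if (m != NULL && has_room(m))
 *		m = tbr_dequeue(ifsq, ALTDQ_REMOVE);
 */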

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
static int
tbr_set_locked(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		kprintf("%s: no cpu clock available!\n", __func__);
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		kfree(tbr, M_ALTQ);
		return (0);
	}

	tbr = kmalloc(sizeof(*tbr), M_ALTQ, M_WAITOK | M_ZERO);
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		kfree(otbr, M_ALTQ);
	else if (tbr_timer == 0) {
		callout_reset_bycpu(&tbr_callout, 1, tbr_timeout, NULL, 0);
		tbr_timer = 1;
	}
	return (0);
}

int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	int error;

	ifq_lock_all(ifq);
	error = tbr_set_locked(ifq, profile);
	ifq_unlock_all(ifq);
	return error;
}
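
/*
 * Illustrative sketch (hypothetical values): install a 100Mbps regulator
 * with a 64KB bucket, then delete it again by setting the rate to zero:
 *
 *	struct tb_profile tb;
 *
 *	tb.rate = 100000000;		// bits per second
 *	tb.depth = 65536;		// bucket depth in bytes
 *	error = tbr_set(ifq, &tb);
 *	...
 *	tb.rate = 0;			// a zero rate deletes the regulator
 *	error = tbr_set(ifq, &tb);
 */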

static void
tbr_timeout(void *arg __unused)
{
	struct lwkt_msg *lmsg = &tbr_timeout_netmsg.lmsg;

	KASSERT(mycpuid == 0, ("not on cpu0"));
	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
	crit_exit();
}

/*
 * tbr_timeout_dispatch goes through the interface list and kicks
 * the drivers if necessary.
 */
static void
tbr_timeout_dispatch(netmsg_t nmsg)
{
	const struct ifnet_array *arr;
	int active, i;

	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
	    ("not in netisr0"));

	crit_enter();
	lwkt_replymsg(&nmsg->lmsg, 0);	/* reply ASAP */
	crit_exit();

	active = 0;
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaltq_subque *ifsq;

		if (ifp->if_snd.altq_tbr == NULL)
			continue;

		ifsq = &ifp->if_snd.altq_subq[ALTQ_SUBQ_INDEX_DEFAULT];
		active++;
		if (!ifsq_is_empty(ifsq) && ifp->if_start != NULL) {
			ifsq_serialize_hw(ifsq);
			(*ifp->if_start)(ifp, ifsq);
			ifsq_deserialize_hw(ifsq);
		}
	}
	if (active > 0)
		callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 */
int
altq_pfattach(struct pf_altq *a)
{
	struct ifaltq *ifq;
	struct ifnet *ifp;
	int error;

	if (a->scheduler == ALTQT_NONE)
		return 0;

	if (a->altq_disc == NULL)
		return EINVAL;

	ifnet_lock();

	ifp = ifunit(a->ifname);
	if (ifp == NULL) {
		ifnet_unlock();
		return EINVAL;
	}
	ifq = &ifp->if_snd;

	ifq_lock_all(ifq);

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a, ifq);
		break;
#endif
	default:
		error = ENXIO;
		goto back;
	}

	/* if the state is running, enable altq */
	if (error == 0 && pfaltq_running && ifq->altq_type != ALTQT_NONE &&
	    !ifq_is_enabled(ifq))
		error = altq_enable_locked(ifq);
	/* if altq is already enabled, reset the token bucket regulator */
	if (error == 0 && ifq_is_enabled(ifq)) {
		struct tb_profile tb;

		tb.rate = a->ifbandwidth;
		tb.depth = a->tbrsize;
		error = tbr_set_locked(ifq, &tb);
	}
back:
	ifq_unlock_all(ifq);
	ifnet_unlock();
	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	struct ifaltq *ifq;
	int error = 0;

	ifnet_lock();

	ifp = ifunit(a->ifname);
	if (ifp == NULL) {
		ifnet_unlock();
		return (EINVAL);
	}
	ifq = &ifp->if_snd;

	/* if this discipline is no longer referenced, just return */
	if (a->altq_disc == NULL) {
		ifnet_unlock();
		return (0);
	}

	ifq_lock_all(ifq);

	if (a->altq_disc != ifq->altq_disc)
		goto back;

	if (ifq_is_enabled(ifq))
		error = altq_disable_locked(ifq);
	if (error == 0)
		error = altq_detach_locked(ifq);

back:
	ifq_unlock_all(ifq);
	ifnet_unlock();
	return (error);
}

/*
 * add a discipline or a queue
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
uint8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	uint8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((uint8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("read_dsfield: can't locate header!\n");
#endif
		return ((uint8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

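		/*
		 * Layout note (RFC 2460): ip6_flow packs version (4 bits),
		 * traffic class (8 bits) and flow label (20 bits), so the
		 * DS field is extracted at bits 20-27 below.
		 */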
		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, uint8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		uint8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
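		/*
		 * Worked example (illustrative numbers): with old TOS
		 * 0x00, new dsfield 0xb8 and a stored checksum of 0xb1e6:
		 *	sum = ~0xb1e6 & 0xffff          = 0x4e19
		 *	sum += 0xff00 + 0xff + 0xb8    -> 0x14ed0
		 *	fold the carry                 -> 0x4ed1
		 * so the new checksum is ~0x4ed1 & 0xffff = 0xb12e.
		 */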
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
}

/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., the timestamp counter of Intel
 * Pentium CPUs).  we assume
 *  - a 64-bit-long monotonically-increasing counter
 *  - a frequency range of 100MHz-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8
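
/*
 * Worked out: microtime() advances at 1MHz (microsecond resolution), so
 * shifting by MACHCLK_SHIFT scales the emulated clock to
 *	1000000 << 8 = 256000000 ticks/sec,
 * matching the 256MHz figure quoted above.
 */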

static int machclk_usepcc;
uint64_t machclk_freq = 0;
uint32_t machclk_per_tick = 0;

void
init_machclk(void)
{
	callout_init_mp(&tbr_callout);
	netmsg_init(&tbr_timeout_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, tbr_timeout_dispatch);

#ifdef ALTQ_NOPCC
	machclk_usepcc = 0;
#else
	machclk_usepcc = 1;
#endif

#if defined(__i386__) || defined(__x86_64__)
	if (!tsc_mpsync)
		machclk_usepcc = 0;
#else
	machclk_usepcc = 0;
#endif

	if (!machclk_usepcc) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000LLU << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		kprintf("altq: emulate %juHz cpu clock\n",
		    (uintmax_t)machclk_freq);
#endif
		return;
	}

	/*
	 * If the clock frequency (of Pentium TSC) is accessible,
	 * just use it.
	 */
#ifdef _RDTSC_SUPPORTED_
	if (tsc_present)
		machclk_freq = (uint64_t)tsc_frequency;
#endif

	/*
	 * If we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		uint64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		tsleep(&wait, PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (end - start) * 1000000 / diff;
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	kprintf("altq: CPU clock: %juHz\n", (uintmax_t)machclk_freq);
#endif
}

uint64_t
read_machclk(void)
{
	uint64_t val;

	if (machclk_usepcc) {
#ifdef _RDTSC_SUPPORTED_
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((uint64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
923