xref: /dragonfly/sys/net/altq/altq_subr.c (revision 8edfbc5e)
/*	$KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $	*/

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/thread2.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/ifq_var.h>
#include <net/netmsg2.h>
#include <net/netisr2.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <machine/clock.h>		/* for tsc_frequency */
#include <machine/md_var.h>		/* for cpu_feature */
#include <machine/specialreg.h>		/* for CPUID_TSC */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
static void	tbr_timeout_dispatch(netmsg_t);
static int	altq_enable_locked(struct ifaltq *);
static int	altq_disable_locked(struct ifaltq *);
static int	altq_detach_locked(struct ifaltq *);
static int	tbr_set_locked(struct ifaltq *, struct tb_profile *);

int (*altq_input)(struct mbuf *, int) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;
static struct netmsg_base tbr_timeout_netmsg;

int pfaltq_running;	/* keep track of running state */

MALLOC_DEFINE(M_ALTQ, "altq", "ALTQ structures");

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(const char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return (NULL);
}
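
/*
 * Example (illustrative only, not part of the original source): a
 * discipline can recover its private state from an interface name,
 * here a hypothetical "em0" running CBQ:
 *
 *	struct cbq_state *cbqp;
 *
 *	cbqp = altq_lookup("em0", ALTQT_CBQ);
 *	if (cbqp == NULL)
 *		return (EBADF);
 */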

int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    altq_mapsubq_t mapsubq,
    ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request,
    void *clfier,
    void *(*classify)(struct ifaltq *, struct mbuf *, struct altq_pktattr *))
{
	if (!ifq_is_ready(ifq))
		return ENXIO;

	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	ifq_set_methods(ifq, mapsubq, enqueue, dequeue, request);
	return 0;
}

static int
altq_detach_locked(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return EBUSY;
	if (!ifq_is_attached(ifq))
		return (0);

	ifq_set_classic(ifq);
	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_detach(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_detach_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

static int
altq_enable_locked(struct ifaltq *ifq)
{
	if (!ifq_is_ready(ifq))
		return ENXIO;
	if (ifq_is_enabled(ifq))
		return 0;

	ifq_purge_all_locked(ifq);

	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	return 0;
}

int
altq_enable(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_enable_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

static int
altq_disable_locked(struct ifaltq *ifq)
{
	if (!ifq_is_enabled(ifq))
		return 0;

	ifq_purge_all_locked(ifq);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	return 0;
}

int
altq_disable(struct ifaltq *ifq)
{
	int error;

	ifq_lock_all(ifq);
	error = altq_disable_locked(ifq);
	ifq_unlock_all(ifq);
	return error;
}

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
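
/*
 * Worked example (illustrative): for a 100Mbps profile on a machine
 * whose machclk_freq is 1GHz,
 *
 *	tbr_rate = TBR_SCALE(100000000 / 8) / 1000000000
 *	         = (12500000 << 32) / 1000000000
 *	         = 53687091 (scaled bytes per machine clock tick)
 *
 * and TBR_UNSCALE(tbr_rate * 1000000000) recovers ~12500000 bytes/sec.
 */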

struct mbuf *
tbr_dequeue(struct ifaltq_subque *ifsq, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	uint64_t now;

	if (ifsq_get_index(ifsq) != ALTQ_SUBQ_INDEX_DEFAULT) {
		/*
		 * Race happened, the unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	crit_enter();
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update the token only when it is non-positive */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if the token is still non-positive, don't allow dequeue */
		if (tbr->tbr_token <= 0) {
			crit_exit();
			return (NULL);
		}
	}

	if (ifq_is_enabled(ifq))
		m = (*ifsq->ifsq_dequeue)(ifsq, op);
	else
		m = ifsq_classic_dequeue(ifsq, op);

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	crit_exit();
	return (m);
}
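
/*
 * Illustrative sketch (not part of the original source) of the
 * poll-then-remove pattern that the tbr_lastop test above recognizes;
 * driver_has_room() is a hypothetical capacity check:
 *
 *	m = tbr_dequeue(ifsq, ALTDQ_POLL);
 *	if (m != NULL && driver_has_room(sc, m))
 *		m = tbr_dequeue(ifsq, ALTDQ_REMOVE);
 *
 * The ALTDQ_POLL call consumes no tokens; the immediately following
 * ALTDQ_REMOVE bypasses the token check, so a packet the driver has
 * already committed to cannot be refused by the regulator.
 */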

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
static int
tbr_set_locked(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		kprintf("%s: no cpu clock available!\n", __func__);
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		kfree(tbr, M_ALTQ);
		return (0);
	}

	tbr = kmalloc(sizeof(*tbr), M_ALTQ, M_WAITOK | M_ZERO);
	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		kfree(otbr, M_ALTQ);
	else if (tbr_timer == 0) {
		callout_reset_bycpu(&tbr_callout, 1, tbr_timeout, NULL, 0);
		tbr_timer = 1;
	}
	return (0);
}

int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	int error;

	ifq_lock_all(ifq);
	error = tbr_set_locked(ifq, profile);
	ifq_unlock_all(ifq);
	return error;
}
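
/*
 * Example usage (illustrative, not in the original source): rate-limit
 * an interface to 10Mbps (tb_profile.rate is in bits per second) with
 * a 64KB bucket (tb_profile.depth is in bytes); a zero rate would
 * instead delete any installed regulator:
 *
 *	struct tb_profile tb;
 *	int error;
 *
 *	tb.rate = 10000000;
 *	tb.depth = 65536;
 *	error = tbr_set(&ifp->if_snd, &tb);
 */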

static void
tbr_timeout(void *arg __unused)
{
	struct lwkt_msg *lmsg = &tbr_timeout_netmsg.lmsg;

	KASSERT(mycpuid == 0, ("not on cpu0"));
	crit_enter();
	if (lmsg->ms_flags & MSGF_DONE)
		lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
	crit_exit();
}

/*
 * tbr_timeout_dispatch goes through the interface list, and kicks
 * the drivers if necessary.
 */
static void
tbr_timeout_dispatch(netmsg_t nmsg)
{
	const struct ifnet_array *arr;
	int active, i;

	ASSERT_IN_NETISR(0);

	crit_enter();
	lwkt_replymsg(&nmsg->lmsg, 0);	/* reply ASAP */
	crit_exit();

	active = 0;
	arr = ifnet_array_get();
	for (i = 0; i < arr->ifnet_count; ++i) {
		struct ifnet *ifp = arr->ifnet_arr[i];
		struct ifaltq_subque *ifsq;

		if (ifp->if_snd.altq_tbr == NULL)
			continue;

		ifsq = &ifp->if_snd.altq_subq[ALTQ_SUBQ_INDEX_DEFAULT];
		active++;
		if (!ifsq_is_empty(ifsq) && ifp->if_start != NULL) {
			ifsq_serialize_hw(ifsq);
			(*ifp->if_start)(ifp, ifsq);
			ifsq_deserialize_hw(ifsq);
		}
	}
	if (active > 0)
		callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 */
int
altq_pfattach(struct pf_altq *a)
{
	struct ifaltq *ifq;
	struct ifnet *ifp;
	int error;

	if (a->scheduler == ALTQT_NONE)
		return 0;

	if (a->altq_disc == NULL)
		return EINVAL;

	ifnet_lock();

	ifp = ifunit(a->ifname);
	if (ifp == NULL) {
		ifnet_unlock();
		return EINVAL;
	}
	ifq = &ifp->if_snd;

	ifq_lock_all(ifq);

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a, ifq);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a, ifq);
		break;
#endif
	default:
		error = ENXIO;
		goto back;
	}

	/* if the state is running, enable altq */
	if (error == 0 && pfaltq_running && ifq->altq_type != ALTQT_NONE &&
	    !ifq_is_enabled(ifq))
		error = altq_enable_locked(ifq);

	/* if altq is already enabled, reset the token bucket regulator */
	if (error == 0 && ifq_is_enabled(ifq)) {
		struct tb_profile tb;

		tb.rate = a->ifbandwidth;
		tb.depth = a->tbrsize;
		error = tbr_set_locked(ifq, &tb);
	}
back:
	ifq_unlock_all(ifq);
	ifnet_unlock();
	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	struct ifaltq *ifq;
	int error = 0;

	ifnet_lock();

	ifp = ifunit(a->ifname);
	if (ifp == NULL) {
		ifnet_unlock();
		return (EINVAL);
	}
	ifq = &ifp->if_snd;

	/* if this discipline is no longer referenced, just return */
	if (a->altq_disc == NULL) {
		ifnet_unlock();
		return (0);
	}

	ifq_lock_all(ifq);

	if (a->altq_disc != ifq->altq_disc)
		goto back;

	if (ifq_is_enabled(ifq))
		error = altq_disable_locked(ifq);
	if (error == 0)
		error = altq_detach_locked(ifq);

back:
	ifq_unlock_all(ifq);
	ifnet_unlock();
	return (error);
}

/*
 * add a discipline or a queue
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
uint8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	uint8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((uint8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("read_dsfield: can't locate header!\n");
#endif
		return ((uint8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((uint8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
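
/*
 * Illustrative note (not part of the original source): in the
 * host-order IPv6 flow word used above, the DS (traffic class) field
 * sits between the version and the flow label,
 *
 *	 31    28 27        20 19                 0
 *	+--------+------------+--------------------+
 *	| version|  DS field  |     flow label     |
 *	+--------+------------+--------------------+
 *
 * which is why read_dsfield() extracts (flowlabel >> 20) & 0xff, and
 * why write_dsfield() below masks with 0xf03fffff, clearing only the
 * six DSCP bits while preserving the two low (CU/ECN) bits, mirroring
 * the "old & 3" treatment of the IPv4 TOS byte.
 */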

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, uint8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	}
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		kprintf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		uint8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		uint32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
}
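
/*
 * Worked example (illustrative, values hypothetical) of the RFC 1624
 * update in write_dsfield(): changing the TOS byte from m = 0x00 to
 * m' = 0xb8 with an original checksum HC = 0x1c46,
 *
 *	sum = ~0x1c46 & 0xffff          = 0xe3b9
 *	sum += 0xff00 + 0xff + 0xb8     = 0x1e470
 *	fold the carry                  = 0xe471
 *	HC' = ~0xe471 & 0xffff          = 0x1b8e
 *
 * which agrees with the one's-complement shortcut HC' = HC - (m' - m)
 * = 0x1c46 - 0x00b8 = 0x1b8e.
 */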

/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

static int machclk_usepcc;
uint64_t machclk_freq = 0;
uint32_t machclk_per_tick = 0;

void
init_machclk(void)
{
	callout_init_mp(&tbr_callout);
	netmsg_init(&tbr_timeout_netmsg, NULL, &netisr_adone_rport,
	    MSGF_PRIORITY, tbr_timeout_dispatch);

#ifdef ALTQ_NOPCC
	machclk_usepcc = 0;
#else
	machclk_usepcc = 1;
#endif

#if defined(__x86_64__)
	if (!tsc_mpsync)
		machclk_usepcc = 0;
#else
	machclk_usepcc = 0;
#endif

	if (!machclk_usepcc) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000LLU << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		kprintf("altq: emulate %juHz cpu clock\n",
		    (uintmax_t)machclk_freq);
#endif
		return;
	}

	/*
	 * If the clock frequency (of Pentium TSC) is accessible,
	 * just use it.
	 */
#ifdef _RDTSC_SUPPORTED_
	if (tsc_present)
		machclk_freq = (uint64_t)tsc_frequency;
#endif

	/*
	 * If we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		uint64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		tsleep(&wait, PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (end - start) * 1000000 / diff;
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	kprintf("altq: CPU clock: %juHz\n", (uintmax_t)machclk_freq);
#endif
}

uint64_t
read_machclk(void)
{
	uint64_t val;

	if (machclk_usepcc) {
#ifdef _RDTSC_SUPPORTED_
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((uint64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
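
/*
 * Illustrative helper (not part of the original source): converting a
 * machclk tick delta back to microseconds, e.g. for debug output.
 * Assumes init_machclk() has run and ignores possible multiplication
 * overflow for very large deltas:
 *
 *	static __inline uint64_t
 *	machclk_to_usec(uint64_t delta)
 *	{
 *		return (delta * 1000000 / machclk_freq);
 *	}
 */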