xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision e98bdfd3)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
87 static void i8254_restore(void);
88 static void resettodr_on_shutdown(void *arg __unused);
89 
90 /*
91  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
92  * can use a simple formula for leap years.
93  */
94 #define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
95 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
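/*
 * Example: the only century year inside that window is 2000, which is
 * divisible by 400 and is a leap year anyway, so the %4 test above never
 * gives a wrong answer for any reachable date.
 */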
96 
97 #ifndef TIMER_FREQ
98 #define TIMER_FREQ   1193182
99 #endif
100 
101 static uint8_t i8254_walltimer_sel;
102 static uint16_t i8254_walltimer_cntr;
103 
104 int	adjkerntz;		/* local offset from GMT in seconds */
105 int	disable_rtc_set;	/* disable resettodr() if != 0 */
106 int	tsc_present;
107 int	tsc_invariant;
108 int	tsc_mpsync;
109 int64_t	tsc_frequency;
110 int	tsc_is_broken;
111 int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
112 int	timer0_running;
113 enum tstate { RELEASED, ACQUIRED };
114 enum tstate timer0_state;
115 enum tstate timer1_state;
116 enum tstate timer2_state;
117 
118 static	int	beeping = 0;
119 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
120 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
121 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
122 static  int	rtc_loaded;
123 
124 static int i8254_cputimer_div;
125 
126 static int i8254_nointr;
127 static int i8254_intr_disable = 1;
128 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
129 
130 static struct callout sysbeepstop_ch;
131 
132 static sysclock_t i8254_cputimer_count(void);
133 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
134 static void i8254_cputimer_destruct(struct cputimer *cputimer);
135 
136 static struct cputimer	i8254_cputimer = {
137     SLIST_ENTRY_INITIALIZER,
138     "i8254",
139     CPUTIMER_PRI_8254,
140     0,
141     i8254_cputimer_count,
142     cputimer_default_fromhz,
143     cputimer_default_fromus,
144     i8254_cputimer_construct,
145     i8254_cputimer_destruct,
146     TIMER_FREQ,
147     0, 0, 0
148 };
149 
150 static sysclock_t tsc_cputimer_count_mfence(void);
151 static sysclock_t tsc_cputimer_count_lfence(void);
152 static void tsc_cputimer_construct(struct cputimer *, sysclock_t);
153 
154 static struct cputimer	tsc_cputimer = {
155     SLIST_ENTRY_INITIALIZER,
156     "TSC",
157     CPUTIMER_PRI_TSC,
158     CPUTIMER_TSC,
159     tsc_cputimer_count_mfence, /* safe bet */
160     cputimer_default_fromhz,
161     cputimer_default_fromus,
162     tsc_cputimer_construct,
163     cputimer_default_destruct,
164     0,
165     0, 0, 0
166 };
167 
168 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
169 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
170 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
171 
172 static struct cputimer_intr i8254_cputimer_intr = {
173     .freq = TIMER_FREQ,
174     .reload = i8254_intr_reload,
175     .enable = cputimer_intr_default_enable,
176     .config = i8254_intr_config,
177     .restart = cputimer_intr_default_restart,
178     .pmfixup = cputimer_intr_default_pmfixup,
179     .initclock = i8254_intr_initclock,
180     .next = SLIST_ENTRY_INITIALIZER,
181     .name = "i8254",
182     .type = CPUTIMER_INTR_8254,
183     .prio = CPUTIMER_INTR_PRIO_8254,
184     .caps = CPUTIMER_INTR_CAP_PS
185 };
186 
187 /*
188  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
189  * counting as of this interrupt.  We use timer1 in free-running mode (not
190  * generating any interrupts) as our main counter.  Each cpu has timeouts
191  * pending.
192  *
193  * This code is INTR_MPSAFE and may be called without the BGL held.
194  */
195 static void
196 clkintr(void *dummy, void *frame_arg)
197 {
198 	static sysclock_t sysclock_count;	/* NOTE! Must be static */
199 	struct globaldata *gd = mycpu;
200 	struct globaldata *gscan;
201 	int n;
202 
203 	/*
204 	 * SWSTROBE mode is a one-shot, the timer is no longer running
205 	 */
206 	timer0_running = 0;
207 
208 	/*
209 	 * XXX the dispatcher needs work.  right now we call systimer_intr()
210 	 * directly or via IPI for any cpu with systimers queued, which is
211 	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
212 	 */
213 	sysclock_count = sys_cputimer->count();
214 	for (n = 0; n < ncpus; ++n) {
215 	    gscan = globaldata_find(n);
216 	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
217 		continue;
218 	    if (gscan != gd) {
219 		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
220 				&sysclock_count, 1);
221 	    } else {
222 		systimer_intr(&sysclock_count, 0, frame_arg);
223 	    }
224 	}
225 }
226 
227 
228 /*
229  * NOTE! not MP safe.
230  */
231 int
232 acquire_timer2(int mode)
233 {
234 	if (timer2_state != RELEASED)
235 		return (-1);
236 	timer2_state = ACQUIRED;
237 
238 	/*
239 	 * This access to the timer registers is as atomic as possible
240 	 * because it is a single instruction.  We could do better if we
241 	 * knew the rate.
242 	 */
243 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
244 	return (0);
245 }
246 
247 int
248 release_timer2(void)
249 {
250 	if (timer2_state != ACQUIRED)
251 		return (-1);
252 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
253 	timer2_state = RELEASED;
254 	return (0);
255 }
256 
257 #include "opt_ddb.h"
258 #ifdef DDB
259 #include <ddb/ddb.h>
260 
261 DB_SHOW_COMMAND(rtc, rtc)
262 {
263 	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
264 	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
265 	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
266 	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
267 }
268 #endif /* DDB */
269 
270 /*
271  * Return the current cpu timer count as a 32 bit integer.
272  */
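/*
 * The selected 8254 walltimer is only 16 bits wide and counts down, so
 * the value is negated into a count-up and widened to 32 bits by folding
 * rollovers into i8254_cputimer.base: e.g. a previous count-up of 0xfff0
 * followed by a reading of 0x0010 means the counter wrapped and the base
 * is advanced by 0x00010000.
 */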
273 static
274 sysclock_t
275 i8254_cputimer_count(void)
276 {
277 	static uint16_t cputimer_last;
278 	uint16_t count;
279 	sysclock_t ret;
280 
281 	clock_lock();
282 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
283 	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
284 	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
285 	count = -count;					/* -> countup */
286 	if (count < cputimer_last)			/* rollover */
287 		i8254_cputimer.base += 0x00010000;
288 	ret = i8254_cputimer.base | count;
289 	cputimer_last = count;
290 	clock_unlock();
291 	return(ret);
292 }
293 
294 /*
295  * This function is called whenever the system timebase changes, allowing
296  * us to calculate what is needed to convert a system timebase tick
297  * into an 8254 tick for the interrupt timer.  If the conversion reduces
298  * to a simple integer division we cache the divisor.  Otherwise 64 bit
299  * arithmetic is required every time the interrupt timer is reloaded.
300  */
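/*
 * Example: when the i8254 is also the system timebase (both 1193182 Hz)
 * the divisor computes to 1 and reload values pass straight through; a
 * hypothetical timebase of exactly twice that rate would give a divisor
 * of 2.  A timebase that is not within 1 Hz of an integer multiple
 * clears the divisor and forces the 64 bit path in i8254_intr_reload().
 */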
301 static void
302 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
303 {
304     int freq;
305     int div;
306 
307     /*
308      * Will a simple divide do the trick?
309      */
310     div = (timer->freq + (cti->freq / 2)) / cti->freq;
311     freq = cti->freq * div;
312 
313     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
314 	i8254_cputimer_div = div;
315     else
316 	i8254_cputimer_div = 0;
317 }
318 
319 /*
320  * Reload for the next timeout.  It is possible for the reload value
321  * to be 0 or negative, indicating that an immediate timer interrupt
322  * is desired.  For now make the minimum 2 ticks.
323  *
324  * We may have to convert from the system timebase to the 8254 timebase.
325  */
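/*
 * Example: with the i8254 as the system timebase (divisor 1) a 1/100
 * second timeout arrives here as roughly 11932 ticks, well inside the
 * 16 bit counter; anything above 0xFFFF is programmed as 0, i.e. a full
 * 65536-tick count.
 */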
326 static void
327 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
328 {
329     uint16_t count;
330 
331     if (i8254_cputimer_div)
332 	reload /= i8254_cputimer_div;
333     else
334 	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;
335 
336     if ((int)reload < 2)
337 	reload = 2;
338 
339     clock_lock();
340     if (timer0_running) {
341 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
342 	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
343 	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
344 	if (reload < count) {
345 	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
346 	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
347 	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
348 	}
349     } else {
350 	timer0_running = 1;
351 	if (reload > 0xFFFF)
352 	    reload = 0;		/* full count */
353 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
354 	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
355 	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
356     }
357     clock_unlock();
358 }
359 
360 /*
361  * DELAY(usec)	     - Spin for the specified number of microseconds.
362  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
363  *		       but do a thread switch in the loop
364  *
365  * Relies on sys_cputimer counting up (the i8254 walltimer early in boot).
366  * Note: the timer had better have been programmed before this is first used!
367  */
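/*
 * Example of the tick calculation below: DELAY(1000) with a 1193182 Hz
 * sys_cputimer computes ticks_left = (1000 * 1193182 + 999999) / 1000000
 * = 1194, i.e. the requested delay is rounded up to whole hardware ticks.
 */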
368 static void
369 DODELAY(int n, int doswitch)
370 {
371 	ssysclock_t delta, ticks_left;
372 	sysclock_t prev_tick, tick;
373 
374 #ifdef DELAYDEBUG
375 	int getit_calls = 1;
376 	int n1;
377 	static int state = 0;
378 
379 	if (state == 0) {
380 		state = 1;
381 		for (n1 = 1; n1 <= 10000000; n1 *= 10)
382 			DELAY(n1);
383 		state = 2;
384 	}
385 	if (state == 1)
386 		kprintf("DELAY(%d)...", n);
387 #endif
388 	/*
389 	 * Guard against the timer being uninitialized if we are called
390 	 * early for console i/o.
391 	 */
392 	if (timer0_state == RELEASED)
393 		i8254_restore();
394 
395 	/*
396 	 * Read the counter first, so that the rest of the setup overhead is
397 	 * counted.  Then calculate the number of hardware timer ticks
398 	 * required, rounding up to be sure we delay at least the requested
399 	 * number of microseconds.
400 	 */
401 	prev_tick = sys_cputimer->count();
402 	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
403 		     1000000;
404 
405 	/*
406 	 * Loop until done.
407 	 */
408 	while (ticks_left > 0) {
409 		tick = sys_cputimer->count();
410 #ifdef DELAYDEBUG
411 		++getit_calls;
412 #endif
413 		delta = tick - prev_tick;
414 		prev_tick = tick;
415 		if (delta < 0)
416 			delta = 0;
417 		ticks_left -= delta;
418 		if (doswitch && ticks_left > 0)
419 			lwkt_switch();
420 		cpu_pause();
421 	}
422 #ifdef DELAYDEBUG
423 	if (state == 1)
424 		kprintf(" %d calls to getit() at %d usec each\n",
425 		       getit_calls, (n + 5) / getit_calls);
426 #endif
427 }
428 
429 /*
430  * DELAY() never switches.
431  */
432 void
433 DELAY(int n)
434 {
435 	DODELAY(n, 0);
436 }
437 
438 /*
439  * Returns non-zero once the time period in tdd->us has elapsed.  The
440  * TOTALDELAY structure must be zeroed (started == 0) before the first call.
441  */
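/*
 * Usage sketch (hypothetical caller): zero a TOTALDELAY, store the
 * timeout budget in microseconds in its 'us' field, then poll
 * CHECKTIMEOUT(&td) inside the wait loop until it returns non-zero.
 */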
442 int
443 CHECKTIMEOUT(TOTALDELAY *tdd)
444 {
445 	sysclock_t delta;
446 	int us;
447 
448 	if (tdd->started == 0) {
449 		if (timer0_state == RELEASED)
450 			i8254_restore();
451 		tdd->last_clock = sys_cputimer->count();
452 		tdd->started = 1;
453 		return(0);
454 	}
455 	delta = sys_cputimer->count() - tdd->last_clock;
456 	us = (u_int64_t)delta * (u_int64_t)1000000 /
457 	     (u_int64_t)sys_cputimer->freq;
458 	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
459 			   1000000;
460 	tdd->us -= us;
461 	return (tdd->us < 0);
462 }
463 
464 
465 /*
466  * DRIVERSLEEP() does not switch if called with a spinlock held or
467  * from a hard interrupt.
468  */
469 void
470 DRIVERSLEEP(int usec)
471 {
472 	globaldata_t gd = mycpu;
473 
474 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
475 		DODELAY(usec, 0);
476 	} else {
477 		DODELAY(usec, 1);
478 	}
479 }
480 
481 static void
482 sysbeepstop(void *chan)
483 {
484 	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
485 	beeping = 0;
486 	release_timer2();
487 }
488 
489 int
490 sysbeep(int pitch, int period)
491 {
492 	if (sysbeep_enable == 0)
493 		return(-1);
494 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
495 		return(-1);
496 	/*
497 	 * Nobody else is using timer2, so we do not need the clock lock
498 	 */
499 	outb(TIMER_CNTR2, pitch);
500 	outb(TIMER_CNTR2, (pitch>>8));
501 	if (!beeping) {
502 		/* enable counter2 output to speaker */
503 		outb(IO_PPI, inb(IO_PPI) | 3);
504 		beeping = period;
505 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
506 	}
507 	return (0);
508 }
509 
510 /*
511  * RTC support routines
512  */
513 
514 int
515 rtcin(int reg)
516 {
517 	u_char val;
518 
519 	crit_enter();
520 	outb(IO_RTC, reg);
521 	inb(0x84);
522 	val = inb(IO_RTC + 1);
523 	inb(0x84);
524 	crit_exit();
525 	return (val);
526 }
527 
528 static __inline void
529 writertc(u_char reg, u_char val)
530 {
531 	crit_enter();
532 	inb(0x84);
533 	outb(IO_RTC, reg);
534 	inb(0x84);
535 	outb(IO_RTC + 1, val);
536 	inb(0x84);		/* XXX work around wrong order in rtcin() */
537 	crit_exit();
538 }
539 
540 static __inline int
541 readrtc(int port)
542 {
543 	return(bcd2bin(rtcin(port)));
544 }
545 
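/*
 * Calibrate against the RTC: wait for the mc146818A seconds counter to
 * tick over, accumulate sys_cputimer ticks (and TSC cycles, if present)
 * for exactly one RTC second, and return the measured i8254 frequency,
 * falling back to the compiled-in default on failure.
 */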
546 static u_int
547 calibrate_clocks(void)
548 {
549 	u_int64_t old_tsc;
550 	u_int tot_count;
551 	sysclock_t count, prev_count;
552 	int sec, start_sec, timeout;
553 
554 	if (bootverbose)
555 	        kprintf("Calibrating clock(s) ...\n");
556 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
557 		goto fail;
558 	timeout = 100000000;
559 
560 	/* Read the mc146818A seconds counter. */
561 	for (;;) {
562 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
563 			sec = rtcin(RTC_SEC);
564 			break;
565 		}
566 		if (--timeout == 0)
567 			goto fail;
568 	}
569 
570 	/* Wait for the mc146818A seconds counter to change. */
571 	start_sec = sec;
572 	for (;;) {
573 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
574 			sec = rtcin(RTC_SEC);
575 			if (sec != start_sec)
576 				break;
577 		}
578 		if (--timeout == 0)
579 			goto fail;
580 	}
581 
582 	/* Start keeping track of the i8254 counter. */
583 	prev_count = sys_cputimer->count();
584 	tot_count = 0;
585 
586 	if (tsc_present)
587 		old_tsc = rdtsc();
588 	else
589 		old_tsc = 0;		/* shut up gcc */
590 
591 	/*
592 	 * Wait for the mc146818A seconds counter to change.  Read the i8254
593 	 * counter for each iteration since this is convenient and only
594 	 * costs a few usec of inaccuracy. The timing of the final reads
595 	 * of the counters almost matches the timing of the initial reads,
596 	 * so the main cause of inaccuracy is the varying latency from
597 	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
598 	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
599 	 * maximum inaccuracy from this cause is < 10 usec on 486's.
600 	 */
601 	start_sec = sec;
602 	for (;;) {
603 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
604 			sec = rtcin(RTC_SEC);
605 		count = sys_cputimer->count();
606 		tot_count += (int)(count - prev_count);
607 		prev_count = count;
608 		if (sec != start_sec)
609 			break;
610 		if (--timeout == 0)
611 			goto fail;
612 	}
613 
614 	/*
615 	 * Read the cpu cycle counter.  The timing considerations are
616 	 * similar to those for the i8254 clock.
617 	 */
618 	if (tsc_present) {
619 		tsc_frequency = rdtsc() - old_tsc;
620 	}
621 
622 	if (tsc_present) {
623 		kprintf("TSC%s clock: %llu Hz, ",
624 		    tsc_invariant ? " invariant" : "",
625 		    (long long)tsc_frequency);
626 	}
627 	kprintf("i8254 clock: %u Hz\n", tot_count);
628 	return (tot_count);
629 
630 fail:
631 	kprintf("failed, using default i8254 clock of %u Hz\n",
632 		i8254_cputimer.freq);
633 	return (i8254_cputimer.freq);
634 }
635 
636 static void
637 i8254_restore(void)
638 {
639 	timer0_state = ACQUIRED;
640 
641 	clock_lock();
642 
643 	/*
644 	 * Timer0 is our fine-grained variable clock interrupt
645 	 */
646 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
647 	outb(TIMER_CNTR0, 2);	/* lsb */
648 	outb(TIMER_CNTR0, 0);	/* msb */
649 	clock_unlock();
650 
651 	if (!i8254_nointr) {
652 		cputimer_intr_register(&i8254_cputimer_intr);
653 		cputimer_intr_select(&i8254_cputimer_intr, 0);
654 	}
655 
656 	/*
657 	 * Timer1 or timer2 is our free-running clock, but only if another
658 	 * has not been selected.
659 	 */
660 	cputimer_register(&i8254_cputimer);
661 	cputimer_select(&i8254_cputimer, 0);
662 }
663 
664 static void
665 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
666 {
667  	int which;
668 
669 	/*
670 	 * Should we use timer 1 or timer 2 ?
671 	 */
672 	which = 0;
673 	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
674 	if (which != 1 && which != 2)
675 		which = 2;
676 
677 	switch(which) {
678 	case 1:
679 		timer->name = "i8254_timer1";
680 		timer->type = CPUTIMER_8254_SEL1;
681 		i8254_walltimer_sel = TIMER_SEL1;
682 		i8254_walltimer_cntr = TIMER_CNTR1;
683 		timer1_state = ACQUIRED;
684 		break;
685 	case 2:
686 		timer->name = "i8254_timer2";
687 		timer->type = CPUTIMER_8254_SEL2;
688 		i8254_walltimer_sel = TIMER_SEL2;
689 		i8254_walltimer_cntr = TIMER_CNTR2;
690 		timer2_state = ACQUIRED;
691 		break;
692 	}
693 
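	/*
	 * Round the previous sysclock up to the next 64K boundary so that
	 * base | count (see i8254_cputimer_count()) starts at or after
	 * oldclock and the clock never appears to step backwards across
	 * the switch.
	 */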
694 	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;
695 
696 	clock_lock();
697 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
698 	outb(i8254_walltimer_cntr, 0);	/* lsb */
699 	outb(i8254_walltimer_cntr, 0);	/* msb */
700 	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
701 	clock_unlock();
702 }
703 
704 static void
705 i8254_cputimer_destruct(struct cputimer *timer)
706 {
707 	switch(timer->type) {
708 	case CPUTIMER_8254_SEL1:
709 	    timer1_state = RELEASED;
710 	    break;
711 	case CPUTIMER_8254_SEL2:
712 	    timer2_state = RELEASED;
713 	    break;
714 	default:
715 	    break;
716 	}
717 	timer->type = 0;
718 }
719 
720 static void
721 rtc_restore(void)
722 {
723 	/* Restore all of the RTC's "status" (actually, control) registers. */
724 	writertc(RTC_STATUSB, RTCSB_24HR);
725 	writertc(RTC_STATUSA, rtc_statusa);
726 	writertc(RTC_STATUSB, rtc_statusb);
727 }
728 
729 /*
730  * Restore all the timers.
731  *
732  * This function is called to resynchronize our core timekeeping after a
733  * long halt, e.g. from apm_default_resume() and friends.  It is also
734  * called if after a BIOS call we have detected munging of the 8254.
735  * It is necessary because the cputimer count delta may have grown
736  * too large for nanouptime() and friends to handle, or (in the case of 8254
737  * munging) might cause the SYSTIMER code to prematurely trigger.
738  */
739 void
740 timer_restore(void)
741 {
742 	crit_enter();
743 	i8254_restore();		/* restore timer_freq and hz */
744 	rtc_restore();			/* reenable RTC interrupts */
745 	crit_exit();
746 }
747 
748 /*
749  * Initialize 8254 timer 0 early so that it can be used in DELAY().
750  */
751 void
752 startrtclock(void)
753 {
754 	u_int delta, freq;
755 
756 	/*
757 	 * Can we use the TSC?
758 	 *
759 	 * NOTE: If running under qemu, probably a good idea to force the
760 	 *	 TSC because we are not likely to detect it as being
761 	 *	 invariant or mpsyncd if you don't.  This will greatly
762 	 *	 reduce SMP contention.
763 	 */
764 	if (cpu_feature & CPUID_TSC) {
765 		tsc_present = 1;
766 		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
767 
768 		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
769 		     cpu_vendor_id == CPU_VENDOR_AMD) &&
770 		    cpu_exthigh >= 0x80000007) {
771 			u_int regs[4];
772 
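			/*
			 * CPUID leaf 0x80000007 EDX bit 8 advertises an
			 * invariant (constant-rate, never-stopping) TSC on
			 * both Intel and AMD.
			 */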
773 			do_cpuid(0x80000007, regs);
774 			if (regs[3] & 0x100)
775 				tsc_invariant = 1;
776 		}
777 	} else {
778 		tsc_present = 0;
779 	}
780 
781 	/*
782 	 * Initial RTC state, don't do anything unexpected
783 	 */
784 	writertc(RTC_STATUSA, rtc_statusa);
785 	writertc(RTC_STATUSB, RTCSB_24HR);
786 
787 	/*
788 	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
789 	 * generate an interrupt, which we will ignore for now.
790 	 *
791 	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
792 	 * (so it counts a full 2^16 and repeats).  We will use this timer
793 	 * for our counting.
794 	 */
795 	i8254_restore();
796 	freq = calibrate_clocks();
797 #ifdef CLK_CALIBRATION_LOOP
798 	if (bootverbose) {
799 		int c;
800 
801 		cnpoll(TRUE);
802 		kprintf("Press a key on the console to "
803 			"abort clock calibration\n");
804 		while ((c = cncheckc()) == -1 || c == NOKEY)
805 			calibrate_clocks();
806 		cnpoll(FALSE);
807 	}
808 #endif
809 
810 	/*
811 	 * Use the calibrated i8254 frequency if it seems reasonable.
812 	 * Otherwise use the default, and don't use the calibrated i586
813 	 * frequency.
814 	 */
815 	delta = freq > i8254_cputimer.freq ?
816 			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
817 	if (delta < i8254_cputimer.freq / 100) {
818 #ifndef CLK_USE_I8254_CALIBRATION
819 		if (bootverbose)
820 			kprintf(
821 "CLK_USE_I8254_CALIBRATION not specified - using default frequency\n");
822 		freq = i8254_cputimer.freq;
823 #endif
824 		/*
825 		 * NOTE:
826 		 * Interrupt timer's freq must be adjusted
827 		 * before we change the cputimer's frequency.
828 		 */
829 		i8254_cputimer_intr.freq = freq;
830 		cputimer_set_frequency(&i8254_cputimer, freq);
831 	} else {
832 		if (bootverbose)
833 			kprintf(
834 		    "%u Hz differs from default of %u Hz by more than 1%%\n",
835 			       freq, i8254_cputimer.freq);
836 		tsc_frequency = 0;
837 	}
838 
839 #ifndef CLK_USE_TSC_CALIBRATION
840 	if (tsc_frequency != 0) {
841 		if (bootverbose)
842 			kprintf(
843 "CLK_USE_TSC_CALIBRATION not specified - using old calibration method\n");
844 		tsc_frequency = 0;
845 	}
846 #endif
847 	if (tsc_present && tsc_frequency == 0) {
848 		/*
849 		 * Calibration of the i586 clock relative to the mc146818A
850 		 * clock failed.  Do a less accurate calibration relative
851 		 * to the i8254 clock.
852 		 */
853 		u_int64_t old_tsc = rdtsc();
854 
855 		DELAY(1000000);
856 		tsc_frequency = rdtsc() - old_tsc;
857 #ifdef CLK_USE_TSC_CALIBRATION
858 		if (bootverbose) {
859 			kprintf("TSC clock: %llu Hz (Method B)\n",
860 				(long long)tsc_frequency);
861 		}
862 #endif
863 	}
864 
865 	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, NULL, SHUTDOWN_PRI_LAST);
866 }
867 
868 /*
869  * Sync the time of day back to the RTC on shutdown, but only if
870  * we have already loaded it and have not crashed.
871  */
872 static void
873 resettodr_on_shutdown(void *arg __unused)
874 {
875  	if (rtc_loaded && panicstr == NULL) {
876 		resettodr();
877 	}
878 }
879 
880 /*
881  * Initialize the time of day register, based on the time base which is, e.g.
882  * from a filesystem.
883  */
884 void
885 inittodr(time_t base)
886 {
887 	unsigned long	sec, days;
888 	int		year, month;
889 	int		y, m;
890 	struct timespec ts;
891 
892 	if (base) {
893 		ts.tv_sec = base;
894 		ts.tv_nsec = 0;
895 		set_timeofday(&ts);
896 	}
897 
898 	/* Check whether an RTC is present and the time is valid */
899 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
900 		goto wrong_time;
901 
902 	/* wait for time update to complete */
903 	/* If RTCSA_TUP is zero, we have at least 244us before next update */
904 	crit_enter();
905 	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
906 		crit_exit();
907 		crit_enter();
908 	}
909 
910 	days = 0;
911 #ifdef USE_RTC_CENTURY
912 	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
913 #else
914 	year = readrtc(RTC_YEAR) + 1900;
915 	if (year < 1970)
916 		year += 100;
917 #endif
918 	if (year < 1970) {
919 		crit_exit();
920 		goto wrong_time;
921 	}
922 	month = readrtc(RTC_MONTH);
923 	for (m = 1; m < month; m++)
924 		days += daysinmonth[m-1];
925 	if ((month > 2) && LEAPYEAR(year))
926 		days ++;
927 	days += readrtc(RTC_DAY) - 1;
928 	for (y = 1970; y < year; y++)
929 		days += DAYSPERYEAR + LEAPYEAR(y);
930 	sec = ((( days * 24 +
931 		  readrtc(RTC_HRS)) * 60 +
932 		  readrtc(RTC_MIN)) * 60 +
933 		  readrtc(RTC_SEC));
934 	/* sec now contains the number of seconds since Jan 1 1970
935 	   in the local time zone */
936 
937 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
938 
939 	y = (int)(time_second - sec);
940 	if (y <= -2 || y >= 2) {
941 		/* badly off, adjust it */
942 		ts.tv_sec = sec;
943 		ts.tv_nsec = 0;
944 		set_timeofday(&ts);
945 	}
946 	rtc_loaded = 1;
947 	crit_exit();
948 	return;
949 
950 wrong_time:
951 	kprintf("Invalid time in real time clock.\n");
952 	kprintf("Check and reset the date immediately!\n");
953 }
954 
955 /*
956  * Write system time back to RTC
957  */
958 void
959 resettodr(void)
960 {
961 	struct timeval tv;
962 	unsigned long tm;
963 	int m;
964 	int y;
965 
966 	if (disable_rtc_set)
967 		return;
968 
969 	microtime(&tv);
970 	tm = tv.tv_sec;
971 
972 	crit_enter();
973 	/* Disable RTC updates and interrupts. */
974 	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
975 
976 	/* Calculate local time to put in RTC */
977 
978 	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
979 
980 	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
981 	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
982 	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */
983 
984 	/* We have now the days since 01-01-1970 in tm */
985 	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
986 	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
987 	     tm >= m;
988 	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
989 	     tm -= m;
990 
991 	/* Now we have the years in y and the day-of-the-year in tm */
992 	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
993 #ifdef USE_RTC_CENTURY
994 	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
995 #endif
996 	for (m = 0; ; m++) {
997 		int ml;
998 
999 		ml = daysinmonth[m];
1000 		if (m == 1 && LEAPYEAR(y))
1001 			ml++;
1002 		if (tm < ml)
1003 			break;
1004 		tm -= ml;
1005 	}
1006 
1007 	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1008 	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1009 
1010 	/* Reenable RTC updates and interrupts. */
1011 	writertc(RTC_STATUSB, rtc_statusb);
1012 	crit_exit();
1013 }
1014 
1015 static int
1016 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1017 {
1018 	sysclock_t base;
1019 	long lastcnt;
1020 
1021 	/*
1022 	 * Following code assumes the 8254 is the cpu timer,
1023 	 * so make sure it is.
1024 	 */
1025 	KKASSERT(sys_cputimer == &i8254_cputimer);
1026 	KKASSERT(cti == &i8254_cputimer_intr);
1027 
1028 	lastcnt = get_interrupt_counter(irq, mycpuid);
1029 
1030 	/*
1031 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1032 	 * it to happen, then see if we got it.
1033 	 */
1034 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1035 
1036 	i8254_intr_reload(cti, 2);
1037 	base = sys_cputimer->count();
1038 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1039 		; /* nothing */
1040 
1041 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1042 		return ENOENT;
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Start both clocks running.  DragonFly note: the stat clock is no longer
1048  * used.  Instead, 8254 based systimers are used for all major clock
1049  * interrupts.
1050  */
1051 static void
1052 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1053 {
1054 	void *clkdesc = NULL;
1055 	int irq = 0, mixed_mode = 0, error;
1056 
1057 	KKASSERT(mycpuid == 0);
1058 	callout_init_mp(&sysbeepstop_ch);
1059 
1060 	if (!selected && i8254_intr_disable)
1061 		goto nointr;
1062 
1063 	/*
1064 	 * The stat interrupt mask is different without the
1065 	 * statistics clock.  Also, don't set the interrupt
1066 	 * flag which would normally cause the RTC to generate
1067 	 * interrupts.
1068 	 */
1069 	rtc_statusb = RTCSB_24HR;
1070 
1071 	/* Finish initializing 8254 timer 0. */
1072 	if (ioapic_enable) {
1073 		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1074 			INTR_POLARITY_HIGH);
1075 		if (irq < 0) {
1076 mixed_mode_setup:
1077 			error = ioapic_conf_legacy_extint(0);
1078 			if (!error) {
1079 				irq = machintr_legacy_intr_find(0,
1080 				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1081 				if (irq < 0)
1082 					error = ENOENT;
1083 			}
1084 
1085 			if (error) {
1086 				if (!selected) {
1087 					kprintf("IOAPIC: setup mixed mode for "
1088 						"irq 0 failed: %d\n", error);
1089 					goto nointr;
1090 				} else {
1091 					panic("IOAPIC: setup mixed mode for "
1092 					      "irq 0 failed: %d\n", error);
1093 				}
1094 			}
1095 			mixed_mode = 1;
1096 		}
1097 		clkdesc = register_int(irq, clkintr, NULL, "clk",
1098 				       NULL,
1099 				       INTR_EXCL | INTR_CLOCK |
1100 				       INTR_NOPOLL | INTR_MPSAFE |
1101 				       INTR_NOENTROPY, 0);
1102 	} else {
1103 		register_int(0, clkintr, NULL, "clk", NULL,
1104 			     INTR_EXCL | INTR_CLOCK |
1105 			     INTR_NOPOLL | INTR_MPSAFE |
1106 			     INTR_NOENTROPY, 0);
1107 	}
1108 
1109 	/* Initialize RTC. */
1110 	writertc(RTC_STATUSA, rtc_statusa);
1111 	writertc(RTC_STATUSB, RTCSB_24HR);
1112 
1113 	if (ioapic_enable) {
1114 		error = i8254_ioapic_trial(irq, cti);
1115 		if (error) {
1116 			if (mixed_mode) {
1117 				if (!selected) {
1118 					kprintf("IOAPIC: mixed mode for irq %d "
1119 						"trial failed: %d\n",
1120 						irq, error);
1121 					goto nointr;
1122 				} else {
1123 					panic("IOAPIC: mixed mode for irq %d "
1124 					      "trial failed: %d\n", irq, error);
1125 				}
1126 			} else {
1127 				kprintf("IOAPIC: warning 8254 is not connected "
1128 					"to the correct pin, try mixed mode\n");
1129 				unregister_int(clkdesc, 0);
1130 				goto mixed_mode_setup;
1131 			}
1132 		}
1133 	}
1134 	return;
1135 
1136 nointr:
1137 	i8254_nointr = 1; /* don't try to register again */
1138 	cputimer_intr_deregister(cti);
1139 }
1140 
1141 void
1142 setstatclockrate(int newhz)
1143 {
1144 	if (newhz == RTC_PROFRATE)
1145 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1146 	else
1147 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1148 	writertc(RTC_STATUSA, rtc_statusa);
1149 }
1150 
1151 #if 0
1152 static unsigned
1153 tsc_get_timecount(struct timecounter *tc)
1154 {
1155 	return (rdtsc());
1156 }
1157 #endif
1158 
1159 #ifdef KERN_TIMESTAMP
1160 #define KERN_TIMESTAMP_SIZE 16384
1161 static u_long tsc[KERN_TIMESTAMP_SIZE];
1162 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
1163 	sizeof(tsc), "LU", "Kernel timestamps");
1164 void
1165 _TSTMP(u_int32_t x)
1166 {
1167 	static int i;
1168 
1169 	tsc[i] = (u_int32_t)rdtsc();
1170 	tsc[i+1] = x;
1171 	i = i + 2;
1172 	if (i >= KERN_TIMESTAMP_SIZE)
1173 		i = 0;
1174 	tsc[i] = 0; /* mark last entry */
1175 }
1176 #endif /* KERN_TIMESTAMP */
1177 
1178 /*
1179  * Sysctl handler: report the current i8254 count and the TSC value.
1180  */
1181 
1182 static int
1183 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1184 {
1185     sysclock_t count;
1186     uint64_t tscval;
1187     char buf[32];
1188 
1189     crit_enter();
1190     if (sys_cputimer == &i8254_cputimer)
1191 	count = sys_cputimer->count();
1192     else
1193 	count = 0;
1194     if (tsc_present)
1195 	tscval = rdtsc();
1196     else
1197 	tscval = 0;
1198     crit_exit();
1199     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1200     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1201 }
1202 
1203 struct tsc_mpsync_arg {
1204 	volatile uint64_t	tsc_target;
1205 	volatile int		tsc_mpsync;
1206 };
1207 
1208 struct tsc_mpsync_thr {
1209 	volatile int		tsc_done_cnt;
1210 	volatile int		tsc_mpsync_cnt;
1211 };
1212 
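/*
 * MP synchronization probe: the initiating cpu publishes its own TSC
 * reading in tsc_target and then uses a cpusync to make every other cpu
 * sample its TSC; if any remote sample is older than the published
 * target, the TSCs cannot be treated as a single monotonic clock.
 */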
1213 static void
1214 tsc_mpsync_test_remote(void *xarg)
1215 {
1216 	struct tsc_mpsync_arg *arg = xarg;
1217 	uint64_t tsc;
1218 
1219 	tsc = rdtsc_ordered();
1220 	if (tsc < arg->tsc_target)
1221 		arg->tsc_mpsync = 0;
1222 }
1223 
1224 static void
1225 tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
1226 {
1227 	struct globaldata *gd = mycpu;
1228 	uint64_t test_end, test_begin;
1229 	u_int i;
1230 
1231 	if (bootverbose) {
1232 		kprintf("cpu%d: TSC testing MP synchronization ...\n",
1233 		    gd->gd_cpuid);
1234 	}
1235 
1236 	test_begin = rdtsc_ordered();
1237 	/* Run test for 100ms */
1238 	test_end = test_begin + (tsc_frequency / 10);
1239 
1240 	arg->tsc_mpsync = 1;
1241 	arg->tsc_target = test_begin;
1242 
1243 #define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
1244 #define TSC_TEST_TRYMIN		50000
1245 
1246 	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
1247 		struct lwkt_cpusync cs;
1248 
1249 		crit_enter();
1250 		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
1251 		    tsc_mpsync_test_remote, arg);
1252 		lwkt_cpusync_interlock(&cs);
1253 		arg->tsc_target = rdtsc_ordered();
1254 		cpu_mfence();
1255 		lwkt_cpusync_deinterlock(&cs);
1256 		crit_exit();
1257 
1258 		if (!arg->tsc_mpsync) {
1259 			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
1260 			    gd->gd_cpuid, i);
1261 			break;
1262 		}
1263 		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
1264 			break;
1265 	}
1266 
1267 #undef TSC_TEST_TRYMIN
1268 #undef TSC_TEST_TRYMAX
1269 
1270 	if (arg->tsc_target == test_begin) {
1271 		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
1272 		/* XXX disable TSC? */
1273 		tsc_invariant = 0;
1274 		arg->tsc_mpsync = 0;
1275 		return;
1276 	}
1277 
1278 	if (arg->tsc_mpsync && bootverbose) {
1279 		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
1280 		    gd->gd_cpuid, i);
1281 	}
1282 }
1283 
1284 static void
1285 tsc_mpsync_ap_thread(void *xthr)
1286 {
1287 	struct tsc_mpsync_thr *thr = xthr;
1288 	struct tsc_mpsync_arg arg;
1289 
1290 	tsc_mpsync_test_loop(&arg);
1291 	if (arg.tsc_mpsync) {
1292 		atomic_add_int(&thr->tsc_mpsync_cnt, 1);
1293 		cpu_sfence();
1294 	}
1295 	atomic_add_int(&thr->tsc_done_cnt, 1);
1296 
1297 	lwkt_exit();
1298 }
1299 
1300 static void
1301 tsc_mpsync_test(void)
1302 {
1303 	struct tsc_mpsync_arg arg;
1304 
1305 	if (!tsc_invariant) {
1306 		/* Not even invariant TSC */
1307 		return;
1308 	}
1309 
1310 	if (ncpus == 1) {
1311 		/* Only one CPU */
1312 		tsc_mpsync = 1;
1313 		return;
1314 	}
1315 
1316 	/*
1317 	 * Forcing can be used w/qemu to reduce contention
1318 	 */
1319 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1320 	if (tsc_mpsync) {
1321 		kprintf("TSC as cputimer forced\n");
1322 		return;
1323 	}
1324 
1325 	if (cpu_vendor_id != CPU_VENDOR_INTEL) {
1326 		/* XXX only Intel works */
1327 		return;
1328 	}
1329 
1330 	kprintf("TSC testing MP synchronization ...\n");
1331 
1332 	tsc_mpsync_test_loop(&arg);
1333 	if (arg.tsc_mpsync) {
1334 		struct tsc_mpsync_thr thr;
1335 		int cpu;
1336 
1337 		/*
1338 		 * Test TSC MP synchronization on APs.
1339 		 */
1340 
1341 		thr.tsc_done_cnt = 1;
1342 		thr.tsc_mpsync_cnt = 1;
1343 
1344 		for (cpu = 0; cpu < ncpus; ++cpu) {
1345 			if (cpu == mycpuid)
1346 				continue;
1347 
1348 			lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
1349 			    NULL, 0, cpu, "tsc mpsync %d", cpu);
1350 		}
1351 
1352 		while (thr.tsc_done_cnt != ncpus) {
1353 			cpu_pause();
1354 			cpu_lfence();
1355 		}
1356 		if (thr.tsc_mpsync_cnt == ncpus)
1357 			tsc_mpsync = 1;
1358 	}
1359 
1360 	if (tsc_mpsync)
1361 		kprintf("TSC is MP synchronized\n");
1362 	else
1363 		kprintf("TSC is not MP synchronized\n");
1364 }
1365 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1366 
1367 #define TSC_CPUTIMER_FREQMAX	128000000	/* 128MHz */
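/*
 * Capping the shifted TSC rate keeps 32 bit sysclock_t deltas usable:
 * e.g. a hypothetical 2.4 GHz TSC is shifted right by 5 to 75 MHz, so
 * the 32 bit count wraps roughly every 57 seconds instead of every
 * couple of seconds.
 */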
1368 
1369 static int tsc_cputimer_shift;
1370 
1371 static void
1372 tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
1373 {
1374 	timer->base = 0;
1375 	timer->base = oldclock - timer->count();
1376 }
1377 
1378 static __inline sysclock_t
1379 tsc_cputimer_count(void)
1380 {
1381 	uint64_t tsc;
1382 
1383 	tsc = rdtsc();
1384 	tsc >>= tsc_cputimer_shift;
1385 
1386 	return (tsc + tsc_cputimer.base);
1387 }
1388 
1389 static sysclock_t
1390 tsc_cputimer_count_lfence(void)
1391 {
1392 	cpu_lfence();
1393 	return tsc_cputimer_count();
1394 }
1395 
1396 static sysclock_t
1397 tsc_cputimer_count_mfence(void)
1398 {
1399 	cpu_mfence();
1400 	return tsc_cputimer_count();
1401 }
1402 
1403 static void
1404 tsc_cputimer_register(void)
1405 {
1406 	uint64_t freq;
1407 	int enable = 1;
1408 
1409 	if (!tsc_mpsync)
1410 		return;
1411 
1412 	TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
1413 	if (!enable)
1414 		return;
1415 
1416 	freq = tsc_frequency;
1417 	while (freq > TSC_CPUTIMER_FREQMAX) {
1418 		freq >>= 1;
1419 		++tsc_cputimer_shift;
1420 	}
1421 	kprintf("TSC: cputimer freq %ju, shift %d\n",
1422 	    (uintmax_t)freq, tsc_cputimer_shift);
1423 
1424 	tsc_cputimer.freq = freq;
1425 
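	/*
	 * RDTSC is not ordered with respect to surrounding loads; an LFENCE
	 * in front of it is sufficient on Intel, while MFENCE remains the
	 * conservative choice for other vendors.
	 */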
1426 	if (cpu_vendor_id == CPU_VENDOR_INTEL)
1427 		tsc_cputimer.count = tsc_cputimer_count_lfence;
1428 	else
1429 		tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */
1430 
1431 	cputimer_register(&tsc_cputimer);
1432 	cputimer_select(&tsc_cputimer, 0);
1433 }
1434 SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
1435 	tsc_cputimer_register, NULL);
1436 
1437 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
1438 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
1439 	    "frequency");
1440 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
1441 	    0, 0, hw_i8254_timestamp, "A", "");
1442 
1443 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
1444 	    &tsc_present, 0, "TSC Available");
1445 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
1446 	    &tsc_invariant, 0, "Invariant TSC");
1447 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
1448 	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
1449 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
1450 	    &tsc_frequency, 0, "TSC Frequency");
1451