xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 7bcb6caf)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
/*
 * Linker set of platform timecounter configure hooks; each entry's
 * ->configure() is invoked from startrtclock().  The placeholder entry
 * keeps the set non-empty even when no real hooks are compiled in.
 */
SET_DECLARE(timecounter_init_set, const timecounter_init_t);
TIMECOUNTER_INIT(placeholder, NULL);

static void i8254_restore(void);
static void resettodr_on_shutdown(void *arg __unused);
92 
/*
 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
 * can use a simple formula for leap years.
 */
#define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182	/* i8254 input clock frequency, Hz */
#endif

/*
 * Select code and counter I/O port of whichever 8254 counter (1 or 2) was
 * chosen as the free-running wall timer in i8254_cputimer_construct().
 */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;		/* CPUID reports a TSC */
int	tsc_invariant;		/* TSC rate is invariant (or forced) */
int	tsc_mpsync;
int	tsc_is_broken;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
int	timer0_running;		/* timer0 one-shot currently armed */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Per-counter ownership state for the three 8254 counters. */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;
enum tstate timer1_state;
enum tstate timer2_state;

int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static	int	beeping = 0;	/* speaker beep in progress (period ticks) */
static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int	rtc_loaded;

/* Non-zero: simple integer divide converts sysclock ticks to 8254 ticks. */
static int i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;	/* stops the speaker after a beep */
static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/*
 * The i8254-backed system cputimer.  The free-running counter (timer1 or
 * timer2, chosen at construct time) supplies the count.
 */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};
164 
static sysclock_t tsc_cputimer_count_mfence(void);
static sysclock_t tsc_cputimer_count_lfence(void);
static void tsc_cputimer_construct(struct cputimer *, sysclock_t);

/*
 * TSC-backed cputimer.  The count method (mfence vs lfence serialization)
 * and the frequency are filled in once the TSC has been calibrated.
 */
static struct cputimer	tsc_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "TSC",
    .pri		= CPUTIMER_PRI_TSC,
    .type		= CPUTIMER_TSC,
    .count		= NULL,	/* determined later */
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= tsc_cputimer_construct,
    .destruct		= cputimer_default_destruct,
    .freq		= 0	/* determined later */
};
181 
/* TSC-backed cpucounter; freq/count/flags are filled in after calibration. */
static struct cpucounter tsc_cpucounter = {
    .freq		= 0,	/* determined later */
    .count		= NULL,	/* determined later */
    .flags		= 0,	/* adjusted later */
    .prio		= CPUCOUNTER_PRIO_TSC,
    .type		= CPUCOUNTER_TSC
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/*
 * Interrupt-generating cputimer based on 8254 timer0 run in one-shot
 * (software strobe) mode; reprogrammed on every reload.
 */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
210 
/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	/*
	 * NOTE! Must be static: its address is handed to other cpus via
	 * lwkt_send_ipiq3() below, and those IPIs may run after this
	 * stack frame would have been released.
	 */
	static sysclock_t sysclock_count;
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* Skip cpus with no systimers queued */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
250 
251 
252 /*
253  * NOTE! not MP safe.
254  */
255 int
256 acquire_timer2(int mode)
257 {
258 	if (timer2_state != RELEASED)
259 		return (-1);
260 	timer2_state = ACQUIRED;
261 
262 	/*
263 	 * This access to the timer registers is as atomic as possible
264 	 * because it is a single instruction.  We could do better if we
265 	 * knew the rate.
266 	 */
267 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
268 	return (0);
269 }
270 
271 int
272 release_timer2(void)
273 {
274 	if (timer2_state != ACQUIRED)
275 		return (-1);
276 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
277 	timer2_state = RELEASED;
278 	return (0);
279 }
280 
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * "show rtc" ddb command: dump the raw (BCD) RTC date/time registers and
 * the A/B/C control/status registers.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */
293 
/*
 * Return the current cpu timer count as a 32 bit integer.
 *
 * The 16-bit free-running counter is latched and read lsb/msb under the
 * clock lock; a software base is bumped by 2^16 on each 16-bit rollover
 * (detected via the previous reading) to extend it to 32 bits.
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* previous countup value, for rollover */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch, then read lsb followed by msb - the order is mandatory */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();
	return(ret);
}
317 
318 /*
319  * This function is called whenever the system timebase changes, allowing
320  * us to calculate what is needed to convert a system timebase tick
321  * into an 8254 tick for the interrupt timer.  If we can convert to a
322  * simple shift, multiplication, or division, we do so.  Otherwise 64
323  * bit arithmatic is required every time the interrupt timer is reloaded.
324  */
325 static void
326 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
327 {
328     int freq;
329     int div;
330 
331     /*
332      * Will a simple divide do the trick?
333      */
334     div = (timer->freq + (cti->freq / 2)) / cti->freq;
335     freq = cti->freq * div;
336 
337     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
338 	i8254_cputimer_div = div;
339     else
340 	i8254_cputimer_div = 0;
341 }
342 
/*
 * Reload for the next timeout.  It is possible for the reload value
 * to be 0 or negative, indicating that an immediate timer interrupt
 * is desired.  For now make the minimum 2 ticks.
 *
 * We may have to convert from the system timebase to the 8254 timebase.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* Convert sysclock ticks to 8254 ticks (fast path: cached divisor). */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;

    if ((int)reload < 2)
	reload = 2;

    clock_lock();
    if (timer0_running) {
	/*
	 * Timer0 is already armed; only reprogram it if the new deadline
	 * is earlier than the remaining count, otherwise let it fire.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	timer0_running = 1;
	if (reload > 0xFFFF)
	    reload = 0;		/* full count */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
383 
/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self-test: exercise DELAY() over several magnitudes. */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
		     1000000;

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		/* Clamp apparent backwards motion of the timebase. */
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
452 
/*
 * Spin for at least n microseconds.  DELAY() never thread-switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}
461 
/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
 *
 * Each call converts the elapsed sysclock delta to whole microseconds,
 * subtracts them from tdd->us, and advances last_clock by only the ticks
 * those whole microseconds represent so the fractional remainder is
 * carried into the next call rather than lost.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* First call: make sure a timebase exists, then arm. */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = (u_int64_t)delta * (u_int64_t)1000000 /
	     (u_int64_t)sys_cputimer->freq;
	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
			   1000000;
	tdd->us -= us;
	return (tdd->us < 0);
}
487 
488 
489 /*
490  * DRIVERSLEEP() does not switch if called with a spinlock held or
491  * from a hard interrupt.
492  */
493 void
494 DRIVERSLEEP(int usec)
495 {
496 	globaldata_t gd = mycpu;
497 
498 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
499 		DODELAY(usec, 0);
500 	} else {
501 		DODELAY(usec, 1);
502 	}
503 }
504 
/*
 * Callout handler: silence the speaker and give timer2 back.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
512 
513 int
514 sysbeep(int pitch, int period)
515 {
516 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
517 		return(-1);
518 	if (sysbeep_enable == 0)
519 		return(-1);
520 	/*
521 	 * Nobody else is using timer2, we do not need the clock lock
522 	 */
523 	outb(TIMER_CNTR2, pitch);
524 	outb(TIMER_CNTR2, (pitch>>8));
525 	if (!beeping) {
526 		/* enable counter2 output to speaker */
527 		outb(IO_PPI, inb(IO_PPI) | 3);
528 		beeping = period;
529 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
530 	}
531 	return (0);
532 }
533 
/*
 * RTC support routines
 */

/*
 * Read one MC146818 RTC register.  The dummy reads of port 0x84 provide
 * the I/O recovery delay the chip needs between accesses.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);			/* I/O delay */
	val = inb(IO_RTC + 1);
	inb(0x84);			/* I/O delay */
	crit_exit();
	return (val);
}
551 
/*
 * Write one MC146818 RTC register; 0x84 reads are I/O recovery delays.
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
563 
/*
 * Read an RTC register and convert it from BCD to binary.
 */
static __inline int
readrtc(int port)
{
	int bcd = rtcin(port);

	return (bcd2bin(bcd));
}
569 
/*
 * Calibrate the i8254 cputimer (and, if present, the TSC) against the
 * RTC seconds counter over a one second interval.  Returns the measured
 * i8254 frequency in Hz, or the default frequency on failure.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	u_int tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* RTC must have valid power/time (VRT bit) to be trustworthy. */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mc146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (int)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* |1 guarantees a non-zero numerator so the approx is at least 1. */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %u Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %u Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
659 
/*
 * (Re)program 8254 timer0 as the one-shot interrupt timer and register
 * both the i8254 interrupt cputimer and the free-running i8254 cputimer.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
687 
/*
 * cputimer construct hook: pick timer1 or timer2 (tunable
 * hw.i8254.walltimer, default 2) as the free-running wall timer and
 * start it in rate-generator mode with a full 2^16 count.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* Resume the 32-bit count at the next 2^16 boundary past oldclock. */
	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
727 
728 static void
729 i8254_cputimer_destruct(struct cputimer *timer)
730 {
731 	switch(timer->type) {
732 	case CPUTIMER_8254_SEL1:
733 	    timer1_state = RELEASED;
734 	    break;
735 	case CPUTIMER_8254_SEL2:
736 	    timer2_state = RELEASED;
737 	    break;
738 	default:
739 	    break;
740 	}
741 	timer->type = 0;
742 }
743 
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	/* STATUSB is written first without PINTR, then with the full value. */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
752 
/*
 * Restore all the timers.
 *
 * This function is called to resynchronize our core timekeeping after a
 * long halt, e.g. from apm_default_resume() and friends.  It is also
 * called if after a BIOS call we have detected munging of the 8254.
 * It is necessary because cputimer_count() counter's delta may have grown
 * too large for nanouptime() and friends to handle, or (in the case of 8254
 * munging) might cause the SYSTIMER code to prematurely trigger.
 */
void
timer_restore(void)
{
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
772 
#define MAX_MEASURE_RETRIES	100

/*
 * Take one (TSC, cputimer) sample pair, retrying while the bracketing
 * rdtsc reads show the cputimer read took unusually long (i.e. we were
 * probably preempted or hit by an SMI).  The latency threshold is
 * relaxed to 2x after half the retry budget is spent.
 *
 * Returns the starting TSC value; the sample's latency, the cputimer
 * reading, and the retry count are returned through the pointers.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
802 
/*
 * Measure the TSC frequency (Hz) by timing a DELAY(usecs) interval
 * against sys_cputimer.
 *
 * Fast method (calibrate_tsc_fast): take two latency-filtered sample
 * pairs 20ms apart at each end of the interval and average the two
 * derived frequencies.  Slow method: a single raw rdtsc pair around
 * DELAY(usecs), scaled by the requested interval.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Center each TSC delta within its read latencies. */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = (freq1 * sys_cputimer->freq) / end1;
		freq2 = (freq2 * sys_cputimer->freq) / end2;
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
857 
/*
 * Initialize 8254 timer 0 early so that it can be used in DELAY().
 *
 * Also: detect the TSC and its invariance, sanitize the RTC control
 * registers, run the timecounter configure hooks, and calibrate the
 * i8254 (optionally against the RTC) and the TSC.
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	u_int delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* CPUID leaf 0x80000007 EDX bit 8: invariant TSC */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run all registered timecounter configure hooks. */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cuptimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%d Hz differs from default of %d Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		/*
		 * Estimate the TSC cost of one sys_cputimer read so
		 * do_measure() can reject preempted/SMI-disturbed samples.
		 */
		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* |1 guarantees a non-zero numerator so the approx is at least 1. */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1061 
1062 /*
1063  * Sync the time of day back to the RTC on shutdown, but only if
1064  * we have already loaded it and have not crashed.
1065  */
1066 static void
1067 resettodr_on_shutdown(void *arg __unused)
1068 {
1069  	if (rtc_loaded && panicstr == NULL) {
1070 		resettodr();
1071 	}
1072 }
1073 
1074 /*
1075  * Initialize the time of day register, based on the time base which is, e.g.
1076  * from a filesystem.
1077  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/*
	 * Seed the system clock from the caller-supplied base (e.g. a
	 * filesystem timestamp); a valid RTC reading overrides it below.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		/* briefly reopen the critical section while polling */
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	/* two-digit year: values below 70 are taken to be 20xx */
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Accumulate days for the elapsed months of the current year. */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* Add days for whole years since the epoch, counting leap days. */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* convert local wall-clock time to UTC */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/*
	 * Only step the system clock when it disagrees with the RTC
	 * by two seconds or more.
	 */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1148 
1149 /*
1150  * Write system time back to RTC
1151  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	/* Honor the administrative knob disabling RTC writes. */
	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* Jan 1 1970 was a Thursday, hence the +4 offset. */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	/* Peel off whole years (with leap days) to find the year. */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Peel off whole months; m ends as the 0-based month index. */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1208 
1209 static int
1210 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1211 {
1212 	sysclock_t base;
1213 	long lastcnt;
1214 
1215 	/*
1216 	 * Following code assumes the 8254 is the cpu timer,
1217 	 * so make sure it is.
1218 	 */
1219 	KKASSERT(sys_cputimer == &i8254_cputimer);
1220 	KKASSERT(cti == &i8254_cputimer_intr);
1221 
1222 	lastcnt = get_interrupt_counter(irq, mycpuid);
1223 
1224 	/*
1225 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1226 	 * it to happen, then see if we got it.
1227 	 */
1228 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1229 
1230 	i8254_intr_reload(cti, 2);
1231 	base = sys_cputimer->count();
1232 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1233 		; /* nothing */
1234 
1235 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1236 		return ENOENT;
1237 	return 0;
1238 }
1239 
1240 /*
1241  * Start both clocks running.  DragonFly note: the stat clock is no longer
1242  * used.  Instead, 8254 based systimers are used for all major clock
1243  * interrupts.
1244  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	KKASSERT(mycpuid == 0);

	/* Allow the 8254 interrupt to be administratively disabled. */
	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		/* Look for the IOAPIC pin carrying legacy irq 0. */
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			/*
			 * No direct pin found (or the trial below
			 * failed): route irq 0 through the 8259 in
			 * mixed (ExtINT) mode.  NOTE: also reached via
			 * goto from the trial-failure path below.
			 */
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					/* Not committed yet; back out. */
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* Legacy 8259 routing: irq 0 directly. */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		/* Verify the interrupt actually arrives on this pin. */
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1333 
1334 void
1335 setstatclockrate(int newhz)
1336 {
1337 	if (newhz == RTC_PROFRATE)
1338 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1339 	else
1340 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1341 	writertc(RTC_STATUSA, rtc_statusa);
1342 }
1343 
#if 0
/* Disabled: raw-TSC timecounter read routine, kept for reference. */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1351 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Circular buffer of (tsc, tag) pairs exported via debug.timestamp. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp entry: the low 32 bits of the TSC followed by the
 * caller-supplied tag x.  The slot after the newest pair is zeroed to
 * mark the logical end of the ring.  NOTE(review): the static index is
 * updated without synchronization -- concurrent callers may interleave.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1370 
1371 /*
1372  *
1373  */
1374 
/*
 * Sysctl handler: report the current 8254 count and TSC value as a
 * single string "cccccccc tttttttttttttttt".
 */
static int
hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
{
    sysclock_t count;
    uint64_t tscval;
    char buf[32];

    /* Sample both clocks as close together as possible. */
    crit_enter();
    if (sys_cputimer == &i8254_cputimer)
	count = sys_cputimer->count();
    else
	count = 0;	/* 8254 is not the system timer */
    if (tsc_present)
	tscval = rdtsc();
    else
	tscval = 0;
    crit_exit();
    /* NOTE(review): %08x assumes sysclock_t is 32 bits wide -- verify. */
    ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
    return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
}
1395 
/* Shared state for the TSC MP synchronization test. */
struct tsc_mpsync_arg {
	volatile uint64_t	tsc_target;	/* initiator's TSC sample */
	volatile int		tsc_mpsync;	/* cleared if any cpu lags */
};
1400 
/* Completion/result counters for the per-AP test threads. */
struct tsc_mpsync_thr {
	volatile int		tsc_done_cnt;	/* threads finished */
	volatile int		tsc_mpsync_cnt;	/* threads that saw sync */
};
1405 
1406 static void
1407 tsc_mpsync_test_remote(void *xarg)
1408 {
1409 	struct tsc_mpsync_arg *arg = xarg;
1410 	uint64_t tsc;
1411 
1412 	tsc = rdtsc_ordered();
1413 	if (tsc < arg->tsc_target)
1414 		arg->tsc_mpsync = 0;
1415 }
1416 
/*
 * Core MP synchronization test, run from the calling cpu.  Repeatedly
 * publishes a fresh local TSC sample and has every other cpu verify
 * (in tsc_mpsync_test_remote()) that its own TSC is not behind it.
 * On success arg->tsc_mpsync remains 1.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_arg *arg)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		/*
		 * Interlock with the other cpus, publish a fresh TSC
		 * sample, then release them to compare against it.
		 */
		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();	/* make the sample globally visible */
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		/* Stop after ~100ms, but insist on a minimum try count. */
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}
1478 
/*
 * Per-AP thread: run the MP sync test loop on this cpu and report the
 * result back through the shared counters, then exit.
 */
static void
tsc_mpsync_ap_thread(void *xthr)
{
	struct tsc_mpsync_thr *thr = xthr;
	struct tsc_mpsync_arg arg;

	tsc_mpsync_test_loop(&arg);
	if (arg.tsc_mpsync) {
		/* Publish the sync count before signalling completion. */
		atomic_add_int(&thr->tsc_mpsync_cnt, 1);
		cpu_sfence();
	}
	atomic_add_int(&thr->tsc_done_cnt, 1);

	lwkt_exit();
}
1494 
/*
 * Decide whether the TSC is synchronized across all cpus and set
 * tsc_mpsync accordingly.  The hw.tsc_cputimer_force tunable can
 * force the result on (>0) or disable the test (<0).
 */
static void
tsc_mpsync_test(void)
{
	struct tsc_mpsync_arg arg;

	if (!tsc_invariant) {
		/* Not even invariant TSC */
		return;
	}

	if (ncpus == 1) {
		/* Only one CPU */
		tsc_mpsync = 1;
		return;
	}

	/*
	 * Forcing can be used w/qemu to reduce contention
	 */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);

	if (tsc_mpsync == 0) {
		/* Not forced: apply vendor/family heuristics first. */
		switch (cpu_vendor_id) {
		case CPU_VENDOR_INTEL:
			/*
			 * Intel probably works
			 */
			break;

		case CPU_VENDOR_AMD:
			/*
			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
			 * architectures) we have to watch out for
			 * Erratum 778:
			 *     "Processor Core Time Stamp Counters May
			 *      Experience Drift"
			 * This Erratum is only listed for cpus in Family
			 * 15h < Model 30h and for 16h < Model 30h.
			 *
			 * AMD < Bulldozer probably doesn't work
			 */
			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
				if (CPUID_TO_MODEL(cpu_id) < 0x30)
					return;
			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
				return;
			}
			break;

		default:
			/* probably won't work */
			return;
		}
	} else if (tsc_mpsync < 0) {
		/* Test explicitly disabled via the tunable. */
		kprintf("TSC MP synchronization test is disabled\n");
		tsc_mpsync = 0;
		return;
	}

	/*
	 * Test even if forced above.  If forced, we will use the TSC
	 * even if the test fails.
	 */
	kprintf("TSC testing MP synchronization ...\n");

	/* Run the test from the BSP first. */
	tsc_mpsync_test_loop(&arg);
	if (arg.tsc_mpsync) {
		struct tsc_mpsync_thr thr;
		int cpu;

		/*
		 * Test TSC MP synchronization on APs.
		 */

		/* Counters start at 1 to account for the BSP itself. */
		thr.tsc_done_cnt = 1;
		thr.tsc_mpsync_cnt = 1;

		for (cpu = 0; cpu < ncpus; ++cpu) {
			if (cpu == mycpuid)
				continue;

			lwkt_create(tsc_mpsync_ap_thread, &thr, NULL,
			    NULL, 0, cpu, "tsc mpsync %d", cpu);
		}

		/* Wait for every AP thread to finish its loop. */
		while (thr.tsc_done_cnt != ncpus) {
			cpu_pause();
			cpu_lfence();
		}
		if (thr.tsc_mpsync_cnt == ncpus)
			tsc_mpsync = 1;
	}

	if (tsc_mpsync)
		kprintf("TSC is MP synchronized\n");
	else
		kprintf("TSC is not MP synchronized\n");
}
/* Run the TSC MP synchronization test once all cpus are up. */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);
1595 
/*
 * Cap on the effective TSC cputimer frequency; faster TSCs are
 * right-shifted down below this rate (see tsc_cputimer_register()).
 */
#define TSC_CPUTIMER_FREQMAX	128000000	/* 128Mhz */

/* Right shift applied to raw TSC reads for the cputimer. */
static int tsc_cputimer_shift;
1599 
/*
 * cputimer construct hook: rebase the TSC cputimer so that count()
 * continues seamlessly from oldclock.
 */
static void
tsc_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	/*
	 * Zero base first because timer->count() adds base in; the
	 * second assignment then makes count() resume at oldclock.
	 */
	timer->base = 0;
	timer->base = oldclock - timer->count();
}
1606 
1607 static __inline sysclock_t
1608 tsc_cputimer_count(void)
1609 {
1610 	uint64_t tsc;
1611 
1612 	tsc = rdtsc();
1613 	tsc >>= tsc_cputimer_shift;
1614 
1615 	return (tsc + tsc_cputimer.base);
1616 }
1617 
/*
 * cputimer count preceded by a load fence; selected for Intel cpus
 * in tsc_cputimer_register().
 */
static sysclock_t
tsc_cputimer_count_lfence(void)
{
	cpu_lfence();
	return tsc_cputimer_count();
}
1624 
/*
 * cputimer count preceded by a full memory fence; the conservative
 * choice used for non-Intel cpus (see tsc_cputimer_register()).
 */
static sysclock_t
tsc_cputimer_count_mfence(void)
{
	cpu_mfence();
	return tsc_cputimer_count();
}
1631 
/* Raw TSC cpucounter read preceded by a load fence (Intel). */
static uint64_t
tsc_cpucounter_count_lfence(void)
{

	cpu_lfence();
	return (rdtsc());
}
1639 
/* Raw TSC cpucounter read preceded by a full fence (non-Intel). */
static uint64_t
tsc_cpucounter_count_mfence(void)
{

	cpu_mfence();
	return (rdtsc());
}
1647 
/*
 * Register the TSC as system cputimer when it is MP synchronized,
 * and as the per-cpu cpucounter when it is at least invariant.
 */
static void
tsc_cputimer_register(void)
{
	uint64_t freq;
	int enable = 1;

	if (!tsc_mpsync) {
		if (tsc_invariant) {
			/* Per-cpu cpucounter still works. */
			goto regcnt;
		}
		return;
	}

	/* Allow the TSC cputimer to be disabled via tunable. */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_enable", &enable);
	if (!enable)
		return;

	/* Halve the frequency until it fits under FREQMAX. */
	freq = tsc_frequency;
	while (freq > TSC_CPUTIMER_FREQMAX) {
		freq >>= 1;
		++tsc_cputimer_shift;
	}
	kprintf("TSC: cputimer freq %ju, shift %d\n",
	    (uintmax_t)freq, tsc_cputimer_shift);

	tsc_cputimer.freq = freq;

	/* Pick the fence variant appropriate for the cpu vendor. */
	if (cpu_vendor_id == CPU_VENDOR_INTEL)
		tsc_cputimer.count = tsc_cputimer_count_lfence;
	else
		tsc_cputimer.count = tsc_cputimer_count_mfence; /* safe bet */

	cputimer_register(&tsc_cputimer);
	cputimer_select(&tsc_cputimer, 0);

	tsc_cpucounter.flags |= CPUCOUNTER_FLAG_MPSYNC;
regcnt:
	tsc_cpucounter.freq = tsc_frequency;
	if (cpu_vendor_id == CPU_VENDOR_INTEL) {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_lfence;
	} else {
		tsc_cpucounter.count =
		    tsc_cpucounter_count_mfence; /* safe bet */
	}
	cpucounter_register(&tsc_cpucounter);
}
/* Register the TSC cputimer/cpucounter after SMP bringup completes. */
SYSINIT(tsc_cputimer_reg, SI_BOOT2_POST_SMP, SI_ORDER_FIRST,
	tsc_cputimer_register, NULL);
1698 
/* Sysctl nodes exporting i8254 and TSC state to userland. */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1713