xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 2b7dbe20)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
87 SET_DECLARE(timecounter_init_set, const timecounter_init_t);
88 TIMECOUNTER_INIT(placeholder, NULL);
89 
90 static void i8254_restore(void);
91 static void resettodr_on_shutdown(void *arg __unused);
92 
93 /*
94  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
95  * can use a simple formula for leap years.
96  */
97 #define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
98 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
99 
100 #ifndef TIMER_FREQ
101 #define TIMER_FREQ   1193182
102 #endif
103 
104 static uint8_t i8254_walltimer_sel;
105 static uint16_t i8254_walltimer_cntr;
106 static int timer0_running;
107 
108 int	adjkerntz;		/* local offset from GMT in seconds */
109 int	disable_rtc_set;	/* disable resettodr() if != 0 */
110 int	tsc_present;
111 int	tsc_invariant;
112 int	tsc_mpsync;
113 int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
114 tsc_uclock_t tsc_frequency;
115 tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */
116 
117 enum tstate { RELEASED, ACQUIRED };
118 static enum tstate timer0_state;
119 static enum tstate timer1_state;
120 static enum tstate timer2_state;
121 
122 int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */
123 
124 static	int	beeping = 0;
125 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
126 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
127 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
128 static  int	rtc_loaded;
129 
130 static	sysclock_t i8254_cputimer_div;
131 
132 static int i8254_nointr;
133 static int i8254_intr_disable = 1;
134 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
135 
136 static int calibrate_timers_with_rtc = 0;
137 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
138 
139 static int calibrate_tsc_fast = 1;
140 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);
141 
142 static int calibrate_test;
143 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);
144 
145 static struct callout sysbeepstop_ch;
146 
147 static sysclock_t i8254_cputimer_count(void);
148 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
149 static void i8254_cputimer_destruct(struct cputimer *cputimer);
150 
151 static struct cputimer	i8254_cputimer = {
152     .next		= SLIST_ENTRY_INITIALIZER,
153     .name		= "i8254",
154     .pri		= CPUTIMER_PRI_8254,
155     .type		= 0,	/* determined later */
156     .count		= i8254_cputimer_count,
157     .fromhz		= cputimer_default_fromhz,
158     .fromus		= cputimer_default_fromus,
159     .construct		= i8254_cputimer_construct,
160     .destruct		= i8254_cputimer_destruct,
161     .freq		= TIMER_FREQ
162 };
163 
164 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
165 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
166 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
167 
168 static struct cputimer_intr i8254_cputimer_intr = {
169     .freq = TIMER_FREQ,
170     .reload = i8254_intr_reload,
171     .enable = cputimer_intr_default_enable,
172     .config = i8254_intr_config,
173     .restart = cputimer_intr_default_restart,
174     .pmfixup = cputimer_intr_default_pmfixup,
175     .initclock = i8254_intr_initclock,
176     .pcpuhand = NULL,
177     .next = SLIST_ENTRY_INITIALIZER,
178     .name = "i8254",
179     .type = CPUTIMER_INTR_8254,
180     .prio = CPUTIMER_INTR_PRIO_8254,
181     .caps = CPUTIMER_INTR_CAP_PS,
182     .priv = NULL
183 };
184 
185 /*
186  * Use this to lwkt_switch() when the scheduler clock is not
187  * yet running, otherwise lwkt_switch() won't do anything.
188  * XXX needs cleaning up in lwkt_thread.c
189  */
static void
lwkt_force_switch(void)
{
	/* Charge a scheduler tick to curthread inside a critical section */
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	/* Now the switch will actually do something */
	lwkt_switch();
}
198 
199 /*
200  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
201  * counting as of this interrupt.  We use timer1 in free-running mode (not
202  * generating any interrupts) as our main counter.  Each cpu has timeouts
203  * pending.
204  *
205  * This code is INTR_MPSAFE and may be called without the BGL held.
206  */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* Skip cpus with no systimers queued */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		/*
		 * Remote cpu: the count is passed by pointer, which is why
		 * sysclock_count must be static (the IPI may be processed
		 * after this stack frame is gone).
		 */
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		/* Local cpu: dispatch directly, passing the trap frame */
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
238 
239 
240 /*
241  * NOTE! not MP safe.
242  */
243 int
244 acquire_timer2(int mode)
245 {
246 	if (timer2_state != RELEASED)
247 		return (-1);
248 	timer2_state = ACQUIRED;
249 
250 	/*
251 	 * This access to the timer registers is as atomic as possible
252 	 * because it is a single instruction.  We could do better if we
253 	 * knew the rate.
254 	 */
255 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
256 	return (0);
257 }
258 
259 int
260 release_timer2(void)
261 {
262 	if (timer2_state != ACQUIRED)
263 		return (-1);
264 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
265 	timer2_state = RELEASED;
266 	return (0);
267 }
268 
269 #include "opt_ddb.h"
270 #ifdef DDB
271 #include <ddb/ddb.h>
272 
/*
 * Debugger "show rtc" command: dump the RTC date/time registers and the
 * A/B/C status (control) registers, all in raw (BCD) hex.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
280 #endif /* DDB */
281 
282 /*
283  * Return the current cpu timer count as a 32 bit integer.
284  */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;	/* previous count-up value */
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch the walltimer, then read lsb followed by msb */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);	/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000U;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}
306 
307 /*
308  * This function is called whenever the system timebase changes, allowing
309  * us to calculate what is needed to convert a system timebase tick
310  * into an 8254 tick for the interrupt timer.  If we can convert to a
311  * simple shift, multiplication, or division, we do so.  Otherwise 64
312  * bit arithmetic is required every time the interrupt timer is reloaded.
313  */
314 static void
315 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
316 {
317     sysclock_t freq;
318     sysclock_t div;
319 
320     /*
321      * Will a simple divide do the trick?
322      */
323     div = (timer->freq + (cti->freq / 2)) / cti->freq;
324     freq = cti->freq * div;
325 
326     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
327 	i8254_cputimer_div = div;
328     else
329 	i8254_cputimer_div = 0;
330 }
331 
332 /*
333  * Reload for the next timeout.  It is possible for the reload value
334  * to be 0 or negative, indicating that an immediate timer interrupt
335  * is desired.  For now make the minimum 2 ticks.
336  *
337  * We may have to convert from the system timebase to the 8254 timebase.
338  */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* Negative (huge unsigned) reload means "fire as soon as possible" */
    if ((ssysclock_t)reload < 0)
	    reload = 1;
    /* Convert from system timebase ticks to 8254 ticks */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

    if (reload < 2)
	reload = 2;		/* minimum count */
    if (reload > 0xFFFF)
	reload = 0xFFFF;	/* almost full count (0 is full count) */

    clock_lock();
    if (timer0_running) {
	/*
	 * Timer already armed: only reprogram it if the new timeout is
	 * shorter than what is already pending.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	/* Timer idle: arm the one-shot */
	timer0_running = 1;
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
374 
375 /*
376  * DELAY(usec)	     - Spin for the specified number of microseconds.
377  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
378  *		       but do a thread switch in the loop
379  *
380  * Relies on timer 1 counting down from (cputimer_freq / hz)
381  * Note: timer had better have been programmed before this is first used!
382  */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self-test of increasing delays on first entry */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		/* Optionally let other threads run while we wait */
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
442 
443 /*
444  * DELAY() never switches.
445  */
void
DELAY(int n)
{
	/* Pure spin for n microseconds, never thread-switches */
	DODELAY(n, 0);
}
451 
452 /*
453  * Returns non-zero if the specified time period has elapsed.  Call
454  * first with last_clock set to 0.
455  */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* First call: make sure a timebase exists, then arm */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	/*
	 * Convert the elapsed timer ticks to whole microseconds, but
	 * advance last_clock by only the ticks those microseconds account
	 * for, so the fractional remainder carries into the next call.
	 */
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}
476 
477 
478 /*
479  * DRIVERSLEEP() does not switch if called with a spinlock held or
480  * from a hard interrupt.
481  */
482 void
483 DRIVERSLEEP(int usec)
484 {
485 	globaldata_t gd = mycpu;
486 
487 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
488 		DODELAY(usec, 0);
489 	} else {
490 		DODELAY(usec, 1);
491 	}
492 }
493 
/*
 * Callout handler: silence the speaker and give timer2 back.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
501 
502 int
503 sysbeep(int pitch, int period)
504 {
505 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
506 		return(-1);
507 	if (sysbeep_enable == 0)
508 		return(-1);
509 	/*
510 	 * Nobody else is using timer2, we do not need the clock lock
511 	 */
512 	outb(TIMER_CNTR2, pitch);
513 	outb(TIMER_CNTR2, (pitch>>8));
514 	if (!beeping) {
515 		/* enable counter2 output to speaker */
516 		outb(IO_PPI, inb(IO_PPI) | 3);
517 		beeping = period;
518 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
519 	}
520 	return (0);
521 }
522 
523 /*
524  * RTC support routines
525  */
526 
/*
 * Read RTC register 'reg'.  Runs in a critical section so the
 * index-select/data-read pair is not split up on this cpu.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);			/* dummy port read for I/O delay */
	val = inb(IO_RTC + 1);		/* fetch its value */
	inb(0x84);
	crit_exit();
	return (val);
}
540 
/*
 * Write 'val' to RTC register 'reg'.  The inb(0x84) reads are dummy
 * accesses used purely as I/O delays between the port writes.
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);		/* store value */
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
552 
static __inline int
readrtc(int port)
{
	/* RTC registers are BCD encoded; convert to binary for callers */
	int bcd = rtcin(port);

	return (bcd2bin(bcd));
}
558 
/*
 * Measure the sys_cputimer frequency (and the TSC frequency, if a TSC
 * is present) against one full second of the mc146818A RTC seconds
 * counter.  Returns the measured frequency in Hz, or the default i8254
 * frequency if the RTC is unusable or times out.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	sysclock_t tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* The RTC must report valid power/time */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mc146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (sysclock_t)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* |1 guarantees the approximation is never 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %lu Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %lu Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
648 
/*
 * (Re)initialize i8254 timer0 as our one-shot interrupt timer and
 * register/select the i8254 cputimer, plus its interrupt driver unless
 * i8254 interrupts have been disabled.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
676 
/*
 * cputimer construct callback: pick timer1 or timer2 as the free-running
 * walltimer (tunable hw.i8254.walltimer, default 2), mark it acquired,
 * and program it as a full-period rate generator.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
 	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* Continue counting from the old clock, rounded up to a 64K boundary */
	timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;

	clock_lock();
	/* Rate generator with a 0 count runs the full 2^16 period */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
716 
717 static void
718 i8254_cputimer_destruct(struct cputimer *timer)
719 {
720 	switch(timer->type) {
721 	case CPUTIMER_8254_SEL1:
722 	    timer1_state = RELEASED;
723 	    break;
724 	case CPUTIMER_8254_SEL2:
725 	    timer2_state = RELEASED;
726 	    break;
727 	default:
728 	    break;
729 	}
730 	timer->type = 0;
731 }
732 
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	/* B is written twice: first plain 24HR mode, then the full value */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
741 
742 /*
743  * Restore all the timers.
744  *
745  * This function is called to resynchronize our core timekeeping after a
746  * long halt, e.g. from apm_default_resume() and friends.  It is also
747  * called if after a BIOS call we have detected munging of the 8254.
748  * It is necessary because cputimer_count() counter's delta may have grown
749  * too large for nanouptime() and friends to handle, or (in the case of 8254
750  * munging) might cause the SYSTIMER code to prematurely trigger.
751  */
void
timer_restore(void)
{
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
761 
762 #define MAX_MEASURE_RETRIES	100
763 
/*
 * Sample sys_cputimer bracketed by two ordered TSC reads, retrying while
 * the bracketing TSC delta exceeds a threshold derived from the expected
 * timer_latency (i.e. while the sample looks disturbed).  Returns the
 * first TSC value and stores the final latency, the sampled timer count,
 * and the number of retries used.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		/* Loosen the acceptance threshold after many failures */
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
791 
/*
 * Calibrate the TSC against sys_cputimer over a "usecs" microsecond
 * delay.  The fast method takes latency-compensated sample pairs 20ms
 * apart at both ends of the delay and averages the two resulting
 * estimates; the slow method is a plain TSC delta over DELAY(usecs).
 * Returns the estimated TSC frequency in Hz.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate the TSC deltas for the measurement latencies */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
846 
847 /*
848  * Initialize 8254 timer 0 early so that it can be used in DELAY().
849  */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* Probe the invariant-TSC bit via extended cpuid leaf */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run any linker-set registered timecounter initializers */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		/* Repeat calibration until a console key is pressed */
		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		/* Within 1% of the default: plausible */
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		/* Distrust the RTC-based TSC measurement as well */
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* Measure the min/avg/max latency of a cputimer read */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			/* Diagnostic sweep: calibrate at 100ms..2s delays */
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* |1 guarantees the approximation is never 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1050 
1051 /*
1052  * Sync the time of day back to the RTC on shutdown, but only if
1053  * we have already loaded it and have not crashed.
1054  */
1055 static void
1056 resettodr_on_shutdown(void *arg __unused)
1057 {
1058  	if (rtc_loaded && panicstr == NULL) {
1059 		resettodr();
1060 	}
1061 }
1062 
1063 /*
1064  * Initialize the time of day register, based on the time base which is, e.g.
1065  * from a filesystem.
1066  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/* Seed the clock from the caller-supplied base (e.g. fs time) */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		/* briefly reopen the critical section while polling */
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/* Convert calendar date to days since Jan 1 of this year */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	/* Add whole years since the epoch */
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only slam the system time if it is off by 2 seconds or more */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1137 
1138 /*
1139  * Write system time back to RTC
1140  */
1141 void
1142 resettodr(void)
1143 {
1144 	struct timeval tv;
1145 	unsigned long tm;
1146 	int m;
1147 	int y;
1148 
1149 	if (disable_rtc_set)
1150 		return;
1151 
1152 	microtime(&tv);
1153 	tm = tv.tv_sec;
1154 
1155 	crit_enter();
1156 	/* Disable RTC updates and interrupts. */
1157 	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
1158 
1159 	/* Calculate local time to put in RTC */
1160 
1161 	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1162 
1163 	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
1164 	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
1165 	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */
1166 
1167 	/* We have now the days since 01-01-1970 in tm */
1168 	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
1169 	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1170 	     tm >= m;
1171 	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
1172 	     tm -= m;
1173 
1174 	/* Now we have the years in y and the day-of-the-year in tm */
1175 	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
1176 #ifdef USE_RTC_CENTURY
1177 	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
1178 #endif
1179 	for (m = 0; ; m++) {
1180 		int ml;
1181 
1182 		ml = daysinmonth[m];
1183 		if (m == 1 && LEAPYEAR(y))
1184 			ml++;
1185 		if (tm < ml)
1186 			break;
1187 		tm -= ml;
1188 	}
1189 
1190 	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1191 	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1192 
1193 	/* Reenable RTC updates and interrupts. */
1194 	writertc(RTC_STATUSB, rtc_statusb);
1195 	crit_exit();
1196 }
1197 
1198 static int
1199 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1200 {
1201 	sysclock_t base;
1202 	long lastcnt;
1203 
1204 	/*
1205 	 * Following code assumes the 8254 is the cpu timer,
1206 	 * so make sure it is.
1207 	 */
1208 	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
1209 	KKASSERT(cti == &i8254_cputimer_intr);
1210 
1211 	lastcnt = get_interrupt_counter(irq, mycpuid);
1212 
1213 	/*
1214 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1215 	 * it to happen, then see if we got it.
1216 	 */
1217 	kprintf("IOAPIC: testing 8254 interrupt delivery...");
1218 
1219 	i8254_intr_reload(cti, sys_cputimer->fromus(2));
1220 	base = sys_cputimer->count();
1221 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1222 		; /* nothing */
1223 
1224 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
1225 		kprintf(" failed\n");
1226 		return ENOENT;
1227 	} else {
1228 		kprintf(" success\n");
1229 	}
1230 	return 0;
1231 }
1232 
1233 /*
1234  * Start both clocks running.  DragonFly note: the stat clock is no longer
1235  * used.  Instead, 8254 based systimers are used for all major clock
1236  * interrupts.
1237  */
1238 static void
1239 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1240 {
1241 	void *clkdesc = NULL;
1242 	int irq = 0, mixed_mode = 0, error;
1243 
1244 	KKASSERT(mycpuid == 0);
1245 
1246 	if (!selected && i8254_intr_disable)
1247 		goto nointr;
1248 
1249 	/*
1250 	 * The stat interrupt mask is different without the
1251 	 * statistics clock.  Also, don't set the interrupt
1252 	 * flag which would normally cause the RTC to generate
1253 	 * interrupts.
1254 	 */
1255 	rtc_statusb = RTCSB_24HR;
1256 
1257 	/* Finish initializing 8254 timer 0. */
1258 	if (ioapic_enable) {
1259 		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1260 			INTR_POLARITY_HIGH);
1261 		if (irq < 0) {
1262 mixed_mode_setup:
1263 			error = ioapic_conf_legacy_extint(0);
1264 			if (!error) {
1265 				irq = machintr_legacy_intr_find(0,
1266 				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1267 				if (irq < 0)
1268 					error = ENOENT;
1269 			}
1270 
1271 			if (error) {
1272 				if (!selected) {
1273 					kprintf("IOAPIC: setup mixed mode for "
1274 						"irq 0 failed: %d\n", error);
1275 					goto nointr;
1276 				} else {
1277 					panic("IOAPIC: setup mixed mode for "
1278 					      "irq 0 failed: %d\n", error);
1279 				}
1280 			}
1281 			mixed_mode = 1;
1282 		}
1283 		clkdesc = register_int(irq, clkintr, NULL, "clk",
1284 				       NULL,
1285 				       INTR_EXCL | INTR_CLOCK |
1286 				       INTR_NOPOLL | INTR_MPSAFE |
1287 				       INTR_NOENTROPY, 0);
1288 	} else {
1289 		register_int(0, clkintr, NULL, "clk", NULL,
1290 			     INTR_EXCL | INTR_CLOCK |
1291 			     INTR_NOPOLL | INTR_MPSAFE |
1292 			     INTR_NOENTROPY, 0);
1293 	}
1294 
1295 	/* Initialize RTC. */
1296 	writertc(RTC_STATUSA, rtc_statusa);
1297 	writertc(RTC_STATUSB, RTCSB_24HR);
1298 
1299 	if (ioapic_enable) {
1300 		error = i8254_ioapic_trial(irq, cti);
1301 		if (error) {
1302 			if (mixed_mode) {
1303 				if (!selected) {
1304 					kprintf("IOAPIC: mixed mode for irq %d "
1305 						"trial failed: %d\n",
1306 						irq, error);
1307 					goto nointr;
1308 				} else {
1309 					panic("IOAPIC: mixed mode for irq %d "
1310 					      "trial failed: %d\n", irq, error);
1311 				}
1312 			} else {
1313 				kprintf("IOAPIC: warning 8254 is not connected "
1314 					"to the correct pin, try mixed mode\n");
1315 				unregister_int(clkdesc, 0);
1316 				goto mixed_mode_setup;
1317 			}
1318 		}
1319 	}
1320 	return;
1321 
1322 nointr:
1323 	i8254_nointr = 1; /* don't try to register again */
1324 	cputimer_intr_deregister(cti);
1325 }
1326 
1327 void
1328 setstatclockrate(int newhz)
1329 {
1330 	if (newhz == RTC_PROFRATE)
1331 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1332 	else
1333 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1334 	writertc(RTC_STATUSA, rtc_statusa);
1335 }
1336 
#if 0
/*
 * Unused timecounter read routine, compiled out; kept for reference.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1344 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Ring buffer of (tsc, tag) pairs, exported read-only via debug.timestamp. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a debug timestamp: store the low 32 bits of the TSC and the
 * caller-supplied tag 'x' as consecutive ring entries, then zero the
 * next slot to mark the current end of the ring.  The index 'i' is
 * unlocked, so concurrent callers may interleave entries.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;	/* next free slot; advances by 2 per record */

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1363 
1364 /*
1365  *
1366  */
1367 
1368 static int
1369 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1370 {
1371     sysclock_t count;
1372     uint64_t tscval;
1373     char buf[32];
1374 
1375     crit_enter();
1376     if (sys_cputimer == &i8254_cputimer)
1377 	count = sys_cputimer->count();
1378     else
1379 	count = 0;
1380     if (tsc_present)
1381 	tscval = rdtsc();
1382     else
1383 	tscval = 0;
1384     crit_exit();
1385     ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1386     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1387 }
1388 
/*
 * Shared state for the TSC MP synchronization test: the initiating cpu
 * sets tsc_command, each AP thread records its final TSC sample in
 * tsc_saved[cpu].v and bumps tsc_done_cnt.  The unused fields pad the
 * layout; __cachealign limits false sharing of the whole structure.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of AP threads ready */
	volatile int		tsc_done_cnt;	/* # of AP threads finished */
	volatile int		tsc_command;	/* 0 = wait, non-zero = start */
	volatile int		unused01[5];	/* pad */
	struct {
		uint64_t	v;		/* last TSC sample from this cpu */
		uint64_t	unused02;	/* pad */
	} tsc_saved[MAXCPU];
} __cachealign;
1399 
#if 0
/*
 * Old cpusync-based TSC MP synchronization test, compiled out.
 * NOTE(review): as written this would not compile if re-enabled -- the
 * parameter is named 'info' (of a 'struct tsc_mpsync_thr' not declared
 * in this view) while the body references 'arg'; reconcile the names
 * before resurrecting this code.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1464 
#define TSC_TEST_COUNT		50000

/*
 * Per-cpu worker for the TSC MP synchronization test: announce
 * readiness, wait for tsc_command, then repeatedly sample the TSC,
 * leaving the final sample in info->tsc_saved[cpu].v.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT loops or until tsc_done_cnt != 0
	 * (another cpu has finished its test), then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1495 
1496 static void
1497 tsc_mpsync_test(void)
1498 {
1499 	int cpu;
1500 	int try;
1501 
1502 	if (!tsc_invariant) {
1503 		/* Not even invariant TSC */
1504 		return;
1505 	}
1506 
1507 	if (ncpus == 1) {
1508 		/* Only one CPU */
1509 		tsc_mpsync = 1;
1510 		return;
1511 	}
1512 
1513 	/*
1514 	 * Forcing can be used w/qemu to reduce contention
1515 	 */
1516 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1517 
1518 	if (tsc_mpsync == 0) {
1519 		switch (cpu_vendor_id) {
1520 		case CPU_VENDOR_INTEL:
1521 			/*
1522 			 * Intel probably works
1523 			 */
1524 			break;
1525 
1526 		case CPU_VENDOR_AMD:
1527 			/*
1528 			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1529 			 * architectures) we have to watch out for
1530 			 * Erratum 778:
1531 			 *     "Processor Core Time Stamp Counters May
1532 			 *      Experience Drift"
1533 			 * This Erratum is only listed for cpus in Family
1534 			 * 15h < Model 30h and for 16h < Model 30h.
1535 			 *
1536 			 * AMD < Bulldozer probably doesn't work
1537 			 */
1538 			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1539 			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
1540 				if (CPUID_TO_MODEL(cpu_id) < 0x30)
1541 					return;
1542 			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1543 				return;
1544 			}
1545 			break;
1546 
1547 		default:
1548 			/* probably won't work */
1549 			return;
1550 		}
1551 	} else if (tsc_mpsync < 0) {
1552 		kprintf("TSC MP synchronization test is disabled\n");
1553 		tsc_mpsync = 0;
1554 		return;
1555 	}
1556 
1557 	/*
1558 	 * Test even if forced to 1 above.  If forced, we will use the TSC
1559 	 * even if the test fails.  (set forced to -1 to disable entirely).
1560 	 */
1561 	kprintf("TSC testing MP synchronization ...\n");
1562 
1563 	/*
1564 	 * Test TSC MP synchronization on APs.  Try up to 4 times.
1565 	 */
1566 	for (try = 0; try < 4; ++try) {
1567 		struct tsc_mpsync_info info;
1568 		uint64_t last;
1569 		int64_t xdelta;
1570 		int64_t delta;
1571 
1572 		bzero(&info, sizeof(info));
1573 
1574 		for (cpu = 0; cpu < ncpus; ++cpu) {
1575 			thread_t td;
1576 			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1577 				    NULL, TDF_NOSTART, cpu,
1578 				    "tsc mpsync %d", cpu);
1579 			lwkt_setpri_initial(td, curthread->td_pri);
1580 			lwkt_schedule(td);
1581 		}
1582 		while (info.tsc_ready_cnt != ncpus)
1583 			lwkt_force_switch();
1584 
1585 		/*
1586 		 * All threads are ready, start the test and wait for
1587 		 * completion.
1588 		 */
1589 		info.tsc_command = 1;
1590 		while (info.tsc_done_cnt != ncpus)
1591 			lwkt_force_switch();
1592 
1593 		/*
1594 		 * Process results
1595 		 */
1596 		last = info.tsc_saved[0].v;
1597 		delta = 0;
1598 		for (cpu = 0; cpu < ncpus; ++cpu) {
1599 			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1600 			last = info.tsc_saved[cpu].v;
1601 			if (xdelta < 0)
1602 				xdelta = -xdelta;
1603 			delta += xdelta;
1604 
1605 		}
1606 
1607 		/*
1608 		 * Result from attempt.  If its too wild just stop now.
1609 		 * Also break out if we succeed, no need to try further.
1610 		 */
1611 		kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n",
1612 			delta, ncpus, delta / ncpus,
1613 			tsc_frequency / 100000);
1614 		if (delta / ncpus > tsc_frequency / 100)
1615 			break;
1616 		if (delta / ncpus < tsc_frequency / 100000) {
1617 			tsc_mpsync = 1;
1618 			break;
1619 		}
1620 	}
1621 
1622 	if (tsc_mpsync)
1623 		kprintf("TSC is MP synchronized\n");
1624 	else
1625 		kprintf("TSC is not MP synchronized\n");
1626 }
/* Run the TSC MP synchronization test once all APs are up. */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* hw.i8254.*: i8254 frequency and a raw count/TSC sampler. */
static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* hw.tsc_*: read-only TSC capability and state reporting. */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1643