xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 5b8cd87c)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
87 SET_DECLARE(timecounter_init_set, const timecounter_init_t);
88 TIMECOUNTER_INIT(placeholder, NULL);
89 
90 static void i8254_restore(void);
91 static void resettodr_on_shutdown(void *arg __unused);
92 
93 /*
94  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
95  * can use a simple formula for leap years.
96  */
97 #define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
98 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
99 
100 #ifndef TIMER_FREQ
101 #define TIMER_FREQ   1193182
102 #endif
103 
104 static uint8_t i8254_walltimer_sel;
105 static uint16_t i8254_walltimer_cntr;
106 
107 int	adjkerntz;		/* local offset from GMT in seconds */
108 int	disable_rtc_set;	/* disable resettodr() if != 0 */
109 int	tsc_present;
110 int	tsc_invariant;
111 int	tsc_mpsync;
112 int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
113 int	timer0_running;
114 tsc_uclock_t tsc_frequency;
115 tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */
116 
117 enum tstate { RELEASED, ACQUIRED };
118 enum tstate timer0_state;
119 enum tstate timer1_state;
120 enum tstate timer2_state;
121 
122 int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */
123 
124 static	int	beeping = 0;
125 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
126 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
127 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
128 static  int	rtc_loaded;
129 
130 static	sysclock_t i8254_cputimer_div;
131 
132 static int i8254_nointr;
133 static int i8254_intr_disable = 1;
134 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
135 
136 static int calibrate_timers_with_rtc = 0;
137 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
138 
139 static int calibrate_tsc_fast = 1;
140 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);
141 
142 static int calibrate_test;
143 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);
144 
145 static struct callout sysbeepstop_ch;
146 
147 static sysclock_t i8254_cputimer_count(void);
148 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
149 static void i8254_cputimer_destruct(struct cputimer *cputimer);
150 
151 static struct cputimer	i8254_cputimer = {
152     .next		= SLIST_ENTRY_INITIALIZER,
153     .name		= "i8254",
154     .pri		= CPUTIMER_PRI_8254,
155     .type		= 0,	/* determined later */
156     .count		= i8254_cputimer_count,
157     .fromhz		= cputimer_default_fromhz,
158     .fromus		= cputimer_default_fromus,
159     .construct		= i8254_cputimer_construct,
160     .destruct		= i8254_cputimer_destruct,
161     .freq		= TIMER_FREQ
162 };
163 
164 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
165 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
166 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
167 
168 static struct cputimer_intr i8254_cputimer_intr = {
169     .freq = TIMER_FREQ,
170     .reload = i8254_intr_reload,
171     .enable = cputimer_intr_default_enable,
172     .config = i8254_intr_config,
173     .restart = cputimer_intr_default_restart,
174     .pmfixup = cputimer_intr_default_pmfixup,
175     .initclock = i8254_intr_initclock,
176     .pcpuhand = NULL,
177     .next = SLIST_ENTRY_INITIALIZER,
178     .name = "i8254",
179     .type = CPUTIMER_INTR_8254,
180     .prio = CPUTIMER_INTR_PRIO_8254,
181     .caps = CPUTIMER_INTR_CAP_PS,
182     .priv = NULL
183 };
184 
185 /*
186  * Use this to lwkt_switch() when the scheduler clock is not
187  * yet running, otherwise lwkt_switch() won't do anything.
188  * XXX needs cleaning up in lwkt_thread.c
189  */
190 static void
191 lwkt_force_switch(void)
192 {
	/*
	 * Tick the scheduler clock inside a critical section so the
	 * current thread is marked for rescheduling, then switch away.
	 * Order matters: the clock tick must complete before lwkt_switch().
	 */
193 	crit_enter();
194 	lwkt_schedulerclock(curthread);
195 	crit_exit();
196 	lwkt_switch();
197 }
198 
199 /*
200  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
201  * counting as of this interrupt.  We use timer1 in free-running mode (not
202  * generating any interrupts) as our main counter.  Each cpu has timeouts
203  * pending.
204  *
205  * This code is INTR_MPSAFE and may be called without the BGL held.
206  */
207 static void
208 clkintr(void *dummy, void *frame_arg)
209 {
	/*
	 * sysclock_count must be static: the IPI handler on remote cpus
	 * dereferences &sysclock_count after this frame may have returned.
	 */
210 	static sysclock_t sysclock_count;	/* NOTE! Must be static */
211 	struct globaldata *gd = mycpu;
212 	struct globaldata *gscan;
213 	int n;
214 
215 	/*
216 	 * SWSTROBE mode is a one-shot, the timer is no longer running
217 	 */
218 	timer0_running = 0;
219 
220 	/*
221 	 * XXX the dispatcher needs work.  right now we call systimer_intr()
222 	 * directly or via IPI for any cpu with systimers queued, which is
223 	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
224 	 */
225 	sysclock_count = sys_cputimer->count();
226 	for (n = 0; n < ncpus; ++n) {
227 	    gscan = globaldata_find(n);
	    /* Skip cpus with no systimers queued */
228 	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
229 		continue;
230 	    if (gscan != gd) {
231 		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
232 				&sysclock_count, 1);
233 	    } else {
234 		systimer_intr(&sysclock_count, 0, frame_arg);
235 	    }
236 	}
237 }
238 
239 
240 /*
241  * NOTE! not MP safe.
242  */
/*
 * Acquire exclusive use of i8254 timer2 and program it with the caller's
 * mode bits (low 6 bits of the 8254 command word).  Returns 0 on success,
 * -1 if timer2 is already in use.
 */
243 int
244 acquire_timer2(int mode)
245 {
246 	if (timer2_state != RELEASED)
247 		return (-1);
248 	timer2_state = ACQUIRED;
249 
250 	/*
251 	 * This access to the timer registers is as atomic as possible
252 	 * because it is a single instruction.  We could do better if we
253 	 * knew the rate.
254 	 */
255 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
256 	return (0);
257 }
258 
/*
 * Release timer2 and reprogram it back to its default square-wave mode.
 * Returns 0 on success, -1 if timer2 was not acquired.  Not MP safe.
 */
259 int
260 release_timer2(void)
261 {
262 	if (timer2_state != ACQUIRED)
263 		return (-1);
264 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
265 	timer2_state = RELEASED;
266 	return (0);
267 }
268 
269 #include "opt_ddb.h"
270 #ifdef DDB
271 #include <ddb/ddb.h>
272 
/*
 * DDB "show rtc" command: dump the raw RTC date/time registers (hex;
 * typically BCD-encoded) plus status registers A, B and C.
 */
273 DB_SHOW_COMMAND(rtc, rtc)
274 {
275 	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
276 	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
277 	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
278 	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
279 }
280 #endif /* DDB */
281 
282 /*
283  * Return the current cpu timer count as a 32 bit integer.
284  */
285 static
286 sysclock_t
287 i8254_cputimer_count(void)
288 {
289 	static uint16_t cputimer_last;
290 	uint16_t count;
291 	sysclock_t ret;
292 
293 	clock_lock();
	/* Latch the walltimer's count, then read lsb followed by msb */
294 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
295 	count = (uint8_t)inb(i8254_walltimer_cntr);	/* get countdown */
296 	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
297 	count = -count;					/* -> countup */
	/*
	 * The 16-bit hardware counter wraps; detect wraparound against the
	 * previous sample and carry into the software-maintained base.
	 */
298 	if (count < cputimer_last)			/* rollover */
299 		i8254_cputimer.base += 0x00010000U;
300 	ret = i8254_cputimer.base | count;
301 	cputimer_last = count;
302 	clock_unlock();
303 
304 	return(ret);
305 }
306 
307 /*
308  * This function is called whenever the system timebase changes, allowing
309  * us to calculate what is needed to convert a system timebase tick
310  * into an 8254 tick for the interrupt timer.  If we can convert to a
311  * simple shift, multiplication, or division, we do so.  Otherwise 64
312  * bit arithmetic is required every time the interrupt timer is reloaded.
313  */
314 static void
315 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
316 {
317     sysclock_t freq;
318     sysclock_t div;
319 
320     /*
321      * Will a simple divide do the trick?
322      */
323     div = (timer->freq + (cti->freq / 2)) / cti->freq;
324     freq = cti->freq * div;
325 
326     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
327 	i8254_cputimer_div = div;
328     else
329 	i8254_cputimer_div = 0;
330 }
331 
332 /*
333  * Reload for the next timeout.  It is possible for the reload value
334  * to be 0 or negative, indicating that an immediate timer interrupt
335  * is desired.  For now make the minimum 2 ticks.
336  *
337  * We may have to convert from the system timebase to the 8254 timebase.
338  */
339 static void
340 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
341 {
342     uint16_t count;
343 
    /* Convert from system timebase ticks to 8254 ticks */
344     if (i8254_cputimer_div)
345 	reload /= i8254_cputimer_div;
346     else
347 	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);
348 
349     if (reload < 2)
350 	reload = 2;		/* minimum count */
351     if (reload > 0xFFFF)
352 	reload = 0xFFFF;	/* almost full count (0 is full count) */
353 
354     clock_lock();
355     if (timer0_running) {
	/*
	 * Timer0 is still counting down a previous one-shot; only
	 * reprogram it if the new deadline is sooner than the current one.
	 */
356 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
357 	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
358 	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
359 	if (reload < count) {
360 	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
361 	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
362 	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
363 	}
364     } else {
	/* Timer idle, start a fresh one-shot */
365 	timer0_running = 1;
366 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
367 	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
368 	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
369     }
370     clock_unlock();
371 }
372 
373 /*
374  * DELAY(usec)	     - Spin for the specified number of microseconds.
375  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
376  *		       but do a thread switch in the loop
377  *
378  * Relies on timer 1 counting down from (cputimer_freq / hz)
379  * Note: timer had better have been programmed before this is first used!
380  */
381 static void
382 DODELAY(int n, int doswitch)
383 {
	/* delta/ticks_left are signed so underflow is detectable */
384 	ssysclock_t delta, ticks_left;
385 	sysclock_t prev_tick, tick;
386 
387 #ifdef DELAYDEBUG
388 	int getit_calls = 1;
389 	int n1;
390 	static int state = 0;
391 
392 	if (state == 0) {
393 		state = 1;
394 		for (n1 = 1; n1 <= 10000000; n1 *= 10)
395 			DELAY(n1);
396 		state = 2;
397 	}
398 	if (state == 1)
399 		kprintf("DELAY(%d)...", n);
400 #endif
401 	/*
402 	 * Guard against the timer being uninitialized if we are called
403 	 * early for console i/o.
404 	 */
405 	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
406 		i8254_restore();
407 
408 	/*
409 	 * Read the counter first, so that the rest of the setup overhead is
410 	 * counted.  Then calculate the number of hardware timer ticks
411 	 * required, rounding up to be sure we delay at least the requested
412 	 * number of microseconds.
413 	 */
414 	prev_tick = sys_cputimer->count();
415 	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);
416 
417 	/*
418 	 * Loop until done.
419 	 */
420 	while (ticks_left > 0) {
421 		tick = sys_cputimer->count();
422 #ifdef DELAYDEBUG
423 		++getit_calls;
424 #endif
425 		delta = tick - prev_tick;
426 		prev_tick = tick;
		/* Clamp apparent negative steps (counter anomaly) to zero */
427 		if (delta < 0)
428 			delta = 0;
429 		ticks_left -= delta;
		/* Optionally yield the cpu between samples (DRIVERSLEEP) */
430 		if (doswitch && ticks_left > 0)
431 			lwkt_switch();
432 		cpu_pause();
433 	}
434 #ifdef DELAYDEBUG
435 	if (state == 1)
436 		kprintf(" %d calls to getit() at %d usec each\n",
437 		       getit_calls, (n + 5) / getit_calls);
438 #endif
439 }
440 
441 /*
442  * DELAY() never switches.
443  */
444 void
445 DELAY(int n)
446 {
	/* Busy-wait n microseconds without ever thread-switching */
447 	DODELAY(n, 0);
448 }
449 
450 /*
451  * Returns non-zero if the specified time period has elapsed.  Call
452  * first with last_clock set to 0.
453  */
454 int
455 CHECKTIMEOUT(TOTALDELAY *tdd)
456 {
457 	sysclock_t delta;
458 	int us;
459 
	/* First call: record the starting timestamp, never expired yet */
460 	if (tdd->started == 0) {
461 		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
462 			i8254_restore();
463 		tdd->last_clock = sys_cputimer->count();
464 		tdd->started = 1;
465 		return(0);
466 	}
	/*
	 * Convert elapsed ticks to whole microseconds and advance
	 * last_clock by exactly the ticks accounted for, so sub-usec
	 * remainders carry over to the next call instead of being lost.
	 */
467 	delta = sys_cputimer->count() - tdd->last_clock;
468 	us = muldivu64(delta, 1000000, sys_cputimer->freq);
469 	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
470 	tdd->us -= us;
471 
472 	return (tdd->us < 0);
473 }
474 
475 
476 /*
477  * DRIVERSLEEP() does not switch if called with a spinlock held or
478  * from a hard interrupt.
479  */
480 void
481 DRIVERSLEEP(int usec)
482 {
483 	globaldata_t gd = mycpu;
484 
485 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
486 		DODELAY(usec, 0);
487 	} else {
488 		DODELAY(usec, 1);
489 	}
490 }
491 
/*
 * Callout handler: silence the PC speaker and give timer2 back.
 */
492 static void
493 sysbeepstop(void *chan)
494 {
495 	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
496 	beeping = 0;
497 	release_timer2();
498 }
499 
500 int
501 sysbeep(int pitch, int period)
502 {
503 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
504 		return(-1);
505 	if (sysbeep_enable == 0)
506 		return(-1);
507 	/*
508 	 * Nobody else is using timer2, we do not need the clock lock
509 	 */
510 	outb(TIMER_CNTR2, pitch);
511 	outb(TIMER_CNTR2, (pitch>>8));
512 	if (!beeping) {
513 		/* enable counter2 output to speaker */
514 		outb(IO_PPI, inb(IO_PPI) | 3);
515 		beeping = period;
516 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
517 	}
518 	return (0);
519 }
520 
521 /*
522  * RTC support routines
523  */
524 
/*
 * Read one RTC register.  The inb(0x84) accesses are dummy reads of an
 * unused port used purely as a short I/O delay between RTC accesses.
 */
525 int
526 rtcin(int reg)
527 {
528 	u_char val;
529 
530 	crit_enter();
531 	outb(IO_RTC, reg);
532 	inb(0x84);
533 	val = inb(IO_RTC + 1);
534 	inb(0x84);
535 	crit_exit();
536 	return (val);
537 }
538 
/*
 * Write one RTC register; inb(0x84) reads are I/O delays as in rtcin().
 */
539 static __inline void
540 writertc(u_char reg, u_char val)
541 {
542 	crit_enter();
543 	inb(0x84);
544 	outb(IO_RTC, reg);
545 	inb(0x84);
546 	outb(IO_RTC + 1, val);
547 	inb(0x84);		/* XXX work around wrong order in rtcin() */
548 	crit_exit();
549 }
550 
/*
 * Read an RTC register and convert its BCD value to binary.
 */
551 static __inline int
552 readrtc(int port)
553 {
554 	return(bcd2bin(rtcin(port)));
555 }
556 
/*
 * Measure the system cputimer frequency (and the TSC frequency, if
 * present) against the RTC's one-second update cycle.  Returns the
 * measured frequency in Hz, or the default i8254 frequency on failure.
 */
557 static u_int
558 calibrate_clocks(void)
559 {
560 	tsc_uclock_t old_tsc;
561 	sysclock_t tot_count;
562 	sysclock_t count, prev_count;
563 	int sec, start_sec, timeout;
564 
565 	if (bootverbose)
566 	        kprintf("Calibrating clock(s) ...\n");
	/* No RTC power/valid bit -> nothing to calibrate against */
567 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
568 		goto fail;
569 	timeout = 100000000;
570 
571 	/* Read the MC146818A seconds counter. */
572 	for (;;) {
573 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
574 			sec = rtcin(RTC_SEC);
575 			break;
576 		}
577 		if (--timeout == 0)
578 			goto fail;
579 	}
580 
581 	/* Wait for the MC146818A seconds counter to change. */
582 	start_sec = sec;
583 	for (;;) {
584 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
585 			sec = rtcin(RTC_SEC);
586 			if (sec != start_sec)
587 				break;
588 		}
589 		if (--timeout == 0)
590 			goto fail;
591 	}
592 
593 	/* Start keeping track of the i8254 counter. */
594 	prev_count = sys_cputimer->count();
595 	tot_count = 0;
596 
597 	if (tsc_present)
598 		old_tsc = rdtsc();
599 	else
600 		old_tsc = 0;		/* shut up gcc */
601 
602 	/*
603 	 * Wait for the MC146818A seconds counter to change.  Read the i8254
604 	 * counter for each iteration since this is convenient and only
605 	 * costs a few usec of inaccuracy. The timing of the final reads
606 	 * of the counters almost matches the timing of the initial reads,
607 	 * so the main cause of inaccuracy is the varying latency from
608 	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
609 	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
610 	 * maximum inaccuracy from this cause is < 10 usec on 486's.
611 	 */
612 	start_sec = sec;
613 	for (;;) {
614 		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
615 			sec = rtcin(RTC_SEC);
616 		count = sys_cputimer->count();
617 		tot_count += (sysclock_t)(count - prev_count);
618 		prev_count = count;
619 		if (sec != start_sec)
620 			break;
621 		if (--timeout == 0)
622 			goto fail;
623 	}
624 
625 	/*
626 	 * Read the cpu cycle counter.  The timing considerations are
627 	 * similar to those for the i8254 clock.
628 	 */
629 	if (tsc_present) {
630 		tsc_frequency = rdtsc() - old_tsc;
631 		if (bootverbose) {
632 			kprintf("TSC clock: %jd Hz (Method A)\n",
633 			    (intmax_t)tsc_frequency);
634 		}
635 	}
	/* ticks-per-usec approximation, forced to be at least 1 */
636 	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
637 
638 	kprintf("i8254 clock: %lu Hz\n", tot_count);
639 	return (tot_count);
640 
641 fail:
642 	kprintf("failed, using default i8254 clock of %lu Hz\n",
643 		i8254_cputimer.freq);
644 	return (i8254_cputimer.freq);
645 }
646 
/*
 * (Re)program 8254 timer0 as the one-shot interrupt timer and register
 * the i8254 cputimer/interrupt-timer objects with the timer framework.
 */
647 static void
648 i8254_restore(void)
649 {
650 	timer0_state = ACQUIRED;
651 
652 	clock_lock();
653 
654 	/*
655 	 * Timer0 is our fine-grained variable clock interrupt
656 	 */
657 	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
658 	outb(TIMER_CNTR0, 2);	/* lsb */
659 	outb(TIMER_CNTR0, 0);	/* msb */
660 	clock_unlock();
661 
662 	if (!i8254_nointr) {
663 		cputimer_intr_register(&i8254_cputimer_intr);
664 		cputimer_intr_select(&i8254_cputimer_intr, 0);
665 	}
666 
667 	/*
668 	 * Timer1 or timer2 is our free-running clock, but only if another
669 	 * has not been selected.
670 	 */
671 	cputimer_register(&i8254_cputimer);
672 	cputimer_select(&i8254_cputimer, 0);
673 }
674 
/*
 * cputimer construct hook: pick timer1 or timer2 as the free-running
 * wall timer (tunable hw.i8254.walltimer, default 2), program it in
 * rate-generator mode with a full 2^16 count, and carry the previous
 * sysclock forward into the new base.
 */
675 static void
676 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
677 {
678  	int which;
679 
680 	/*
681 	 * Should we use timer 1 or timer 2 ?
682 	 */
683 	which = 0;
684 	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
685 	if (which != 1 && which != 2)
686 		which = 2;
687 
688 	switch(which) {
689 	case 1:
690 		timer->name = "i8254_timer1";
691 		timer->type = CPUTIMER_8254_SEL1;
692 		i8254_walltimer_sel = TIMER_SEL1;
693 		i8254_walltimer_cntr = TIMER_CNTR1;
694 		timer1_state = ACQUIRED;
695 		break;
696 	case 2:
697 		timer->name = "i8254_timer2";
698 		timer->type = CPUTIMER_8254_SEL2;
699 		i8254_walltimer_sel = TIMER_SEL2;
700 		i8254_walltimer_cntr = TIMER_CNTR2;
701 		timer2_state = ACQUIRED;
702 		break;
703 	}
704 
	/* Round the old clock up to a 64K boundary for the new base */
705 	timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;
706 
707 	clock_lock();
708 	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
709 	outb(i8254_walltimer_cntr, 0);	/* lsb */
710 	outb(i8254_walltimer_cntr, 0);	/* msb */
711 	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
712 	clock_unlock();
713 }
714 
715 static void
716 i8254_cputimer_destruct(struct cputimer *timer)
717 {
718 	switch(timer->type) {
719 	case CPUTIMER_8254_SEL1:
720 	    timer1_state = RELEASED;
721 	    break;
722 	case CPUTIMER_8254_SEL2:
723 	    timer2_state = RELEASED;
724 	    break;
725 	default:
726 	    break;
727 	}
728 	timer->type = 0;
729 }
730 
731 static void
732 rtc_restore(void)
733 {
734 	/* Restore all of the RTC's "status" (actually, control) registers. */
	/* 24HR is written first alone, then A, then the full B value */
735 	writertc(RTC_STATUSB, RTCSB_24HR);
736 	writertc(RTC_STATUSA, rtc_statusa);
737 	writertc(RTC_STATUSB, rtc_statusb);
738 }
739 
740 /*
741  * Restore all the timers.
742  *
743  * This function is called to resynchronize our core timekeeping after a
744  * long halt, e.g. from apm_default_resume() and friends.  It is also
745  * called if after a BIOS call we have detected munging of the 8254.
746  * It is necessary because cputimer_count() counter's delta may have grown
747  * too large for nanouptime() and friends to handle, or (in the case of 8254
748  * munging) might cause the SYSTIMER code to prematurely trigger.
749  */
750 void
751 timer_restore(void)
752 {
753 	crit_enter();
754 	if (i8254_cputimer_disable == 0)
755 		i8254_restore();	/* restore timer_freq and hz */
756 	rtc_restore();			/* reenable RTC interrupts */
757 	crit_exit();
758 }
759 
760 #define MAX_MEASURE_RETRIES	100
761 
/*
 * Take one (TSC, cputimer) sample pair, retrying until the TSC delta
 * bracketing the cputimer read is close to the expected timer_latency
 * (i.e. the sample was not disturbed by an interrupt/SMI).  Returns the
 * TSC value before the read; outputs the measured latency, the cputimer
 * value, and how many retries were needed.
 */
762 static u_int64_t
763 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
764     int *retries)
765 {
766 	u_int64_t tsc1, tsc2;
767 	u_int64_t threshold;
768 	sysclock_t val;
769 	int cnt = 0;
770 
771 	do {
		/* Loosen the acceptance threshold after many retries */
772 		if (cnt > MAX_MEASURE_RETRIES/2)
773 			threshold = timer_latency << 1;
774 		else
775 			threshold = timer_latency + (timer_latency >> 2);
776 
777 		cnt++;
778 		tsc1 = rdtsc_ordered();
779 		val = sys_cputimer->count();
780 		tsc2 = rdtsc_ordered();
781 	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
782 	    tsc2 - tsc1 > threshold);
783 
784 	*retries = cnt - 1;
785 	*latency = tsc2 - tsc1;
786 	*time = val;
787 	return tsc1;
788 }
789 
/*
 * Calibrate the TSC frequency against the system cputimer over a delay
 * of 'usecs' microseconds.  The fast path takes latency-compensated
 * double samples at both ends and averages two independent estimates;
 * the slow path is a plain TSC delta over DELAY(usecs).
 */
790 static u_int64_t
791 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
792 {
793 	if (calibrate_tsc_fast) {
794 		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
795 		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
796 		u_int64_t freq1, freq2;
797 		sysclock_t start1, end1, start2, end2;
798 		int retries1, retries2, retries3, retries4;
799 
		/* Two start samples 20ms apart, then two end samples */
800 		DELAY(1000);
801 		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
802 		    &retries1);
803 		DELAY(20000);
804 		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
805 		    &retries2);
806 		DELAY(usecs);
807 		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
808 		    &retries3);
809 		DELAY(20000);
810 		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
811 		    &retries4);
812 
		/* Compensate the TSC deltas for the sampling latencies */
813 		old_tsc1 += start_lat1;
814 		old_tsc2 += start_lat2;
815 		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
816 		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
817 		end1 -= start1;
818 		end2 -= start2;
819 		/* This should in practice be safe from overflows. */
820 		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
821 		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
822 		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
823 			kprintf("%s: retries: %d, %d, %d, %d\n",
824 			    __func__, retries1, retries2, retries3, retries4);
825 		}
826 		if (calibrate_test) {
827 			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
828 			    __func__, freq1, freq2, (freq1 + freq2) / 2);
829 		}
830 		return (freq1 + freq2) / 2;
831 	} else {
832 		u_int64_t old_tsc, new_tsc;
833 		u_int64_t freq;
834 
835 		old_tsc = rdtsc_ordered();
836 		DELAY(usecs);
837 		new_tsc = rdtsc();
838 		freq = new_tsc - old_tsc;
839 		/* This should in practice be safe from overflows. */
840 		freq = (freq * 1000 * 1000) / usecs;
841 		return freq;
842 	}
843 }
844 
845 /*
846  * Initialize 8254 timer 0 early so that it can be used in DELAY().
847  */
848 void
849 startrtclock(void)
850 {
851 	const timecounter_init_t **list;
852 	sysclock_t delta, freq;
853 
854 	callout_init_mp(&sysbeepstop_ch);
855 
856 	/*
857 	 * Can we use the TSC?
858 	 *
859 	 * NOTE: If running under qemu, probably a good idea to force the
860 	 *	 TSC because we are not likely to detect it as being
861 	 *	 invariant or mpsyncd if you don't.  This will greatly
862 	 *	 reduce SMP contention.
863 	 */
864 	if (cpu_feature & CPUID_TSC) {
865 		tsc_present = 1;
866 		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);
867 
		/* CPUID leaf 0x80000007 EDX bit 8 = invariant TSC */
868 		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
869 		     cpu_vendor_id == CPU_VENDOR_AMD) &&
870 		    cpu_exthigh >= 0x80000007) {
871 			u_int regs[4];
872 
873 			do_cpuid(0x80000007, regs);
874 			if (regs[3] & 0x100)
875 				tsc_invariant = 1;
876 		}
877 	} else {
878 		tsc_present = 0;
879 	}
880 
881 	/*
882 	 * Initial RTC state, don't do anything unexpected
883 	 */
884 	writertc(RTC_STATUSA, rtc_statusa);
885 	writertc(RTC_STATUSB, RTCSB_24HR);
886 
	/* Run any linker-set timecounter initializers (may set tsc_frequency) */
887 	SET_FOREACH(list, timecounter_init_set) {
888 		if ((*list)->configure != NULL)
889 			(*list)->configure();
890 	}
891 
892 	/*
893 	 * If tsc_frequency is already initialized now, and a flag is set
894 	 * that i8254 timer is unneeded, we are done.
895 	 */
896 	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
897 		goto done;
898 
899 	/*
900 	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
901 	 * generate an interrupt, which we will ignore for now.
902 	 *
903 	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
904 	 * (so it counts a full 2^16 and repeats).  We will use this timer
905 	 * for our counting.
906 	 */
907 	if (i8254_cputimer_disable == 0)
908 		i8254_restore();
909 
910 	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);
911 
912 	/*
913 	 * When booting without verbose messages, it's pointless to run the
914 	 * calibrate_clocks() calibration code, when we don't use the
915 	 * results in any way. With bootverbose, we are at least printing
916 	 * this information to the kernel log.
917 	 */
918 	if (i8254_cputimer_disable != 0 ||
919 	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
920 		goto skip_rtc_based;
921 	}
922 
923 	freq = calibrate_clocks();
924 #ifdef CLK_CALIBRATION_LOOP
925 	if (bootverbose) {
926 		int c;
927 
928 		cnpoll(TRUE);
929 		kprintf("Press a key on the console to "
930 			"abort clock calibration\n");
931 		while ((c = cncheckc()) == -1 || c == NOKEY)
932 			calibrate_clocks();
933 		cnpoll(FALSE);
934 	}
935 #endif
936 
937 	/*
938 	 * Use the calibrated i8254 frequency if it seems reasonable.
939 	 * Otherwise use the default, and don't use the calibrated i586
940 	 * frequency.
941 	 */
942 	delta = freq > i8254_cputimer.freq ?
943 		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
944 	if (delta < i8254_cputimer.freq / 100) {
945 		if (calibrate_timers_with_rtc == 0) {
946 			kprintf(
947 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
948 			freq = i8254_cputimer.freq;
949 		}
950 		/*
951 		 * NOTE:
952 		 * Interrupt timer's freq must be adjusted
953 		 * before we change the cputimer's frequency.
954 		 */
955 		i8254_cputimer_intr.freq = freq;
956 		cputimer_set_frequency(&i8254_cputimer, freq);
957 	} else {
958 		if (bootverbose)
959 			kprintf("%lu Hz differs from default of %lu Hz "
960 				"by more than 1%%\n",
961 			        freq, i8254_cputimer.freq);
962 		tsc_frequency = 0;
963 	}
964 
965 	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
966 		kprintf("hw.calibrate_timers_with_rtc not "
967 			"set - using old calibration method\n");
968 		tsc_frequency = 0;
969 	}
970 
971 skip_rtc_based:
	/* Fall back to cputimer-based TSC calibration (Method B) */
972 	if (tsc_present && tsc_frequency == 0) {
973 		u_int cnt;
974 		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
975 		int i;
976 
977 		for (i = 0; i < 10; i++) {
978 			/* Warm up */
979 			(void)sys_cputimer->count();
980 		}
		/* Estimate the TSC cost of one cputimer read (min/avg/max) */
981 		for (i = 0; i < 100; i++) {
982 			u_int64_t old_tsc, new_tsc;
983 
984 			old_tsc = rdtsc_ordered();
985 			(void)sys_cputimer->count();
986 			new_tsc = rdtsc_ordered();
987 			cputime_latency_tsc += (new_tsc - old_tsc);
988 			if (max < (new_tsc - old_tsc))
989 				max = new_tsc - old_tsc;
990 			if (min == 0 || min > (new_tsc - old_tsc))
991 				min = new_tsc - old_tsc;
992 		}
993 		cputime_latency_tsc /= 100;
994 		kprintf(
995 		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
996 		    cputime_latency_tsc, min, max);
997 		/* XXX Instead of this, properly filter out outliers. */
998 		cputime_latency_tsc = min;
999 
1000 		if (calibrate_test > 0) {
1001 			u_int64_t values[20], avg = 0;
1002 			for (i = 1; i <= 20; i++) {
1003 				u_int64_t freq;
1004 
1005 				freq = do_calibrate_cputimer(i * 100 * 1000,
1006 				    cputime_latency_tsc);
1007 				values[i - 1] = freq;
1008 			}
1009 			/* Compute an average TSC for the 1s to 2s delays. */
1010 			for (i = 10; i < 20; i++)
1011 				avg += values[i];
1012 			avg /= 10;
1013 			for (i = 0; i < 20; i++) {
1014 				kprintf("%ums: %lu (Diff from average: %ld)\n",
1015 				    (i + 1) * 100, values[i],
1016 				    (int64_t)(values[i] - avg));
1017 			}
1018 		}
1019 
1020 		if (calibrate_tsc_fast > 0) {
1021 			/* HPET would typically be >10MHz */
1022 			if (sys_cputimer->freq >= 10000000)
1023 				cnt = 200000;
1024 			else
1025 				cnt = 500000;
1026 		} else {
1027 			cnt = 1000000;
1028 		}
1029 
1030 		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
1031 		if (bootverbose && calibrate_timers_with_rtc) {
1032 			kprintf("TSC clock: %jd Hz (Method B)\n",
1033 			    (intmax_t)tsc_frequency);
1034 		}
1035 	}
1036 
1037 done:
1038 	if (tsc_present) {
1039 		kprintf("TSC%s clock: %jd Hz\n",
1040 		    tsc_invariant ? " invariant" : "",
1041 		    (intmax_t)tsc_frequency);
1042 	}
	/* ticks-per-usec approximation, forced to be at least 1 */
1043 	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;
1044 
1045 	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
1046 			      NULL, SHUTDOWN_PRI_LAST);
1047 }
1048 
1049 /*
1050  * Sync the time of day back to the RTC on shutdown, but only if
1051  * we have already loaded it and have not crashed.
1052  */
1053 static void
1054 resettodr_on_shutdown(void *arg __unused)
1055 {
1056  	if (rtc_loaded && panicstr == NULL) {
1057 		resettodr();
1058 	}
1059 }
1060 
1061 /*
1062  * Initialize the time of day register, based on the time base which is, e.g.
1063  * from a filesystem.
1064  */
1065 void
1066 inittodr(time_t base)
1067 {
1068 	unsigned long	sec, days;
1069 	int		year, month;
1070 	int		y, m;
1071 	struct timespec ts;
1072 
	/* Seed the clock from the caller's base (e.g. filesystem time) */
1073 	if (base) {
1074 		ts.tv_sec = base;
1075 		ts.tv_nsec = 0;
1076 		set_timeofday(&ts);
1077 	}
1078 
1079 	/* Look if we have a RTC present and the time is valid */
1080 	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
1081 		goto wrong_time;
1082 
1083 	/* wait for time update to complete */
1084 	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	/* The exit/enter pair opens a preemption window on each spin */
1085 	crit_enter();
1086 	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
1087 		crit_exit();
1088 		crit_enter();
1089 	}
1090 
1091 	days = 0;
1092 #ifdef USE_RTC_CENTURY
1093 	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
1094 #else
1095 	year = readrtc(RTC_YEAR) + 1900;
1096 	if (year < 1970)
1097 		year += 100;
1098 #endif
1099 	if (year < 1970) {
1100 		crit_exit();
1101 		goto wrong_time;
1102 	}
	/* Days elapsed this year: whole months, leap day, day-of-month */
1103 	month = readrtc(RTC_MONTH);
1104 	for (m = 1; m < month; m++)
1105 		days += daysinmonth[m-1];
1106 	if ((month > 2) && LEAPYEAR(year))
1107 		days ++;
1108 	days += readrtc(RTC_DAY) - 1;
	/* Plus whole years since the epoch (LEAPYEAR adds the leap days) */
1109 	for (y = 1970; y < year; y++)
1110 		days += DAYSPERYEAR + LEAPYEAR(y);
1111 	sec = ((( days * 24 +
1112 		  readrtc(RTC_HRS)) * 60 +
1113 		  readrtc(RTC_MIN)) * 60 +
1114 		  readrtc(RTC_SEC));
1115 	/* sec now contains the number of seconds, since Jan 1 1970,
1116 	   in the local time zone */
1117 
	/* Convert local time to UTC using the kernel timezone/adjkerntz */
1118 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1119 
	/* Only step the clock if it disagrees with the RTC by >= 2 sec */
1120 	y = (int)(time_second - sec);
1121 	if (y <= -2 || y >= 2) {
1122 		/* badly off, adjust it */
1123 		ts.tv_sec = sec;
1124 		ts.tv_nsec = 0;
1125 		set_timeofday(&ts);
1126 	}
1127 	rtc_loaded = 1;
1128 	crit_exit();
1129 	return;
1130 
1131 wrong_time:
1132 	kprintf("Invalid time in real time clock.\n");
1133 	kprintf("Check and reset the date immediately!\n");
1134 }
1135 
1136 /*
1137  * Write system time back to RTC
1138  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* 01-01-1970 was a Thursday, hence the +4 weekday bias. */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	/* Strip off whole years, accounting for leap years. */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Strip whole months off tm; m ends as the 0-based month. */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))	/* February of a leap year */
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1195 
1196 static int
1197 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1198 {
1199 	sysclock_t base;
1200 	long lastcnt;
1201 
1202 	/*
1203 	 * Following code assumes the 8254 is the cpu timer,
1204 	 * so make sure it is.
1205 	 */
1206 	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
1207 	KKASSERT(cti == &i8254_cputimer_intr);
1208 
1209 	lastcnt = get_interrupt_counter(irq, mycpuid);
1210 
1211 	/*
1212 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1213 	 * it to happen, then see if we got it.
1214 	 */
1215 	kprintf("IOAPIC: testing 8254 interrupt delivery...");
1216 
1217 	i8254_intr_reload(cti, sys_cputimer->fromus(2));
1218 	base = sys_cputimer->count();
1219 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1220 		; /* nothing */
1221 
1222 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
1223 		kprintf(" failed\n");
1224 		return ENOENT;
1225 	} else {
1226 		kprintf(" success\n");
1227 	}
1228 	return 0;
1229 }
1230 
1231 /*
1232  * Start both clocks running.  DragonFly note: the stat clock is no longer
1233  * used.  Instead, 8254 based systimers are used for all major clock
1234  * interrupts.
1235  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Clock interrupt setup runs on the BSP only. */
	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		/*
		 * Look for the I/O APIC pin carrying legacy irq 0.
		 * If it cannot be found, fall into mixed-mode setup
		 * (route irq 0 as ExtINT through the I/O APIC).  The
		 * trial-failure path further below also jumps back
		 * to mixed_mode_setup.
		 */
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				/*
				 * Failure is fatal only if this timer
				 * was already selected as the system's
				 * interrupt cputimer.
				 */
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* Legacy PIC: irq 0 is hardwired to the 8254. */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		/*
		 * Verify that 8254 interrupts actually arrive on the
		 * chosen pin.  If direct routing fails, unregister the
		 * handler and retry once via mixed mode before giving
		 * up.
		 */
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1324 
1325 void
1326 setstatclockrate(int newhz)
1327 {
1328 	if (newhz == RTC_PROFRATE)
1329 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1330 	else
1331 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1332 	writertc(RTC_STATUSA, rtc_statusa);
1333 }
1334 
#if 0
/*
 * Disabled timecounter read routine, kept for reference; would
 * return the TSC (truncated to unsigned) if wired into a
 * timecounter structure.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1342 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Ring buffer of (tsc, tag) pairs, exported read-only via sysctl. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a debug timestamp: store the low 32 bits of the TSC and the
 * caller-supplied tag x as two consecutive entries, wrapping at
 * KERN_TIMESTAMP_SIZE.  The slot following the newest pair is zeroed
 * to mark the end of valid data for the consumer.
 *
 * NOTE(review): the index is a plain static int with no locking, so
 * concurrent callers on SMP may interleave entries — presumably
 * acceptable for a debug-only facility; confirm if this matters.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1361 
1362 /*
1363  *
1364  */
1365 
1366 static int
1367 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1368 {
1369     sysclock_t count;
1370     uint64_t tscval;
1371     char buf[32];
1372 
1373     crit_enter();
1374     if (sys_cputimer == &i8254_cputimer)
1375 	count = sys_cputimer->count();
1376     else
1377 	count = 0;
1378     if (tsc_present)
1379 	tscval = rdtsc();
1380     else
1381 	tscval = 0;
1382     crit_exit();
1383     ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1384     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1385 }
1386 
/*
 * Shared state for the TSC MP synchronization test: the initiator
 * polls the counters while one thread per cpu records TSC samples.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of AP threads ready */
	volatile int		tsc_done_cnt;	/* # of AP threads finished */
	volatile int		tsc_command;	/* 0=wait, non-zero=start test */
	volatile int		unused01[5];	/* presumably padding — verify */
	struct {
		uint64_t	v;		/* latest TSC sample for cpu */
		uint64_t	unused02;	/* pads entry to 16 bytes */
	} tsc_saved[MAXCPU];
} __cachealign;
1397 
#if 0
/*
 * Older cpusync-based MP synchronization test, disabled.
 *
 * NOTE(review): this dead code does not compile as-is if re-enabled:
 * the parameter is named 'info' (of a 'struct tsc_mpsync_thr' type
 * not defined in this file) while the body references 'arg'
 * throughout, and tsc_mpsync_test_remote is not defined here either.
 * Kept for reference only.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1462 
1463 #define TSC_TEST_COUNT		50000
1464 
/*
 * Per-cpu thread for the TSC MP synchronization test: once released
 * by the initiator, repeatedly record rdtsc_ordered() samples into
 * the shared info structure until the iteration limit is reached or
 * another cpu finishes first.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT (50000) loops or until
	 * tsc_done_cnt != 0 (another cpu has finished its test),
	 * then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1493 
1494 static void
1495 tsc_mpsync_test(void)
1496 {
1497 	int cpu;
1498 	int try;
1499 
1500 	if (!tsc_invariant) {
1501 		/* Not even invariant TSC */
1502 		return;
1503 	}
1504 
1505 	if (ncpus == 1) {
1506 		/* Only one CPU */
1507 		tsc_mpsync = 1;
1508 		return;
1509 	}
1510 
1511 	/*
1512 	 * Forcing can be used w/qemu to reduce contention
1513 	 */
1514 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1515 
1516 	if (tsc_mpsync == 0) {
1517 		switch (cpu_vendor_id) {
1518 		case CPU_VENDOR_INTEL:
1519 			/*
1520 			 * Intel probably works
1521 			 */
1522 			break;
1523 
1524 		case CPU_VENDOR_AMD:
1525 			/*
1526 			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1527 			 * architectures) we have to watch out for
1528 			 * Erratum 778:
1529 			 *     "Processor Core Time Stamp Counters May
1530 			 *      Experience Drift"
1531 			 * This Erratum is only listed for cpus in Family
1532 			 * 15h < Model 30h and for 16h < Model 30h.
1533 			 *
1534 			 * AMD < Bulldozer probably doesn't work
1535 			 */
1536 			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1537 			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
1538 				if (CPUID_TO_MODEL(cpu_id) < 0x30)
1539 					return;
1540 			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1541 				return;
1542 			}
1543 			break;
1544 
1545 		default:
1546 			/* probably won't work */
1547 			return;
1548 		}
1549 	} else if (tsc_mpsync < 0) {
1550 		kprintf("TSC MP synchronization test is disabled\n");
1551 		tsc_mpsync = 0;
1552 		return;
1553 	}
1554 
1555 	/*
1556 	 * Test even if forced to 1 above.  If forced, we will use the TSC
1557 	 * even if the test fails.  (set forced to -1 to disable entirely).
1558 	 */
1559 	kprintf("TSC testing MP synchronization ...\n");
1560 
1561 	/*
1562 	 * Test TSC MP synchronization on APs.  Try up to 4 times.
1563 	 */
1564 	for (try = 0; try < 4; ++try) {
1565 		struct tsc_mpsync_info info;
1566 		uint64_t last;
1567 		int64_t xdelta;
1568 		int64_t delta;
1569 
1570 		bzero(&info, sizeof(info));
1571 
1572 		for (cpu = 0; cpu < ncpus; ++cpu) {
1573 			thread_t td;
1574 			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1575 				    NULL, TDF_NOSTART, cpu,
1576 				    "tsc mpsync %d", cpu);
1577 			lwkt_setpri_initial(td, curthread->td_pri);
1578 			lwkt_schedule(td);
1579 		}
1580 		while (info.tsc_ready_cnt != ncpus)
1581 			lwkt_force_switch();
1582 
1583 		/*
1584 		 * All threads are ready, start the test and wait for
1585 		 * completion.
1586 		 */
1587 		info.tsc_command = 1;
1588 		while (info.tsc_done_cnt != ncpus)
1589 			lwkt_force_switch();
1590 
1591 		/*
1592 		 * Process results
1593 		 */
1594 		last = info.tsc_saved[0].v;
1595 		delta = 0;
1596 		for (cpu = 0; cpu < ncpus; ++cpu) {
1597 			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1598 			last = info.tsc_saved[cpu].v;
1599 			if (xdelta < 0)
1600 				xdelta = -xdelta;
1601 			delta += xdelta;
1602 
1603 		}
1604 
1605 		/*
1606 		 * Result from attempt.  If its too wild just stop now.
1607 		 * Also break out if we succeed, no need to try further.
1608 		 */
1609 		kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n",
1610 			delta, ncpus, delta / ncpus,
1611 			tsc_frequency / 100000);
1612 		if (delta / ncpus > tsc_frequency / 100)
1613 			break;
1614 		if (delta / ncpus < tsc_frequency / 100000) {
1615 			tsc_mpsync = 1;
1616 			break;
1617 		}
1618 	}
1619 
1620 	if (tsc_mpsync)
1621 		kprintf("TSC is MP synchronized\n");
1622 	else
1623 		kprintf("TSC is not MP synchronized\n");
1624 }
/* Run the TSC MP sync test once all cpus are up. */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* Read-only sysctl exports for i8254 and TSC state. */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1641