xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 7d3e9a5b)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008-2021 The DragonFly Project.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * William Jolitz and Don Ahn.
7  *
8  * This code is derived from software contributed to The DragonFly Project
9  * by Matthew Dillon <dillon@backplane.com>
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
36  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
37  */
38 
39 /*
40  * Routines to handle clock hardware.
41  */
42 
43 /*
44  * inittodr, settodr and support routines written
45  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
46  *
47  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
48  */
49 
50 #if 0
51 #include "opt_clock.h"
52 #endif
53 
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/eventhandler.h>
57 #include <sys/time.h>
58 #include <sys/kernel.h>
59 #include <sys/bus.h>
60 #include <sys/sysctl.h>
61 #include <sys/cons.h>
62 #include <sys/kbio.h>
63 #include <sys/systimer.h>
64 #include <sys/globaldata.h>
65 #include <sys/machintr.h>
66 #include <sys/interrupt.h>
67 
68 #include <sys/thread2.h>
69 
70 #include <machine/clock.h>
71 #include <machine/cputypes.h>
72 #include <machine/frame.h>
73 #include <machine/ipl.h>
74 #include <machine/limits.h>
75 #include <machine/md_var.h>
76 #include <machine/psl.h>
77 #include <machine/segments.h>
78 #include <machine/smp.h>
79 #include <machine/specialreg.h>
80 #include <machine/intr_machdep.h>
81 
82 #include <machine_base/apic/ioapic.h>
83 #include <machine_base/apic/ioapic_abi.h>
84 #include <machine_base/icu/icu.h>
85 #include <bus/isa/isa.h>
86 #include <bus/isa/rtc.h>
87 #include <machine_base/isa/timerreg.h>
88 
SET_DECLARE(timecounter_init_set, const timecounter_init_t);
TIMECOUNTER_INIT(placeholder, NULL);

static void i8254_restore(void);
static void resettodr_on_shutdown(void *arg __unused);

/*
 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
 * can use a simple formula for leap years.
 */
#define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182
#endif

static uint8_t i8254_walltimer_sel;	/* TIMER_SEL1/TIMER_SEL2 select bits */
static uint16_t i8254_walltimer_cntr;	/* I/O port of the walltimer counter */
static int timer0_running;		/* non-zero while timer0 one-shot counts */

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;
int	tsc_invariant;
int	tsc_mpsync;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three 8254 counters */
enum tstate { RELEASED, ACQUIRED };
static enum tstate timer0_state;
static enum tstate timer1_state;
static enum tstate timer2_state;

int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static	int	beeping = 0;	/* doubles as the active beep period */
static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static  int	rtc_loaded;	/* set once inittodr() accepted the RTC time */

/* Non-zero: cheap divisor converting system timebase ticks to 8254 ticks */
static	sysclock_t i8254_cputimer_div;

static int i8254_nointr;
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/* Free-running 8254 walltimer (timer1 or timer2) exported as a cputimer */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/* Timer0 one-shot exported as the interrupt cputimer */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
186 
187 /*
188  * Use this to lwkt_switch() when the scheduler clock is not
189  * yet running, otherwise lwkt_switch() won't do anything.
190  * XXX needs cleaning up in lwkt_thread.c
191  */
static void
lwkt_force_switch(void)
{
	/*
	 * Run the scheduler clock by hand (inside a critical section) so
	 * the subsequent lwkt_switch() has something to act on even though
	 * the real scheduler clock is not running yet.
	 */
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}
200 
201 /*
202  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
203  * counting as of this interrupt.  We use timer1 in free-running mode (not
204  * generating any interrupts) as our main counter.  Each cpu has timeouts
205  * pending.
206  *
207  * This code is INTR_MPSAFE and may be called without the BGL held.
208  */
static void
clkintr(void *dummy, void *frame_arg)
{
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* Skip cpus with no queued systimers */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		/*
		 * Remote cpu: dispatch via IPI.  sysclock_count is static
		 * so the pointer remains valid after this frame returns.
		 */
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		/* Local cpu: call directly, passing the trap frame */
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
240 
241 
242 /*
243  * NOTE! not MP safe.
244  */
245 int
246 acquire_timer2(int mode)
247 {
248 	if (timer2_state != RELEASED)
249 		return (-1);
250 	timer2_state = ACQUIRED;
251 
252 	/*
253 	 * This access to the timer registers is as atomic as possible
254 	 * because it is a single instruction.  We could do better if we
255 	 * knew the rate.
256 	 */
257 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
258 	return (0);
259 }
260 
261 int
262 release_timer2(void)
263 {
264 	if (timer2_state != ACQUIRED)
265 		return (-1);
266 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
267 	timer2_state = RELEASED;
268 	return (0);
269 }
270 
271 #include "opt_ddb.h"
272 #ifdef DDB
273 #include <ddb/ddb.h>
274 
/* ddb "show rtc" command: dump the RTC date/time and control registers */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
282 #endif /* DDB */
283 
284 /*
285  * Return the current cpu timer count as a 32 bit integer.
286  */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	/* Latch the walltimer's current value, then read lsb and msb */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);	/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000U;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();

	return(ret);
}
308 
309 /*
310  * This function is called whenever the system timebase changes, allowing
311  * us to calculate what is needed to convert a system timebase tick
312  * into an 8254 tick for the interrupt timer.  If we can convert to a
313  * simple shift, multiplication, or division, we do so.  Otherwise 64
 * bit arithmetic is required every time the interrupt timer is reloaded.
315  */
316 static void
317 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
318 {
319     sysclock_t freq;
320     sysclock_t div;
321 
322     /*
323      * Will a simple divide do the trick?
324      */
325     div = (timer->freq + (cti->freq / 2)) / cti->freq;
326     freq = cti->freq * div;
327 
328     if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
329 	i8254_cputimer_div = div;
330     else
331 	i8254_cputimer_div = 0;
332 }
333 
334 /*
335  * Reload for the next timeout.  It is possible for the reload value
336  * to be 0 or negative, indicating that an immediate timer interrupt
337  * is desired.  For now make the minimum 2 ticks.
338  *
339  * We may have to convert from the system timebase to the 8254 timebase.
340  */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    /* A negative (wrapped) reload means "fire immediately" */
    if ((ssysclock_t)reload < 0)
	    reload = 1;
    /* Convert from system timebase ticks to 8254 ticks */
    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

    if (reload < 2)
	reload = 2;		/* minimum count */
    if (reload > 0xFFFF)
	reload = 0xFFFF;	/* almost full count (0 is full count) */

    clock_lock();
    if (timer0_running) {
	/*
	 * Timer is still counting down.  Only reprogram it if the new
	 * timeout would fire earlier than the countdown in progress.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	/* One-shot already fired; start a fresh countdown */
	timer0_running = 1;
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
376 
377 /*
378  * DELAY(usec)	     - Spin for the specified number of microseconds.
379  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
380  *		       but do a thread switch in the loop
381  *
382  * Relies on timer 1 counting down from (cputimer_freq / hz)
383  * Note: timer had better have been programmed before this is first used!
384  */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self test: exercise DELAY() over 8 decades */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		/* defensive: never let an apparent backwards read credit us */
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		/* optionally yield the cpu between samples */
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
444 
445 /*
446  * DELAY() never switches.
447  */
void
DELAY(int n)
{
	/* Pure busy-wait; never yields the cpu (see DODELAY) */
	DODELAY(n, 0);
}
453 
454 /*
455  * Returns non-zero if the specified time period has elapsed.  Call
456  * first with last_clock set to 0.
457  */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* First call: ensure a cputimer is running, arm the state */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	/*
	 * Advance last_clock by the tick-equivalent of the whole
	 * microseconds actually consumed, so sub-microsecond remainders
	 * carry into the next call instead of being lost.
	 */
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}
478 
479 
480 /*
481  * DRIVERSLEEP() does not switch if called with a spinlock held or
482  * from a hard interrupt.
483  */
484 void
485 DRIVERSLEEP(int usec)
486 {
487 	globaldata_t gd = mycpu;
488 
489 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
490 		DODELAY(usec, 0);
491 	} else {
492 		DODELAY(usec, 1);
493 	}
494 }
495 
/* Callout handler: silence the speaker and give timer2 back */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
503 
504 int
505 sysbeep(int pitch, int period)
506 {
507 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
508 		return(-1);
509 	if (sysbeep_enable == 0)
510 		return(-1);
511 	/*
512 	 * Nobody else is using timer2, we do not need the clock lock
513 	 */
514 	outb(TIMER_CNTR2, pitch);
515 	outb(TIMER_CNTR2, (pitch>>8));
516 	if (!beeping) {
517 		/* enable counter2 output to speaker */
518 		outb(IO_PPI, inb(IO_PPI) | 3);
519 		beeping = period;
520 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
521 	}
522 	return (0);
523 }
524 
525 /*
526  * RTC support routines
527  */
528 
/*
 * Read one RTC register.  The inb(0x84) accesses are dummy reads of an
 * unused port, used purely as a short I/O recovery delay.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);		/* select register */
	inb(0x84);			/* I/O delay */
	val = inb(IO_RTC + 1);		/* read data */
	inb(0x84);			/* I/O delay */
	crit_exit();
	return (val);
}
542 
/* Write one RTC register (see rtcin() for the inb(0x84) delay trick) */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);	/* select register */
	inb(0x84);
	outb(IO_RTC + 1, val);	/* write data */
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
554 
/* Read an RTC register and convert its BCD encoding to binary. */
static __inline int
readrtc(int port)
{
	int bcd;

	bcd = rtcin(port);
	return (bcd2bin(bcd));
}
560 
/*
 * Measure the i8254 frequency (and, when present, the TSC frequency)
 * against the RTC by counting ticks across exactly one RTC second.
 * Falls back to the default TIMER_FREQ if the RTC is unusable.
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	sysclock_t tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* RTC must report valid power for its time to be trustworthy */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mc146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (sysclock_t)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* The |1 guarantees the ceiling-divide result is at least 1 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %lu Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %lu Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
650 
/*
 * (Re)program timer0 as the one-shot interrupt timer and register the
 * 8254 cputimer and interrupt-cputimer with the framework.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
678 
/*
 * Select and program the free-running walltimer (timer 1 or timer 2,
 * chosen by the hw.i8254.walltimer tunable; default 2).
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	/* Resume counting at the next 64K boundary at or above oldclock */
	timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU;

	clock_lock();
	/* Rate generator, full 0x10000 count, free-running */
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
718 
719 static void
720 i8254_cputimer_destruct(struct cputimer *timer)
721 {
722 	switch(timer->type) {
723 	case CPUTIMER_8254_SEL1:
724 	    timer1_state = RELEASED;
725 	    break;
726 	case CPUTIMER_8254_SEL2:
727 	    timer2_state = RELEASED;
728 	    break;
729 	default:
730 	    break;
731 	}
732 	timer->type = 0;
733 }
734 
static void
rtc_restore(void)
{
	/* Restore all of the RTC's "status" (actually, control) registers. */
	/* First write STATUSB with only the 24HR bit (PINTR bit clear) */
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
743 
744 /*
745  * Restore all the timers.
746  *
747  * This function is called to resynchronize our core timekeeping after a
748  * long halt, e.g. from apm_default_resume() and friends.  It is also
749  * called if after a BIOS call we have detected munging of the 8254.
750  * It is necessary because cputimer_count() counter's delta may have grown
751  * too large for nanouptime() and friends to handle, or (in the case of 8254
752  * munging) might cause the SYSTIMER code to prematurely trigger.
753  */
void
timer_restore(void)
{
	/* Hold a critical section across the reprogramming */
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
763 
764 #define MAX_MEASURE_RETRIES	100
765 
/*
 * Take one cputimer sample bracketed by ordered TSC reads.  The sample
 * is retried when the bracket takes noticeably longer than the expected
 * timer_latency (presumably due to a preemption or SMI — unverified);
 * the threshold is loosened to 2x once half the retries are used up.
 *
 * Returns the starting TSC value; the bracket width, cputimer value and
 * retry count are returned through the out-pointers.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
793 
/*
 * Calibrate the TSC against sys_cputimer over a delay of 'usecs'
 * microseconds.  Returns the estimated TSC frequency in Hz.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		/*
		 * Fast method: take two start samples and two end samples
		 * (pairs spaced 20ms apart), derive two independent
		 * frequency estimates, and return their average.
		 */
		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate the TSC deltas for the measurement latency */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		/* Slow method: a single DELAY bracketed by raw TSC reads */
		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
848 
849 /*
850  * Initialize 8254 timer 0 early so that it can be used in DELAY().
851  */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			/* CPUID leaf 0x80000007: invariant-TSC bit in EDX */
			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run all linker-set timecounter configure hooks */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* Measure min/avg/max cputimer read latency in TSC ticks */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			/* Self-test: calibrate over 100ms..2s delays */
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* The |1 guarantees the ceiling-divide result is at least 1 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	/* Arrange for the RTC to be written back on clean shutdown */
	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1052 
1053 /*
1054  * Sync the time of day back to the RTC on shutdown, but only if
1055  * we have already loaded it and have not crashed.
1056  */
1057 static void
1058 resettodr_on_shutdown(void *arg __unused)
1059 {
1060  	if (rtc_loaded && panicstr == NULL) {
1061 		resettodr();
1062 	}
1063 }
1064 
1065 /*
1066  * Initialize the time of day register, based on the time base which is, e.g.
1067  * from a filesystem.
1068  */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/*
	 * Seed the system clock from the caller-supplied base (e.g. a
	 * filesystem timestamp) first; the RTC, if sane, refines it below.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		/* Briefly drop the critical section so interrupts can run. */
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	/* Two-digit year; values below 70 are assumed to be 20xx. */
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/*
	 * Convert the RTC calendar date to days since Jan 1 1970.
	 * readrtc() presumably returns the BCD register converted to
	 * binary — TODO confirm against its definition earlier in the file.
	 */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert local time to UTC using the kernel timezone settings. */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only step the clock if the RTC disagrees by 2 seconds or more. */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1139 
1140 /*
1141  * Write system time back to RTC
1142  */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	/* Honor the administrative override that forbids RTC writes. */
	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Peel off seconds/minutes/hours, converting each field to BCD. */
	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* Jan 1 1970 was a Thursday, hence the +4 for the weekday field. */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
	     tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Walk the months (m==1 is February, +1 day in leap years). */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}
1199 
1200 static int
1201 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1202 {
1203 	sysclock_t base;
1204 	long lastcnt;
1205 
1206 	/*
1207 	 * Following code assumes the 8254 is the cpu timer,
1208 	 * so make sure it is.
1209 	 */
1210 	/*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */
1211 	KKASSERT(cti == &i8254_cputimer_intr);
1212 
1213 	lastcnt = get_interrupt_counter(irq, mycpuid);
1214 
1215 	/*
1216 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1217 	 * it to happen, then see if we got it.
1218 	 */
1219 	kprintf("IOAPIC: testing 8254 interrupt delivery...");
1220 
1221 	i8254_intr_reload(cti, sys_cputimer->fromus(2));
1222 	base = sys_cputimer->count();
1223 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1224 		; /* nothing */
1225 
1226 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) {
1227 		kprintf(" failed\n");
1228 		return ENOENT;
1229 	} else {
1230 		kprintf(" success\n");
1231 	}
1232 	return 0;
1233 }
1234 
1235 /*
1236  * Start both clocks running.  DragonFly note: the stat clock is no longer
1237  * used.  Instead, 8254 based systimers are used for all major clock
1238  * interrupts.
1239  */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Clock initialization always runs on the BSP. */
	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
			/*
			 * NOTE: this label is also reached via goto from the
			 * delivery-trial failure path below, after the
			 * initially registered handler has been unregistered.
			 */
mixed_mode_setup:
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				/*
				 * If this timer was not the selected cputimer
				 * we can simply run without its interrupt;
				 * otherwise the failure is fatal.
				 */
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		/* Legacy PIC routing: the 8254 is hardwired to irq 0. */
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/*
	 * With an I/O APIC, verify interrupts are really delivered on the
	 * chosen pin; if not, retry once using mixed (ExtINT) mode.
	 */
	if (ioapic_enable) {
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}
1328 
1329 void
1330 setstatclockrate(int newhz)
1331 {
1332 	if (newhz == RTC_PROFRATE)
1333 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1334 	else
1335 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1336 	writertc(RTC_STATUSA, rtc_statusa);
1337 }
1338 
#if 0
/*
 * Disabled: timecounter-style TSC read hook, currently unused.
 * NOTE(review): truncates the 64-bit TSC to "unsigned", which is normal
 * for a timecounter — confirm the intended width before re-enabling.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1346 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Circular buffer of (tsc, tag) pairs; a zero entry marks the logical end. */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamped event in the circular debug buffer: the low 32
 * bits of the TSC followed by the caller-supplied tag.  The next slot is
 * zeroed to mark the end of valid data for readers of the sysctl above.
 * NOTE(review): the static index 'i' is not synchronized; concurrent
 * callers may interleave entries — presumably acceptable for a debug aid.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1365 
1366 /*
1367  *
1368  */
1369 
1370 static int
1371 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1372 {
1373     sysclock_t count;
1374     uint64_t tscval;
1375     char buf[32];
1376 
1377     crit_enter();
1378     if (sys_cputimer == &i8254_cputimer)
1379 	count = sys_cputimer->count();
1380     else
1381 	count = 0;
1382     if (tsc_present)
1383 	tscval = rdtsc();
1384     else
1385 	tscval = 0;
1386     crit_exit();
1387     ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval);
1388     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1389 }
1390 
/*
 * Shared state for the AP TSC concurrency test (tsc_mpsync_ap_thread /
 * tsc_mpsync_test).  Cache-aligned to limit false sharing.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of AP threads ready */
	volatile int		tsc_done_cnt;	/* # of AP threads finished */
	volatile int		tsc_command;	/* set non-zero to start test */
	volatile int		unused01[5];	/* pad */
	struct {
		uint64_t	v;		/* last TSC sampled by this cpu */
		uint64_t	unused02;	/* pad */
	} tsc_saved[MAXCPU];
} __cachealign;
1401 
#if 0
/*
 * Disabled older MP-synchronization test implementation.
 * NOTE(review): this code has bitrotted — the parameter is named 'info'
 * but the body references 'arg', and 'struct tsc_mpsync_thr' is not
 * defined in this file, so it would not compile if re-enabled.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1466 
1467 #define TSC_TEST_COUNT		50000
1468 
1469 static void
1470 tsc_mpsync_ap_thread(void *xinfo)
1471 {
1472 	struct tsc_mpsync_info *info = xinfo;
1473 	int cpu = mycpuid;
1474 	int i;
1475 
1476 	/*
1477 	 * Tell main loop that we are ready and wait for initiation
1478 	 */
1479 	atomic_add_int(&info->tsc_ready_cnt, 1);
1480 	while (info->tsc_command == 0) {
1481 		lwkt_force_switch();
1482 	}
1483 
1484 	/*
1485 	 * Run test for 10000 loops or until tsc_done_cnt != 0 (another
1486 	 * cpu has finished its test), then increment done.
1487 	 */
1488 	crit_enter();
1489 	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
1490 		info->tsc_saved[cpu].v = rdtsc_ordered();
1491 	}
1492 	crit_exit();
1493 	atomic_add_int(&info->tsc_done_cnt, 1);
1494 
1495 	lwkt_exit();
1496 }
1497 
/*
 * Determine whether the TSC is usable as an MP-synchronized timebase.
 * Sets tsc_mpsync to 1 on success.  Requires an invariant TSC; on
 * multi-cpu systems the TSC is tested for monotonicity across cpu
 * migrations and for cross-cpu agreement via concurrent sampling.
 */
static void
tsc_mpsync_test(void)
{
	enum { TSCOK, TSCNEG, TSCSPAN } error = TSCOK;
	int cpu;
	int try;

	if (!tsc_invariant) {
		/* Not even invariant TSC */
		kprintf("TSC is not invariant, "
			"no further tests will be performed\n");
		return;
	}

	if (ncpus == 1) {
		/* Only one CPU */
		tsc_mpsync = 1;
		return;
	}

	/*
	 * Forcing can be used w/qemu to reduce contention
	 */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);

	if (tsc_mpsync == 0) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_INTEL:
			/*
			 * Intel probably works
			 */
			break;

		case CPU_VENDOR_AMD:
			/*
			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
			 * architectures) we have to watch out for
			 * Erratum 778:
			 *     "Processor Core Time Stamp Counters May
			 *      Experience Drift"
			 * This Erratum is only listed for cpus in Family
			 * 15h < Model 30h and for 16h < Model 30h.
			 *
			 * AMD < Bulldozer probably doesn't work
			 */
			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
				if (CPUID_TO_MODEL(cpu_id) < 0x30)
					return;
			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
				return;
			}
			break;

		default:
			/* probably won't work */
			return;
		}
	} else if (tsc_mpsync < 0) {
		kprintf("TSC MP synchronization test is disabled\n");
		tsc_mpsync = 0;
		return;
	}

	/*
	 * Test even if forced to 1 above.  If forced, we will use the TSC
	 * even if the test fails.  (set forced to -1 to disable entirely).
	 */
	kprintf("TSC testing MP synchronization ...\n");
	kprintf("TSC testing MP: NOTE! CPU pwrsave will inflate latencies!\n");

	/*
	 * Test that the TSC is monotonically increasing across CPU
	 * switches.  Otherwise time will get really messed up if the
	 * TSC is selected as the timebase.
	 *
	 * Test 4 times
	 */
	for (try = 0; tsc_frequency && try < 4; ++try) {
		tsc_uclock_t last;
		tsc_uclock_t next;
		tsc_sclock_t delta;
		tsc_sclock_t lo_delta = 0x7FFFFFFFFFFFFFFFLL;
		tsc_sclock_t hi_delta = -0x7FFFFFFFFFFFFFFFLL;

		/* Ascending pass: migrate cpu0 -> cpu(ncpus-1). */
		last = rdtsc();
		for (cpu = 0; cpu < ncpus; ++cpu) {
			lwkt_migratecpu(cpu);
			next = rdtsc();
			if (cpu == 0) {
				last = next;
				continue;
			}

			delta = next - last;
			if (delta < 0) {
				kprintf("TSC cpu-delta NEGATIVE: "
					"cpu %d to %d (%ld)\n",
					cpu - 1, cpu, delta);
				error = TSCNEG;
			}
			if (lo_delta > delta)
				lo_delta = delta;
			if (hi_delta < delta)
				hi_delta = delta;
			last = next;
		}
		/* Descending pass: migrate back down to cpu0. */
		last = rdtsc();
		for (cpu = ncpus - 2; cpu >= 0; --cpu) {
			lwkt_migratecpu(cpu);
			next = rdtsc();
			delta = next - last;
			if (delta <= 0) {
				kprintf("TSC cpu-delta WAS NEGATIVE! "
					"cpu %d to %d (%ld)\n",
					cpu + 1, cpu, delta);
				error = TSCNEG;
			}
			if (lo_delta > delta)
				lo_delta = delta;
			if (hi_delta < delta)
				hi_delta = delta;
			last = next;
		}
		kprintf("TSC cpu-delta test complete, %ldnS to %ldnS ",
			muldivu64(lo_delta, 1000000000, tsc_frequency),
			muldivu64(hi_delta, 1000000000, tsc_frequency));
		if (error != TSCOK) {
			kprintf("FAILURE\n");
			break;
		}
		kprintf("SUCCESS\n");
	}

	/*
	 * Test TSC MP synchronization on APs.
	 *
	 * Test 4 times.
	 */
	for (try = 0; tsc_frequency && try < 4; ++try) {
		struct tsc_mpsync_info info;
		uint64_t last;
		int64_t xworst;
		int64_t xdelta;
		int64_t delta;

		bzero(&info, sizeof(info));

		/* Spawn one sampler thread per cpu, then wait until all ready. */
		for (cpu = 0; cpu < ncpus; ++cpu) {
			thread_t td;
			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
				    NULL, TDF_NOSTART, cpu,
				    "tsc mpsync %d", cpu);
			lwkt_setpri_initial(td, curthread->td_pri);
			lwkt_schedule(td);
		}
		while (info.tsc_ready_cnt != ncpus)
			lwkt_force_switch();

		/*
		 * All threads are ready, start the test and wait for
		 * completion.
		 */
		info.tsc_command = 1;
		while (info.tsc_done_cnt != ncpus)
			lwkt_force_switch();

		/*
		 * Process results: accumulate the absolute differences
		 * between adjacent cpus' final samples, tracking the worst.
		 */
		last = info.tsc_saved[0].v;
		delta = 0;
		xworst = 0;
		for (cpu = 0; cpu < ncpus; ++cpu) {
			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
			last = info.tsc_saved[cpu].v;
			if (xdelta < 0)
				xdelta = -xdelta;
			if (xworst < xdelta)
				xworst = xdelta;
			delta += xdelta;

		}

		/*
		 * Result from attempt.  Break-out if we succeed, otherwise
		 * try again (up to 4 times).  This might be in a VM so we
		 * need to be robust.
		 *
		 * Thresholds on the average delta: worse than 1/100 of a
		 * second's worth of ticks (10ms) fails; better than
		 * 1/100000 (10us) succeeds; anything between is retried.
		 */
		kprintf("TSC cpu concurrency test complete, worst=%ldns, "
			"avg=%ldns ",
			muldivu64(xworst, 1000000000, tsc_frequency),
			muldivu64(delta / ncpus, 1000000000, tsc_frequency));
		if (delta / ncpus > tsc_frequency / 100) {
			kprintf("FAILURE\n");
		}
		if (delta / ncpus < tsc_frequency / 100000) {
			kprintf("SUCCESS\n");
			if (error == TSCOK)
				tsc_mpsync = 1;
			break;
		}
		kprintf("INDETERMINATE\n");
	}

	if (tsc_mpsync)
		kprintf("TSC is MP synchronized\n");
	else
		kprintf("TSC is not MP synchronized\n");
}
/* Run the TSC MP-sync test once all cpus are up. */
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* Read-only sysctl nodes exposing i8254 and TSC state. */
static SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1724