xref: /dragonfly/sys/platform/pc64/isa/clock.c (revision 75a74ed8)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * Copyright (c) 2008 The DragonFly Project.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * William Jolitz and Don Ahn.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
34  * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $
35  */
36 
37 /*
38  * Routines to handle clock hardware.
39  */
40 
41 /*
42  * inittodr, settodr and support routines written
43  * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at>
44  *
45  * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94
46  */
47 
48 #if 0
49 #include "opt_clock.h"
50 #endif
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/eventhandler.h>
55 #include <sys/time.h>
56 #include <sys/kernel.h>
57 #include <sys/bus.h>
58 #include <sys/sysctl.h>
59 #include <sys/cons.h>
60 #include <sys/kbio.h>
61 #include <sys/systimer.h>
62 #include <sys/globaldata.h>
63 #include <sys/machintr.h>
64 #include <sys/interrupt.h>
65 
66 #include <sys/thread2.h>
67 
68 #include <machine/clock.h>
69 #include <machine/cputypes.h>
70 #include <machine/frame.h>
71 #include <machine/ipl.h>
72 #include <machine/limits.h>
73 #include <machine/md_var.h>
74 #include <machine/psl.h>
75 #include <machine/segments.h>
76 #include <machine/smp.h>
77 #include <machine/specialreg.h>
78 #include <machine/intr_machdep.h>
79 
80 #include <machine_base/apic/ioapic.h>
81 #include <machine_base/apic/ioapic_abi.h>
82 #include <machine_base/icu/icu.h>
83 #include <bus/isa/isa.h>
84 #include <bus/isa/rtc.h>
85 #include <machine_base/isa/timerreg.h>
86 
87 SET_DECLARE(timecounter_init_set, const timecounter_init_t);
88 TIMECOUNTER_INIT(placeholder, NULL);
89 
90 static void i8254_restore(void);
91 static void resettodr_on_shutdown(void *arg __unused);
92 
93 /*
94  * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
95  * can use a simple formula for leap years.
96  */
97 #define	LEAPYEAR(y) ((u_int)(y) % 4 == 0)
98 #define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)
99 
100 #ifndef TIMER_FREQ
101 #define TIMER_FREQ   1193182
102 #endif
103 
104 static uint8_t i8254_walltimer_sel;
105 static uint16_t i8254_walltimer_cntr;
106 
107 int	adjkerntz;		/* local offset from GMT in seconds */
108 int	disable_rtc_set;	/* disable resettodr() if != 0 */
109 int	tsc_present;
110 int	tsc_invariant;
111 int	tsc_mpsync;
112 int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
113 int	timer0_running;
114 tsc_uclock_t tsc_frequency;
115 tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */
116 
117 enum tstate { RELEASED, ACQUIRED };
118 enum tstate timer0_state;
119 enum tstate timer1_state;
120 enum tstate timer2_state;
121 
122 int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */
123 
124 static	int	beeping = 0;
125 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
126 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
127 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
128 static  int	rtc_loaded;
129 
130 static int i8254_cputimer_div;
131 
132 static int i8254_nointr;
133 static int i8254_intr_disable = 1;
134 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);
135 
136 static int calibrate_timers_with_rtc = 0;
137 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);
138 
139 static int calibrate_tsc_fast = 1;
140 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);
141 
142 static int calibrate_test;
143 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);
144 
145 static struct callout sysbeepstop_ch;
146 
147 static sysclock_t i8254_cputimer_count(void);
148 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
149 static void i8254_cputimer_destruct(struct cputimer *cputimer);
150 
/*
 * The i8254 used as the free-running system cputimer.  Which hardware
 * counter backs it (timer1 or timer2) is decided at attach time in
 * i8254_cputimer_construct(); the default TIMER_FREQ is refined during
 * boot by startrtclock() via cputimer_set_frequency().
 */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};
163 
164 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
165 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
166 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);
167 
/*
 * The i8254 timer0 used as the interrupt cputimer.  Reloads are one-shot
 * (TIMER_SWSTROBE) via i8254_intr_reload(); .freq is kept in sync with
 * the selected system cputimer by startrtclock()/i8254_intr_config().
 */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};
184 
/*
 * Force a thread switch even though the scheduler clock is not yet
 * running (otherwise lwkt_switch() won't do anything).  We feed the
 * scheduler clock by hand inside a critical section, then switch.
 * XXX needs cleaning up in lwkt_thread.c
 */
static void
lwkt_force_switch(void)
{
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}
198 
199 /*
200  * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
201  * counting as of this interrupt.  We use timer1 in free-running mode (not
202  * generating any interrupts) as our main counter.  Each cpu has timeouts
203  * pending.
204  *
205  * This code is INTR_MPSAFE and may be called without the BGL held.
206  */
static void
clkintr(void *dummy, void *frame_arg)
{
	/*
	 * NOTE! sysclock_count must be static: its address is handed to
	 * other cpus via lwkt_send_ipiq3() below and may be dereferenced
	 * after this stack frame would otherwise have been reused.
	 */
	static sysclock_t sysclock_count;	/* NOTE! Must be static */
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
	    gscan = globaldata_find(n);
	    /* Skip cpus with no systimers queued */
	    if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
		continue;
	    if (gscan != gd) {
		lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
				&sysclock_count, 1);
	    } else {
		/* Local cpu: dispatch directly, passing the trap frame */
		systimer_intr(&sysclock_count, 0, frame_arg);
	    }
	}
}
238 
239 
240 /*
241  * NOTE! not MP safe.
242  */
243 int
244 acquire_timer2(int mode)
245 {
246 	if (timer2_state != RELEASED)
247 		return (-1);
248 	timer2_state = ACQUIRED;
249 
250 	/*
251 	 * This access to the timer registers is as atomic as possible
252 	 * because it is a single instruction.  We could do better if we
253 	 * knew the rate.
254 	 */
255 	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
256 	return (0);
257 }
258 
259 int
260 release_timer2(void)
261 {
262 	if (timer2_state != ACQUIRED)
263 		return (-1);
264 	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
265 	timer2_state = RELEASED;
266 	return (0);
267 }
268 
269 #include "opt_ddb.h"
270 #ifdef DDB
271 #include <ddb/ddb.h>
272 
/*
 * DDB "show rtc" command: dump the raw (BCD-encoded) RTC date/time
 * registers along with the A/B/C status registers.
 */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
280 #endif /* DDB */
281 
282 /*
283  * Return the current cpu timer count as a 32 bit integer.
284  */
/*
 * Return the current cpu timer count as a 32 bit integer.
 *
 * The 16-bit hardware count-down value is latched, read lsb/msb, and
 * negated to form a count-up value.  A software rollover detector
 * extends it: whenever the 16-bit countup value goes backwards relative
 * to the previous read, the 64K base is bumped.  cputimer_last is
 * protected by clock_lock().
 */
static
sysclock_t
i8254_cputimer_count(void)
{
	static uint16_t cputimer_last;
	uint16_t count;
	sysclock_t ret;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH);
	count = (uint8_t)inb(i8254_walltimer_cntr);		/* get countdown */
	count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8);
	count = -count;					/* -> countup */
	if (count < cputimer_last)			/* rollover */
		i8254_cputimer.base += 0x00010000;
	ret = i8254_cputimer.base | count;
	cputimer_last = count;
	clock_unlock();
	return(ret);
}
305 
306 /*
307  * This function is called whenever the system timebase changes, allowing
308  * us to calculate what is needed to convert a system timebase tick
309  * into an 8254 tick for the interrupt timer.  If we can convert to a
310  * simple shift, multiplication, or division, we do so.  Otherwise 64
311  * bit arithmatic is required every time the interrupt timer is reloaded.
312  */
/*
 * Called whenever the system timebase changes; precompute, if possible,
 * a simple integer divisor converting system timebase ticks into 8254
 * interrupt-timer ticks.  If the rounded divisor reproduces the timer
 * frequency to within +/- 1 Hz it is cached in i8254_cputimer_div;
 * otherwise 0 is stored and i8254_intr_reload() falls back to 64-bit
 * multiply/divide scaling on every reload.
 */
static void
i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer)
{
    int freq;
    int div;

    /*
     * Will a simple divide do the trick?
     */
    div = (timer->freq + (cti->freq / 2)) / cti->freq;	/* rounded */
    freq = cti->freq * div;

    if (freq >= timer->freq - 1 && freq <= timer->freq + 1)
	i8254_cputimer_div = div;
    else
	i8254_cputimer_div = 0;
}
330 
331 /*
332  * Reload for the next timeout.  It is possible for the reload value
333  * to be 0 or negative, indicating that an immediate timer interrupt
334  * is desired.  For now make the minimum 2 ticks.
335  *
336  * We may have to convert from the system timebase to the 8254 timebase.
337  */
/*
 * Reload timer0 for the next timeout.  It is possible for the reload
 * value to be 0 or negative, indicating that an immediate timer
 * interrupt is desired.  For now make the minimum 2 ticks.
 *
 * The reload value arrives in system timebase ticks and may have to be
 * converted to the 8254 timebase, either by the cached divisor or by
 * 64-bit scaling (see i8254_intr_config()).
 *
 * If timer0 is already counting down we only reprogram it when the new
 * timeout is EARLIER than the remaining count; a later timeout will be
 * picked up when the pending interrupt fires.
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
    uint16_t count;

    if (i8254_cputimer_div)
	reload /= i8254_cputimer_div;
    else
	reload = (int64_t)reload * cti->freq / sys_cputimer->freq;

    if ((int)reload < 2)
	reload = 2;

    clock_lock();
    if (timer0_running) {
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
	count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
	count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
	if (reload < count) {
	    outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	    outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
	    outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
	}
    } else {
	timer0_running = 1;
	if (reload > 0xFFFF)
	    reload = 0;		/* full count */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, (uint8_t)reload); 		/* lsb */
	outb(TIMER_CNTR0, (uint8_t)(reload >> 8));	/* msb */
    }
    clock_unlock();
}
371 
372 /*
373  * DELAY(usec)	     - Spin for the specified number of microseconds.
374  * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
375  *		       but do a thread switch in the loop
376  *
377  * Relies on timer 1 counting down from (cputimer_freq / hz)
378  * Note: timer had better have been programmed before this is first used!
379  */
/*
 * Spin for at least n microseconds, optionally thread-switching inside
 * the loop (doswitch != 0).  Backs both DELAY() and DRIVERSLEEP().
 * The tick budget is computed with round-up so we never delay less
 * than requested.
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	/* One-time self-test sweep of exponentially increasing delays */
	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) /
		     1000000;

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		/* Clamp apparent backwards movement of the counter */
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}
440 
/*
 * DELAY(n) - spin for at least n microseconds.  Never switches threads
 * (contrast with DRIVERSLEEP()).  Usable early in boot because
 * DODELAY() restores the i8254 on demand.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}
449 
450 /*
451  * Returns non-zero if the specified time period has elapsed.  Call
452  * first with last_clock set to 0.
453  */
/*
 * Poll-style timeout check: returns non-zero once tdd->us microseconds
 * have elapsed since the first call.  The first call (started == 0)
 * only arms the structure and returns 0.
 *
 * last_clock is advanced by exactly the tick-equivalent of the whole
 * microseconds consumed, so fractional-microsecond remainders carry
 * over to the next poll instead of being lost.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* Ensure the cputimer is usable if we are called early */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = (u_int64_t)delta * (u_int64_t)1000000 /
	     (u_int64_t)sys_cputimer->freq;
	tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq /
			   1000000;
	tdd->us -= us;
	return (tdd->us < 0);
}
475 
476 
477 /*
478  * DRIVERSLEEP() does not switch if called with a spinlock held or
479  * from a hard interrupt.
480  */
481 void
482 DRIVERSLEEP(int usec)
483 {
484 	globaldata_t gd = mycpu;
485 
486 	if (gd->gd_intr_nesting_level || gd->gd_spinlocks) {
487 		DODELAY(usec, 0);
488 	} else {
489 		DODELAY(usec, 1);
490 	}
491 }
492 
/*
 * Callout handler armed by sysbeep(): silence the speaker, mark the
 * beep finished, and give timer2 back.
 */
static void
sysbeepstop(void *chan)
{
	outb(IO_PPI, inb(IO_PPI)&0xFC);	/* disable counter2 output to speaker */
	beeping = 0;
	release_timer2();
}
500 
501 int
502 sysbeep(int pitch, int period)
503 {
504 	if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
505 		return(-1);
506 	if (sysbeep_enable == 0)
507 		return(-1);
508 	/*
509 	 * Nobody else is using timer2, we do not need the clock lock
510 	 */
511 	outb(TIMER_CNTR2, pitch);
512 	outb(TIMER_CNTR2, (pitch>>8));
513 	if (!beeping) {
514 		/* enable counter2 output to speaker */
515 		outb(IO_PPI, inb(IO_PPI) | 3);
516 		beeping = period;
517 		callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL);
518 	}
519 	return (0);
520 }
521 
522 /*
523  * RTC support routines
524  */
525 
/*
 * Read an RTC (mc146818a-style) register: select the register via the
 * index port, then read the data port.  Done inside a critical section
 * so the index/data pair cannot be interleaved on this cpu.  The dummy
 * inb(0x84) reads act as traditional ISA I/O recovery delays.
 */
int
rtcin(int reg)
{
	u_char val;

	crit_enter();
	outb(IO_RTC, reg);
	inb(0x84);
	val = inb(IO_RTC + 1);
	inb(0x84);
	crit_exit();
	return (val);
}
539 
/*
 * Write an RTC register: select via the index port, write the data
 * port.  Same critical-section and inb(0x84) I/O-delay discipline as
 * rtcin().
 */
static __inline void
writertc(u_char reg, u_char val)
{
	crit_enter();
	inb(0x84);
	outb(IO_RTC, reg);
	inb(0x84);
	outb(IO_RTC + 1, val);
	inb(0x84);		/* XXX work around wrong order in rtcin() */
	crit_exit();
}
551 
/*
 * Read an RTC register and convert it from BCD to binary.
 */
static __inline int
readrtc(int port)
{
	int bcd_val = rtcin(port);

	return (bcd2bin(bcd_val));
}
557 
/*
 * Measure the real frequency of the current system cputimer (and, when
 * present, the TSC) against the RTC's one-second update cycle.
 *
 * Returns the measured cputimer frequency in Hz, or the compiled-in
 * default on any failure (no RTC power-valid bit, or timeout polling
 * the update-in-progress flag).
 */
static u_int
calibrate_clocks(void)
{
	tsc_uclock_t old_tsc;
	u_int tot_count;
	sysclock_t count, prev_count;
	int sec, start_sec, timeout;

	if (bootverbose)
	        kprintf("Calibrating clock(s) ...\n");
	/* RTC loses its mind without valid power; bail early */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto fail;
	timeout = 100000000;

	/* Read the mc146818A seconds counter. */
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Wait for the mc146818A seconds counter to change. */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) {
			sec = rtcin(RTC_SEC);
			if (sec != start_sec)
				break;
		}
		if (--timeout == 0)
			goto fail;
	}

	/* Start keeping track of the i8254 counter. */
	prev_count = sys_cputimer->count();
	tot_count = 0;

	if (tsc_present)
		old_tsc = rdtsc();
	else
		old_tsc = 0;		/* shut up gcc */

	/*
	 * Wait for the mc146818A seconds counter to change.  Read the i8254
	 * counter for each iteration since this is convenient and only
	 * costs a few usec of inaccuracy. The timing of the final reads
	 * of the counters almost matches the timing of the initial reads,
	 * so the main cause of inaccuracy is the varying latency from
	 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
	 * rtcin(RTC_SEC) that returns a changed seconds count.  The
	 * maximum inaccuracy from this cause is < 10 usec on 486's.
	 */
	start_sec = sec;
	for (;;) {
		if (!(rtcin(RTC_STATUSA) & RTCSA_TUP))
			sec = rtcin(RTC_SEC);
		count = sys_cputimer->count();
		tot_count += (int)(count - prev_count);
		prev_count = count;
		if (sec != start_sec)
			break;
		if (--timeout == 0)
			goto fail;
	}

	/*
	 * Read the cpu cycle counter.  The timing considerations are
	 * similar to those for the i8254 clock.
	 */
	if (tsc_present) {
		tsc_frequency = rdtsc() - old_tsc;
		if (bootverbose) {
			kprintf("TSC clock: %jd Hz (Method A)\n",
			    (intmax_t)tsc_frequency);
		}
	}
	/* |1 keeps the approximation >= 1 even when tsc_frequency is 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	kprintf("i8254 clock: %u Hz\n", tot_count);
	return (tot_count);

fail:
	kprintf("failed, using default i8254 clock of %u Hz\n",
		i8254_cputimer.freq);
	return (i8254_cputimer.freq);
}
647 
/*
 * (Re)program the i8254: timer0 as the one-shot interrupt timer and
 * register our cputimer/cputimer_intr objects with the framework.
 * Called at boot (startrtclock), on resume (timer_restore), and on
 * demand from DELAY()/CHECKTIMEOUT() when the timer has not been
 * initialized yet.
 */
static void
i8254_restore(void)
{
	timer0_state = ACQUIRED;

	clock_lock();

	/*
	 * Timer0 is our fine-grained variable clock interrupt
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
	outb(TIMER_CNTR0, 2);	/* lsb */
	outb(TIMER_CNTR0, 0);	/* msb */
	clock_unlock();

	if (!i8254_nointr) {
		cputimer_intr_register(&i8254_cputimer_intr);
		cputimer_intr_select(&i8254_cputimer_intr, 0);
	}

	/*
	 * Timer1 or timer2 is our free-running clock, but only if another
	 * has not been selected.
	 */
	cputimer_register(&i8254_cputimer);
	cputimer_select(&i8254_cputimer, 0);
}
675 
/*
 * cputimer construct callback: bind the free-running wall timer to
 * timer1 or timer2 (tunable hw.i8254.walltimer, default 2), program it
 * in rate-generator mode with a full 2^16 count, and carry the old
 * sysclock forward rounded up to the next 64K boundary so time never
 * appears to run backwards.
 */
static void
i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock)
{
	int which;

	/*
	 * Should we use timer 1 or timer 2 ?
	 */
	which = 0;
	TUNABLE_INT_FETCH("hw.i8254.walltimer", &which);
	if (which != 1 && which != 2)
		which = 2;

	switch(which) {
	case 1:
		timer->name = "i8254_timer1";
		timer->type = CPUTIMER_8254_SEL1;
		i8254_walltimer_sel = TIMER_SEL1;
		i8254_walltimer_cntr = TIMER_CNTR1;
		timer1_state = ACQUIRED;
		break;
	case 2:
		timer->name = "i8254_timer2";
		timer->type = CPUTIMER_8254_SEL2;
		i8254_walltimer_sel = TIMER_SEL2;
		i8254_walltimer_cntr = TIMER_CNTR2;
		timer2_state = ACQUIRED;
		break;
	}

	timer->base = (oldclock + 0xFFFF) & ~0xFFFF;

	clock_lock();
	outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT);
	outb(i8254_walltimer_cntr, 0);	/* lsb */
	outb(i8254_walltimer_cntr, 0);	/* msb */
	outb(IO_PPI, inb(IO_PPI) | 1);	/* bit 0: enable gate, bit 1: spkr */
	clock_unlock();
}
715 
716 static void
717 i8254_cputimer_destruct(struct cputimer *timer)
718 {
719 	switch(timer->type) {
720 	case CPUTIMER_8254_SEL1:
721 	    timer1_state = RELEASED;
722 	    break;
723 	case CPUTIMER_8254_SEL2:
724 	    timer2_state = RELEASED;
725 	    break;
726 	default:
727 	    break;
728 	}
729 	timer->type = 0;
730 }
731 
/*
 * Restore all of the RTC's "status" (actually, control) registers,
 * e.g. after resume (called via timer_restore()).
 * NOTE(review): STATUSB is written twice -- first with bare RTCSB_24HR,
 * then with the full cached rtc_statusb -- mirroring the write order
 * used in startrtclock(); presumably this sequences the mode change
 * before interrupt enables.  Confirm before reordering.
 */
static void
rtc_restore(void)
{
	writertc(RTC_STATUSB, RTCSB_24HR);
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, rtc_statusb);
}
740 
741 /*
742  * Restore all the timers.
743  *
744  * This function is called to resynchronize our core timekeeping after a
745  * long halt, e.g. from apm_default_resume() and friends.  It is also
746  * called if after a BIOS call we have detected munging of the 8254.
747  * It is necessary because cputimer_count() counter's delta may have grown
748  * too large for nanouptime() and friends to handle, or (in the case of 8254
749  * munging) might cause the SYSTIMER code to prematurely trigger.
750  */
void
timer_restore(void)
{
	/* Critical section: reprogram i8254 + RTC without preemption */
	crit_enter();
	if (i8254_cputimer_disable == 0)
		i8254_restore();	/* restore timer_freq and hz */
	rtc_restore();			/* reenable RTC interrupts */
	crit_exit();
}
760 
761 #define MAX_MEASURE_RETRIES	100
762 
/*
 * Take one (TSC, cputimer) sample pair, retrying when the TSC delta
 * around the cputimer read exceeds a threshold derived from the
 * expected timer read latency (25% slack initially, doubled after
 * half the retry budget is burned).  This rejects samples perturbed
 * by interrupts/SMM between the two rdtsc_ordered() fences.
 *
 * Outputs: *latency = TSC cost of the final cputimer read,
 * *time = the cputimer value, *retries = number of rejected attempts.
 * Returns the TSC value taken just before the cputimer read.
 */
static u_int64_t
do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time,
    int *retries)
{
	u_int64_t tsc1, tsc2;
	u_int64_t threshold;
	sysclock_t val;
	int cnt = 0;

	do {
		if (cnt > MAX_MEASURE_RETRIES/2)
			threshold = timer_latency << 1;
		else
			threshold = timer_latency + (timer_latency >> 2);

		cnt++;
		tsc1 = rdtsc_ordered();
		val = sys_cputimer->count();
		tsc2 = rdtsc_ordered();
	} while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES &&
	    tsc2 - tsc1 > threshold);

	*retries = cnt - 1;
	*latency = tsc2 - tsc1;
	*time = val;
	return tsc1;
}
790 
/*
 * Calibrate the TSC against the system cputimer over a DELAY() of
 * `usecs` microseconds and return the estimated TSC frequency in Hz.
 *
 * Fast path (calibrate_tsc_fast): take two start samples 20ms apart and
 * two end samples 20ms apart via do_measure(), compute a frequency from
 * each start/end pair scaled by the cputimer's known freq, and average
 * the two results.  Slow path: a single 1-second-class rdtsc delta
 * scaled by usecs.
 */
static u_int64_t
do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency)
{
	if (calibrate_tsc_fast) {
		u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1;
		u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2;
		u_int64_t freq1, freq2;
		sysclock_t start1, end1, start2, end2;
		int retries1, retries2, retries3, retries4;

		DELAY(1000);
		old_tsc1 = do_measure(timer_latency, &start_lat1, &start1,
		    &retries1);
		DELAY(20000);
		old_tsc2 = do_measure(timer_latency, &start_lat2, &start2,
		    &retries2);
		DELAY(usecs);
		new_tsc1 = do_measure(timer_latency, &end_lat1, &end1,
		    &retries3);
		DELAY(20000);
		new_tsc2 = do_measure(timer_latency, &end_lat2, &end2,
		    &retries4);

		/* Compensate for the measured timer-read latencies */
		old_tsc1 += start_lat1;
		old_tsc2 += start_lat2;
		freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2;
		freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2;
		end1 -= start1;
		end2 -= start2;
		/* This should in practice be safe from overflows. */
		freq1 = (freq1 * sys_cputimer->freq) / end1;
		freq2 = (freq2 * sys_cputimer->freq) / end2;
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}
845 
846 /*
847  * Initialize 8254 timer 0 early so that it can be used in DELAY().
848  */
/*
 * Boot-time clock bring-up: detect the TSC and its invariance, put the
 * RTC into a known state, run registered timecounter probes, program
 * the i8254 (unless disabled), optionally calibrate the cputimer
 * against the RTC, and finally calibrate the TSC.  Also registers the
 * shutdown hook that writes the time back to the RTC.
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	u_int delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			/* CPUID 0x80000007 EDX bit 8: invariant TSC */
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* Run the linker-set timecounter probes (may set tsc_frequency) */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 * this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
			freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cuptimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%d Hz differs from default of %d Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* Sample the TSC cost of one cputimer read, 100 times */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* |1 keeps tsc_oneus_approx >= 1 even with tsc_frequency == 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}
1049 
1050 /*
1051  * Sync the time of day back to the RTC on shutdown, but only if
1052  * we have already loaded it and have not crashed.
1053  */
1054 static void
1055 resettodr_on_shutdown(void *arg __unused)
1056 {
1057  	if (rtc_loaded && panicstr == NULL) {
1058 		resettodr();
1059 	}
1060 }
1061 
1062 /*
1063  * Initialize the time of day register, based on the time base which is, e.g.
1064  * from a filesystem.
1065  */
/*
 * Initialize the time of day from `base` (e.g. a filesystem timestamp)
 * and then, if a powered RTC with a sane year is present, from the RTC.
 * The RTC value (converted from BCD local time to seconds since the
 * 1970 epoch, adjusted by timezone and adjkerntz) only overrides the
 * base time when they differ by 2 or more seconds.
 */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	/* (the crit_exit/crit_enter pair opens an interrupt window per poll) */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	/* Jan/Feb of a leap year precede the leap day */
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}
1136 
1137 /*
1138  * Write system time back to RTC
1139  */
1140 void
1141 resettodr(void)
1142 {
1143 	struct timeval tv;
1144 	unsigned long tm;
1145 	int m;
1146 	int y;
1147 
1148 	if (disable_rtc_set)
1149 		return;
1150 
1151 	microtime(&tv);
1152 	tm = tv.tv_sec;
1153 
1154 	crit_enter();
1155 	/* Disable RTC updates and interrupts. */
1156 	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
1157 
1158 	/* Calculate local time to put in RTC */
1159 
1160 	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
1161 
1162 	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
1163 	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
1164 	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */
1165 
1166 	/* We have now the days since 01-01-1970 in tm */
1167 	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
1168 	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
1169 	     tm >= m;
1170 	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
1171 	     tm -= m;
1172 
1173 	/* Now we have the years in y and the day-of-the-year in tm */
1174 	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
1175 #ifdef USE_RTC_CENTURY
1176 	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
1177 #endif
1178 	for (m = 0; ; m++) {
1179 		int ml;
1180 
1181 		ml = daysinmonth[m];
1182 		if (m == 1 && LEAPYEAR(y))
1183 			ml++;
1184 		if (tm < ml)
1185 			break;
1186 		tm -= ml;
1187 	}
1188 
1189 	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
1190 	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
1191 
1192 	/* Reenable RTC updates and interrupts. */
1193 	writertc(RTC_STATUSB, rtc_statusb);
1194 	crit_exit();
1195 }
1196 
1197 static int
1198 i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
1199 {
1200 	sysclock_t base;
1201 	long lastcnt;
1202 
1203 	/*
1204 	 * Following code assumes the 8254 is the cpu timer,
1205 	 * so make sure it is.
1206 	 */
1207 	KKASSERT(sys_cputimer == &i8254_cputimer);
1208 	KKASSERT(cti == &i8254_cputimer_intr);
1209 
1210 	lastcnt = get_interrupt_counter(irq, mycpuid);
1211 
1212 	/*
1213 	 * Force an 8254 Timer0 interrupt and wait 1/100s for
1214 	 * it to happen, then see if we got it.
1215 	 */
1216 	kprintf("IOAPIC: testing 8254 interrupt delivery\n");
1217 
1218 	i8254_intr_reload(cti, 2);
1219 	base = sys_cputimer->count();
1220 	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
1221 		; /* nothing */
1222 
1223 	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
1224 		return ENOENT;
1225 	return 0;
1226 }
1227 
1228 /*
1229  * Start both clocks running.  DragonFly note: the stat clock is no longer
1230  * used.  Instead, 8254 based systimers are used for all major clock
1231  * interrupts.
1232  */
1233 static void
1234 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
1235 {
1236 	void *clkdesc = NULL;
1237 	int irq = 0, mixed_mode = 0, error;
1238 
1239 	KKASSERT(mycpuid == 0);
1240 
1241 	if (!selected && i8254_intr_disable)
1242 		goto nointr;
1243 
1244 	/*
1245 	 * The stat interrupt mask is different without the
1246 	 * statistics clock.  Also, don't set the interrupt
1247 	 * flag which would normally cause the RTC to generate
1248 	 * interrupts.
1249 	 */
1250 	rtc_statusb = RTCSB_24HR;
1251 
1252 	/* Finish initializing 8254 timer 0. */
1253 	if (ioapic_enable) {
1254 		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
1255 			INTR_POLARITY_HIGH);
1256 		if (irq < 0) {
1257 mixed_mode_setup:
1258 			error = ioapic_conf_legacy_extint(0);
1259 			if (!error) {
1260 				irq = machintr_legacy_intr_find(0,
1261 				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
1262 				if (irq < 0)
1263 					error = ENOENT;
1264 			}
1265 
1266 			if (error) {
1267 				if (!selected) {
1268 					kprintf("IOAPIC: setup mixed mode for "
1269 						"irq 0 failed: %d\n", error);
1270 					goto nointr;
1271 				} else {
1272 					panic("IOAPIC: setup mixed mode for "
1273 					      "irq 0 failed: %d\n", error);
1274 				}
1275 			}
1276 			mixed_mode = 1;
1277 		}
1278 		clkdesc = register_int(irq, clkintr, NULL, "clk",
1279 				       NULL,
1280 				       INTR_EXCL | INTR_CLOCK |
1281 				       INTR_NOPOLL | INTR_MPSAFE |
1282 				       INTR_NOENTROPY, 0);
1283 	} else {
1284 		register_int(0, clkintr, NULL, "clk", NULL,
1285 			     INTR_EXCL | INTR_CLOCK |
1286 			     INTR_NOPOLL | INTR_MPSAFE |
1287 			     INTR_NOENTROPY, 0);
1288 	}
1289 
1290 	/* Initialize RTC. */
1291 	writertc(RTC_STATUSA, rtc_statusa);
1292 	writertc(RTC_STATUSB, RTCSB_24HR);
1293 
1294 	if (ioapic_enable) {
1295 		error = i8254_ioapic_trial(irq, cti);
1296 		if (error) {
1297 			if (mixed_mode) {
1298 				if (!selected) {
1299 					kprintf("IOAPIC: mixed mode for irq %d "
1300 						"trial failed: %d\n",
1301 						irq, error);
1302 					goto nointr;
1303 				} else {
1304 					panic("IOAPIC: mixed mode for irq %d "
1305 					      "trial failed: %d\n", irq, error);
1306 				}
1307 			} else {
1308 				kprintf("IOAPIC: warning 8254 is not connected "
1309 					"to the correct pin, try mixed mode\n");
1310 				unregister_int(clkdesc, 0);
1311 				goto mixed_mode_setup;
1312 			}
1313 		}
1314 	}
1315 	return;
1316 
1317 nointr:
1318 	i8254_nointr = 1; /* don't try to register again */
1319 	cputimer_intr_deregister(cti);
1320 }
1321 
1322 void
1323 setstatclockrate(int newhz)
1324 {
1325 	if (newhz == RTC_PROFRATE)
1326 		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
1327 	else
1328 		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
1329 	writertc(RTC_STATUSA, rtc_statusa);
1330 }
1331 
#if 0
/* Unused: raw TSC read routine in timecounter format. */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif
1339 
#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Ring buffer of (tsc, tag) pairs, exported read-only via debug.timestamp */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");
/*
 * Record a timestamp event: store the low 32 bits of the TSC followed by
 * the caller-supplied tag 'x', advancing by two slots and wrapping at the
 * end of the buffer.  The slot after the newest pair is zeroed so readers
 * can find the end of the log.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0; /* mark last entry */
}
#endif /* KERN_TIMESTAMP */
1358 
1359 /*
1360  *
1361  */
1362 
1363 static int
1364 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
1365 {
1366     sysclock_t count;
1367     uint64_t tscval;
1368     char buf[32];
1369 
1370     crit_enter();
1371     if (sys_cputimer == &i8254_cputimer)
1372 	count = sys_cputimer->count();
1373     else
1374 	count = 0;
1375     if (tsc_present)
1376 	tscval = rdtsc();
1377     else
1378 	tscval = 0;
1379     crit_exit();
1380     ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
1381     return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
1382 }
1383 
/*
 * State shared between tsc_mpsync_test() and the per-cpu sampling
 * threads (tsc_mpsync_ap_thread).  Cache-aligned; the unused fields
 * pad the layout.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* # of cpus ready to start */
	volatile int		tsc_done_cnt;	/* # of cpus finished sampling */
	volatile int		tsc_command;	/* set non-zero to start test */
	volatile int		unused01[5];
	struct {
		uint64_t	v;		/* last TSC sample for this cpu */
		uint64_t	unused02;
	} tsc_saved[MAXCPU];
} __cachealign;
1394 
#if 0
/*
 * NOTE(review): disabled historical version of the TSC MP sync test.
 * It does not compile as-is: the parameter is 'info' but the body
 * references 'arg', and 'struct tsc_mpsync_thr' /
 * tsc_mpsync_test_remote are not defined in this file.  Kept for
 * reference only.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif
1459 
1460 #define TSC_TEST_COUNT		50000
1461 
/*
 * Per-cpu worker for the TSC MP synchronization test.  Signals ready,
 * waits for the start command, then repeatedly samples the TSC into
 * the shared info structure until the loop count is exhausted or some
 * other cpu finishes first.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run test for TSC_TEST_COUNT loops or until tsc_done_cnt != 0
	 * (another cpu has finished its test), then increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}
1490 
1491 static void
1492 tsc_mpsync_test(void)
1493 {
1494 	int cpu;
1495 	int try;
1496 
1497 	if (!tsc_invariant) {
1498 		/* Not even invariant TSC */
1499 		return;
1500 	}
1501 
1502 	if (ncpus == 1) {
1503 		/* Only one CPU */
1504 		tsc_mpsync = 1;
1505 		return;
1506 	}
1507 
1508 	/*
1509 	 * Forcing can be used w/qemu to reduce contention
1510 	 */
1511 	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);
1512 
1513 	if (tsc_mpsync == 0) {
1514 		switch (cpu_vendor_id) {
1515 		case CPU_VENDOR_INTEL:
1516 			/*
1517 			 * Intel probably works
1518 			 */
1519 			break;
1520 
1521 		case CPU_VENDOR_AMD:
1522 			/*
1523 			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
1524 			 * architectures) we have to watch out for
1525 			 * Erratum 778:
1526 			 *     "Processor Core Time Stamp Counters May
1527 			 *      Experience Drift"
1528 			 * This Erratum is only listed for cpus in Family
1529 			 * 15h < Model 30h and for 16h < Model 30h.
1530 			 *
1531 			 * AMD < Bulldozer probably doesn't work
1532 			 */
1533 			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
1534 			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
1535 				if (CPUID_TO_MODEL(cpu_id) < 0x30)
1536 					return;
1537 			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
1538 				return;
1539 			}
1540 			break;
1541 
1542 		default:
1543 			/* probably won't work */
1544 			return;
1545 		}
1546 	} else if (tsc_mpsync < 0) {
1547 		kprintf("TSC MP synchronization test is disabled\n");
1548 		tsc_mpsync = 0;
1549 		return;
1550 	}
1551 
1552 	/*
1553 	 * Test even if forced to 1 above.  If forced, we will use the TSC
1554 	 * even if the test fails.  (set forced to -1 to disable entirely).
1555 	 */
1556 	kprintf("TSC testing MP synchronization ...\n");
1557 
1558 	/*
1559 	 * Test TSC MP synchronization on APs.  Try up to 4 times.
1560 	 */
1561 	for (try = 0; try < 4; ++try) {
1562 		struct tsc_mpsync_info info;
1563 		uint64_t last;
1564 		int64_t xdelta;
1565 		int64_t delta;
1566 
1567 		bzero(&info, sizeof(info));
1568 
1569 		for (cpu = 0; cpu < ncpus; ++cpu) {
1570 			thread_t td;
1571 			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
1572 				    NULL, TDF_NOSTART, cpu,
1573 				    "tsc mpsync %d", cpu);
1574 			lwkt_setpri_initial(td, curthread->td_pri);
1575 			lwkt_schedule(td);
1576 		}
1577 		while (info.tsc_ready_cnt != ncpus)
1578 			lwkt_force_switch();
1579 
1580 		/*
1581 		 * All threads are ready, start the test and wait for
1582 		 * completion.
1583 		 */
1584 		info.tsc_command = 1;
1585 		while (info.tsc_done_cnt != ncpus)
1586 			lwkt_force_switch();
1587 
1588 		/*
1589 		 * Process results
1590 		 */
1591 		last = info.tsc_saved[0].v;
1592 		delta = 0;
1593 		for (cpu = 0; cpu < ncpus; ++cpu) {
1594 			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
1595 			last = info.tsc_saved[cpu].v;
1596 			if (xdelta < 0)
1597 				xdelta = -xdelta;
1598 			delta += xdelta;
1599 
1600 		}
1601 
1602 		/*
1603 		 * Result from attempt.  If its too wild just stop now.
1604 		 * Also break out if we succeed, no need to try further.
1605 		 */
1606 		kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n",
1607 			delta, ncpus, delta / ncpus,
1608 			tsc_frequency / 100000);
1609 		if (delta / ncpus > tsc_frequency / 100)
1610 			break;
1611 		if (delta / ncpus < tsc_frequency / 100000) {
1612 			tsc_mpsync = 1;
1613 			break;
1614 		}
1615 	}
1616 
1617 	if (tsc_mpsync)
1618 		kprintf("TSC is MP synchronized\n");
1619 	else
1620 		kprintf("TSC is not MP synchronized\n");
1621 }
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* hw.i8254.*: i8254 cputimer status */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

/* hw.tsc_*: TSC availability, invariance, MP sync and frequency */
SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	    &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	    &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	    &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");
1638