1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008 The DragonFly Project. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz and Don Ahn. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 34 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 35 */ 36 37 /* 38 * Routines to handle clock hardware. 
39 */ 40 41 /* 42 * inittodr, settodr and support routines written 43 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 44 * 45 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 46 */ 47 48 #if 0 49 #include "opt_clock.h" 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <sys/kernel.h> 57 #include <sys/bus.h> 58 #include <sys/sysctl.h> 59 #include <sys/cons.h> 60 #include <sys/kbio.h> 61 #include <sys/systimer.h> 62 #include <sys/globaldata.h> 63 #include <sys/machintr.h> 64 #include <sys/interrupt.h> 65 66 #include <sys/thread2.h> 67 68 #include <machine/clock.h> 69 #include <machine/cputypes.h> 70 #include <machine/frame.h> 71 #include <machine/ipl.h> 72 #include <machine/limits.h> 73 #include <machine/md_var.h> 74 #include <machine/psl.h> 75 #include <machine/segments.h> 76 #include <machine/smp.h> 77 #include <machine/specialreg.h> 78 #include <machine/intr_machdep.h> 79 80 #include <machine_base/apic/ioapic.h> 81 #include <machine_base/apic/ioapic_abi.h> 82 #include <machine_base/icu/icu.h> 83 #include <bus/isa/isa.h> 84 #include <bus/isa/rtc.h> 85 #include <machine_base/isa/timerreg.h> 86 87 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 88 TIMECOUNTER_INIT(placeholder, NULL); 89 90 static void i8254_restore(void); 91 static void resettodr_on_shutdown(void *arg __unused); 92 93 /* 94 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 95 * can use a simple formula for leap years. 
 */
/*
 * Simple leap-year test; valid because 32-bit time_t cannot reach years
 * where the century rule matters (see comment above).
 */
#define LEAPYEAR(y) ((u_int)(y) % 4 == 0)
#define DAYSPERYEAR   (31+28+31+30+31+30+31+31+30+31+30+31)

/* Nominal i8254 input clock in Hz, overridable at build time. */
#ifndef TIMER_FREQ
#define TIMER_FREQ   1193182
#endif

/* Mode-select bits and counter I/O port of the 8254 channel used as walltimer */
static uint8_t i8254_walltimer_sel;
static uint16_t i8254_walltimer_cntr;

int	adjkerntz;		/* local offset from GMT in seconds */
int	disable_rtc_set;	/* disable resettodr() if != 0 */
int	tsc_present;		/* TSC detected via CPUID_TSC */
int	tsc_invariant;		/* TSC advertised invariant (or forced) */
int	tsc_mpsync;
int	wall_cmos_clock;	/* wall CMOS clock assumed if != 0 */
int	timer0_running;		/* timer0 one-shot currently armed */
tsc_uclock_t tsc_frequency;
tsc_uclock_t tsc_oneus_approx;	/* always at least 1, approx only */

/* Ownership state for each of the three 8254 channels */
enum tstate { RELEASED, ACQUIRED };
enum tstate timer0_state;
enum tstate timer1_state;
enum tstate timer2_state;

int	i8254_cputimer_disable;	/* No need to initialize i8254 cputimer. */

static int	beeping = 0;	/* nonzero while a speaker beep is in progress */
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
static u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
static int	rtc_loaded;	/* set once inittodr() has read a valid RTC time */

/*
 * Precomputed integer divisor for sysclock -> 8254 tick conversion.
 * 0 means no exact divisor exists and muldivu64() must be used instead
 * (see i8254_intr_config()).
 */
static sysclock_t i8254_cputimer_div;

static int i8254_nointr;	/* set when 8254 intr registration must not retry */
static int i8254_intr_disable = 1;
TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable);

static int calibrate_timers_with_rtc = 0;
TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc);

static int calibrate_tsc_fast = 1;
TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast);

static int calibrate_test;
TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test);

static struct callout sysbeepstop_ch;

static sysclock_t i8254_cputimer_count(void);
static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last);
static void i8254_cputimer_destruct(struct cputimer *cputimer);

/* The i8254 exported as a free-running system cputimer */
static struct cputimer	i8254_cputimer = {
    .next		= SLIST_ENTRY_INITIALIZER,
    .name		= "i8254",
    .pri		= CPUTIMER_PRI_8254,
    .type		= 0,	/* determined later */
    .count		= i8254_cputimer_count,
    .fromhz		= cputimer_default_fromhz,
    .fromus		= cputimer_default_fromus,
    .construct		= i8254_cputimer_construct,
    .destruct		= i8254_cputimer_destruct,
    .freq		= TIMER_FREQ
};

static void i8254_intr_reload(struct cputimer_intr *, sysclock_t);
static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *);
static void i8254_intr_initclock(struct cputimer_intr *, boolean_t);

/* The i8254 timer0 exported as the interrupt cputimer */
static struct cputimer_intr i8254_cputimer_intr = {
    .freq = TIMER_FREQ,
    .reload = i8254_intr_reload,
    .enable = cputimer_intr_default_enable,
    .config = i8254_intr_config,
    .restart = cputimer_intr_default_restart,
    .pmfixup = cputimer_intr_default_pmfixup,
    .initclock = i8254_intr_initclock,
    .pcpuhand = NULL,
    .next = SLIST_ENTRY_INITIALIZER,
    .name = "i8254",
    .type = CPUTIMER_INTR_8254,
    .prio = CPUTIMER_INTR_PRIO_8254,
    .caps = CPUTIMER_INTR_CAP_PS,
    .priv = NULL
};

/*
 * Use this to lwkt_switch() when the scheduler clock is not
 * yet running, otherwise lwkt_switch() won't do anything.
 * XXX needs cleaning up in lwkt_thread.c
 */
static void
lwkt_force_switch(void)
{
	crit_enter();
	lwkt_schedulerclock(curthread);
	crit_exit();
	lwkt_switch();
}

/*
 * timer0 clock interrupt.  Timer0 is in one-shot mode and has stopped
 * counting as of this interrupt.  We use timer1 in free-running mode (not
 * generating any interrupts) as our main counter.  Each cpu has timeouts
 * pending.
 *
 * This code is INTR_MPSAFE and may be called without the BGL held.
 */
static void
clkintr(void *dummy, void *frame_arg)
{
	/* NOTE! Must be static: its address is handed to remote cpus via IPI */
	static sysclock_t sysclock_count;
	struct globaldata *gd = mycpu;
	struct globaldata *gscan;
	int n;

	/*
	 * SWSTROBE mode is a one-shot, the timer is no longer running
	 */
	timer0_running = 0;

	/*
	 * XXX the dispatcher needs work.  right now we call systimer_intr()
	 * directly or via IPI for any cpu with systimers queued, which is
	 * usually *ALL* of them.  We need to use the LAPIC timer for this.
	 */
	sysclock_count = sys_cputimer->count();
	for (n = 0; n < ncpus; ++n) {
		gscan = globaldata_find(n);
		/* skip cpus with no systimers queued */
		if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL)
			continue;
		if (gscan != gd) {
			lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr,
					&sysclock_count, 1);
		} else {
			systimer_intr(&sysclock_count, 0, frame_arg);
		}
	}
}


/*
 * Take exclusive ownership of 8254 timer2 and program its mode.
 * Returns -1 if timer2 is already owned, 0 on success.
 *
 * NOTE! not MP safe.
 */
int
acquire_timer2(int mode)
{
	if (timer2_state != RELEASED)
		return (-1);
	timer2_state = ACQUIRED;

	/*
	 * This access to the timer registers is as atomic as possible
	 * because it is a single instruction.  We could do better if we
	 * knew the rate.
	 */
	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));
	return (0);
}

/*
 * Release timer2, restoring its default square-wave mode.
 * Returns -1 if timer2 was not acquired, 0 on success.
 */
int
release_timer2(void)
{
	if (timer2_state != ACQUIRED)
		return (-1);
	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
	timer2_state = RELEASED;
	return (0);
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show rtc": dump the raw RTC date/time and control registers */
DB_SHOW_COMMAND(rtc, rtc)
{
	kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n",
	       rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY),
	       rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC),
	       rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR));
}
#endif /* DDB */

/*
 * Return the current cpu timer count as a 32 bit integer.
284 */ 285 static 286 sysclock_t 287 i8254_cputimer_count(void) 288 { 289 static uint16_t cputimer_last; 290 uint16_t count; 291 sysclock_t ret; 292 293 clock_lock(); 294 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH); 295 count = (uint8_t)inb(i8254_walltimer_cntr); /* get countdown */ 296 count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8); 297 count = -count; /* -> countup */ 298 if (count < cputimer_last) /* rollover */ 299 i8254_cputimer.base += 0x00010000U; 300 ret = i8254_cputimer.base | count; 301 cputimer_last = count; 302 clock_unlock(); 303 304 return(ret); 305 } 306 307 /* 308 * This function is called whenever the system timebase changes, allowing 309 * us to calculate what is needed to convert a system timebase tick 310 * into an 8254 tick for the interrupt timer. If we can convert to a 311 * simple shift, multiplication, or division, we do so. Otherwise 64 312 * bit arithmatic is required every time the interrupt timer is reloaded. 313 */ 314 static void 315 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer) 316 { 317 sysclock_t freq; 318 sysclock_t div; 319 320 /* 321 * Will a simple divide do the trick? 322 */ 323 div = (timer->freq + (cti->freq / 2)) / cti->freq; 324 freq = cti->freq * div; 325 326 if (freq >= timer->freq - 1 && freq <= timer->freq + 1) 327 i8254_cputimer_div = div; 328 else 329 i8254_cputimer_div = 0; 330 } 331 332 /* 333 * Reload for the next timeout. It is possible for the reload value 334 * to be 0 or negative, indicating that an immediate timer interrupt 335 * is desired. For now make the minimum 2 ticks. 336 * 337 * We may have to convert from the system timebase to the 8254 timebase. 
 */
static void
i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload)
{
	uint16_t count;

	/* convert system timebase ticks into 8254 ticks */
	if (i8254_cputimer_div)
		reload /= i8254_cputimer_div;
	else
		reload = muldivu64(reload, cti->freq, sys_cputimer->freq);

	if (reload < 2)
		reload = 2;		/* minimum count */
	if (reload > 0xFFFF)
		reload = 0xFFFF;	/* almost full count (0 is full count) */

	clock_lock();
	if (timer0_running) {
		/*
		 * Timer0 is already armed; only reprogram it if the new
		 * deadline is sooner than the remaining count.
		 */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* count-down timer */
		count = (uint8_t)inb(TIMER_CNTR0);		/* lsb */
		count |= ((uint8_t)inb(TIMER_CNTR0) << 8);	/* msb */
		if (reload < count) {
			outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
			outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
			outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
		}
	} else {
		timer0_running = 1;
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT);
		outb(TIMER_CNTR0, (uint8_t)reload); 	/* lsb */
		outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */
	}
	clock_unlock();
}

/*
 * DELAY(usec)	     - Spin for the specified number of microseconds.
 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds,
 *		       but do a thread switch in the loop
 *
 * Relies on timer 1 counting down from (cputimer_freq / hz)
 * Note: timer had better have been programmed before this is first used!
 */
static void
DODELAY(int n, int doswitch)
{
	ssysclock_t delta, ticks_left;
	sysclock_t prev_tick, tick;

#ifdef DELAYDEBUG
	int getit_calls = 1;
	int n1;
	static int state = 0;

	if (state == 0) {
		state = 1;
		for (n1 = 1; n1 <= 10000000; n1 *= 10)
			DELAY(n1);
		state = 2;
	}
	if (state == 1)
		kprintf("DELAY(%d)...", n);
#endif
	/*
	 * Guard against the timer being uninitialized if we are called
	 * early for console i/o.
	 */
	if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
		i8254_restore();

	/*
	 * Read the counter first, so that the rest of the setup overhead is
	 * counted.  Then calculate the number of hardware timer ticks
	 * required, rounding up to be sure we delay at least the requested
	 * number of microseconds.
	 */
	prev_tick = sys_cputimer->count();
	ticks_left = muldivu64(n, sys_cputimer->freq + 999999, 1000000);

	/*
	 * Loop until done.
	 */
	while (ticks_left > 0) {
		tick = sys_cputimer->count();
#ifdef DELAYDEBUG
		++getit_calls;
#endif
		delta = tick - prev_tick;
		prev_tick = tick;
		/* clamp in case the counter moved backwards (signed delta) */
		if (delta < 0)
			delta = 0;
		ticks_left -= delta;
		if (doswitch && ticks_left > 0)
			lwkt_switch();
		cpu_pause();
	}
#ifdef DELAYDEBUG
	if (state == 1)
		kprintf(" %d calls to getit() at %d usec each\n",
		       getit_calls, (n + 5) / getit_calls);
#endif
}

/*
 * DELAY() never switches.
 */
void
DELAY(int n)
{
	DODELAY(n, 0);
}

/*
 * Returns non-zero if the specified time period has elapsed.  Call
 * first with last_clock set to 0.
 */
int
CHECKTIMEOUT(TOTALDELAY *tdd)
{
	sysclock_t delta;
	int us;

	if (tdd->started == 0) {
		/* first call: arm the timer if needed and record the start */
		if (timer0_state == RELEASED && i8254_cputimer_disable == 0)
			i8254_restore();
		tdd->last_clock = sys_cputimer->count();
		tdd->started = 1;
		return(0);
	}
	delta = sys_cputimer->count() - tdd->last_clock;
	us = muldivu64(delta, 1000000, sys_cputimer->freq);
	/* advance last_clock only by whole microseconds actually consumed */
	tdd->last_clock += muldivu64(us, sys_cputimer->freq, 1000000);
	tdd->us -= us;

	return (tdd->us < 0);
}


/*
 * DRIVERSLEEP() does not switch if called with a spinlock held or
 * from a hard interrupt.
479 */ 480 void 481 DRIVERSLEEP(int usec) 482 { 483 globaldata_t gd = mycpu; 484 485 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 486 DODELAY(usec, 0); 487 } else { 488 DODELAY(usec, 1); 489 } 490 } 491 492 static void 493 sysbeepstop(void *chan) 494 { 495 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 496 beeping = 0; 497 release_timer2(); 498 } 499 500 int 501 sysbeep(int pitch, int period) 502 { 503 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 504 return(-1); 505 if (sysbeep_enable == 0) 506 return(-1); 507 /* 508 * Nobody else is using timer2, we do not need the clock lock 509 */ 510 outb(TIMER_CNTR2, pitch); 511 outb(TIMER_CNTR2, (pitch>>8)); 512 if (!beeping) { 513 /* enable counter2 output to speaker */ 514 outb(IO_PPI, inb(IO_PPI) | 3); 515 beeping = period; 516 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 517 } 518 return (0); 519 } 520 521 /* 522 * RTC support routines 523 */ 524 525 int 526 rtcin(int reg) 527 { 528 u_char val; 529 530 crit_enter(); 531 outb(IO_RTC, reg); 532 inb(0x84); 533 val = inb(IO_RTC + 1); 534 inb(0x84); 535 crit_exit(); 536 return (val); 537 } 538 539 static __inline void 540 writertc(u_char reg, u_char val) 541 { 542 crit_enter(); 543 inb(0x84); 544 outb(IO_RTC, reg); 545 inb(0x84); 546 outb(IO_RTC + 1, val); 547 inb(0x84); /* XXX work around wrong order in rtcin() */ 548 crit_exit(); 549 } 550 551 static __inline int 552 readrtc(int port) 553 { 554 return(bcd2bin(rtcin(port))); 555 } 556 557 static u_int 558 calibrate_clocks(void) 559 { 560 tsc_uclock_t old_tsc; 561 sysclock_t tot_count; 562 sysclock_t count, prev_count; 563 int sec, start_sec, timeout; 564 565 if (bootverbose) 566 kprintf("Calibrating clock(s) ...\n"); 567 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 568 goto fail; 569 timeout = 100000000; 570 571 /* Read the mc146818A seconds counter. 
*/ 572 for (;;) { 573 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 574 sec = rtcin(RTC_SEC); 575 break; 576 } 577 if (--timeout == 0) 578 goto fail; 579 } 580 581 /* Wait for the mC146818A seconds counter to change. */ 582 start_sec = sec; 583 for (;;) { 584 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 585 sec = rtcin(RTC_SEC); 586 if (sec != start_sec) 587 break; 588 } 589 if (--timeout == 0) 590 goto fail; 591 } 592 593 /* Start keeping track of the i8254 counter. */ 594 prev_count = sys_cputimer->count(); 595 tot_count = 0; 596 597 if (tsc_present) 598 old_tsc = rdtsc(); 599 else 600 old_tsc = 0; /* shut up gcc */ 601 602 /* 603 * Wait for the mc146818A seconds counter to change. Read the i8254 604 * counter for each iteration since this is convenient and only 605 * costs a few usec of inaccuracy. The timing of the final reads 606 * of the counters almost matches the timing of the initial reads, 607 * so the main cause of inaccuracy is the varying latency from 608 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 609 * rtcin(RTC_SEC) that returns a changed seconds count. The 610 * maximum inaccuracy from this cause is < 10 usec on 486's. 611 */ 612 start_sec = sec; 613 for (;;) { 614 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 615 sec = rtcin(RTC_SEC); 616 count = sys_cputimer->count(); 617 tot_count += (sysclock_t)(count - prev_count); 618 prev_count = count; 619 if (sec != start_sec) 620 break; 621 if (--timeout == 0) 622 goto fail; 623 } 624 625 /* 626 * Read the cpu cycle counter. The timing considerations are 627 * similar to those for the i8254 clock. 
628 */ 629 if (tsc_present) { 630 tsc_frequency = rdtsc() - old_tsc; 631 if (bootverbose) { 632 kprintf("TSC clock: %jd Hz (Method A)\n", 633 (intmax_t)tsc_frequency); 634 } 635 } 636 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 637 638 kprintf("i8254 clock: %lu Hz\n", tot_count); 639 return (tot_count); 640 641 fail: 642 kprintf("failed, using default i8254 clock of %lu Hz\n", 643 i8254_cputimer.freq); 644 return (i8254_cputimer.freq); 645 } 646 647 static void 648 i8254_restore(void) 649 { 650 timer0_state = ACQUIRED; 651 652 clock_lock(); 653 654 /* 655 * Timer0 is our fine-grained variable clock interrupt 656 */ 657 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 658 outb(TIMER_CNTR0, 2); /* lsb */ 659 outb(TIMER_CNTR0, 0); /* msb */ 660 clock_unlock(); 661 662 if (!i8254_nointr) { 663 cputimer_intr_register(&i8254_cputimer_intr); 664 cputimer_intr_select(&i8254_cputimer_intr, 0); 665 } 666 667 /* 668 * Timer1 or timer2 is our free-running clock, but only if another 669 * has not been selected. 670 */ 671 cputimer_register(&i8254_cputimer); 672 cputimer_select(&i8254_cputimer, 0); 673 } 674 675 static void 676 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 677 { 678 int which; 679 680 /* 681 * Should we use timer 1 or timer 2 ? 
682 */ 683 which = 0; 684 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 685 if (which != 1 && which != 2) 686 which = 2; 687 688 switch(which) { 689 case 1: 690 timer->name = "i8254_timer1"; 691 timer->type = CPUTIMER_8254_SEL1; 692 i8254_walltimer_sel = TIMER_SEL1; 693 i8254_walltimer_cntr = TIMER_CNTR1; 694 timer1_state = ACQUIRED; 695 break; 696 case 2: 697 timer->name = "i8254_timer2"; 698 timer->type = CPUTIMER_8254_SEL2; 699 i8254_walltimer_sel = TIMER_SEL2; 700 i8254_walltimer_cntr = TIMER_CNTR2; 701 timer2_state = ACQUIRED; 702 break; 703 } 704 705 timer->base = (oldclock + 0xFFFF) & 0xFFFFFFFFFFFF0000LU; 706 707 clock_lock(); 708 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 709 outb(i8254_walltimer_cntr, 0); /* lsb */ 710 outb(i8254_walltimer_cntr, 0); /* msb */ 711 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 712 clock_unlock(); 713 } 714 715 static void 716 i8254_cputimer_destruct(struct cputimer *timer) 717 { 718 switch(timer->type) { 719 case CPUTIMER_8254_SEL1: 720 timer1_state = RELEASED; 721 break; 722 case CPUTIMER_8254_SEL2: 723 timer2_state = RELEASED; 724 break; 725 default: 726 break; 727 } 728 timer->type = 0; 729 } 730 731 static void 732 rtc_restore(void) 733 { 734 /* Restore all of the RTC's "status" (actually, control) registers. */ 735 writertc(RTC_STATUSB, RTCSB_24HR); 736 writertc(RTC_STATUSA, rtc_statusa); 737 writertc(RTC_STATUSB, rtc_statusb); 738 } 739 740 /* 741 * Restore all the timers. 742 * 743 * This function is called to resynchronize our core timekeeping after a 744 * long halt, e.g. from apm_default_resume() and friends. It is also 745 * called if after a BIOS call we have detected munging of the 8254. 746 * It is necessary because cputimer_count() counter's delta may have grown 747 * too large for nanouptime() and friends to handle, or (in the case of 8254 748 * munging) might cause the SYSTIMER code to prematurely trigger. 
749 */ 750 void 751 timer_restore(void) 752 { 753 crit_enter(); 754 if (i8254_cputimer_disable == 0) 755 i8254_restore(); /* restore timer_freq and hz */ 756 rtc_restore(); /* reenable RTC interrupts */ 757 crit_exit(); 758 } 759 760 #define MAX_MEASURE_RETRIES 100 761 762 static u_int64_t 763 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time, 764 int *retries) 765 { 766 u_int64_t tsc1, tsc2; 767 u_int64_t threshold; 768 sysclock_t val; 769 int cnt = 0; 770 771 do { 772 if (cnt > MAX_MEASURE_RETRIES/2) 773 threshold = timer_latency << 1; 774 else 775 threshold = timer_latency + (timer_latency >> 2); 776 777 cnt++; 778 tsc1 = rdtsc_ordered(); 779 val = sys_cputimer->count(); 780 tsc2 = rdtsc_ordered(); 781 } while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES && 782 tsc2 - tsc1 > threshold); 783 784 *retries = cnt - 1; 785 *latency = tsc2 - tsc1; 786 *time = val; 787 return tsc1; 788 } 789 790 static u_int64_t 791 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency) 792 { 793 if (calibrate_tsc_fast) { 794 u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1; 795 u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2; 796 u_int64_t freq1, freq2; 797 sysclock_t start1, end1, start2, end2; 798 int retries1, retries2, retries3, retries4; 799 800 DELAY(1000); 801 old_tsc1 = do_measure(timer_latency, &start_lat1, &start1, 802 &retries1); 803 DELAY(20000); 804 old_tsc2 = do_measure(timer_latency, &start_lat2, &start2, 805 &retries2); 806 DELAY(usecs); 807 new_tsc1 = do_measure(timer_latency, &end_lat1, &end1, 808 &retries3); 809 DELAY(20000); 810 new_tsc2 = do_measure(timer_latency, &end_lat2, &end2, 811 &retries4); 812 813 old_tsc1 += start_lat1; 814 old_tsc2 += start_lat2; 815 freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2; 816 freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2; 817 end1 -= start1; 818 end2 -= start2; 819 /* This should in practice be safe from overflows. 
 */
		freq1 = muldivu64(freq1, sys_cputimer->freq, end1);
		freq2 = muldivu64(freq2, sys_cputimer->freq, end2);
		if (calibrate_test && (retries1 > 0 || retries2 > 0)) {
			kprintf("%s: retries: %d, %d, %d, %d\n",
			    __func__, retries1, retries2, retries3, retries4);
		}
		if (calibrate_test) {
			kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n",
			    __func__, freq1, freq2, (freq1 + freq2) / 2);
		}
		/* return the average of the two independent measurements */
		return (freq1 + freq2) / 2;
	} else {
		u_int64_t old_tsc, new_tsc;
		u_int64_t freq;

		old_tsc = rdtsc_ordered();
		DELAY(usecs);
		new_tsc = rdtsc();
		freq = new_tsc - old_tsc;
		/* This should in practice be safe from overflows. */
		freq = (freq * 1000 * 1000) / usecs;
		return freq;
	}
}

/*
 * Initialize 8254 timer 0 early so that it can be used in DELAY().
 */
void
startrtclock(void)
{
	const timecounter_init_t **list;
	sysclock_t delta, freq;

	callout_init_mp(&sysbeepstop_ch);

	/*
	 * Can we use the TSC?
	 *
	 * NOTE: If running under qemu, probably a good idea to force the
	 *	 TSC because we are not likely to detect it as being
	 *	 invariant or mpsyncd if you don't.  This will greatly
	 *	 reduce SMP contention.
	 */
	if (cpu_feature & CPUID_TSC) {
		tsc_present = 1;
		TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant);

		/* CPUID 0x80000007 EDX bit 8: invariant TSC */
		if ((cpu_vendor_id == CPU_VENDOR_INTEL ||
		     cpu_vendor_id == CPU_VENDOR_AMD) &&
		    cpu_exthigh >= 0x80000007) {
			u_int regs[4];

			do_cpuid(0x80000007, regs);
			if (regs[3] & 0x100)
				tsc_invariant = 1;
		}
	} else {
		tsc_present = 0;
	}

	/*
	 * Initial RTC state, don't do anything unexpected
	 */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	/* run platform timecounter initializers registered via linker set */
	SET_FOREACH(list, timecounter_init_set) {
		if ((*list)->configure != NULL)
			(*list)->configure();
	}

	/*
	 * If tsc_frequency is already initialized now, and a flag is set
	 * that i8254 timer is unneeded, we are done.
	 */
	if (tsc_frequency != 0 && i8254_cputimer_disable != 0)
		goto done;

	/*
	 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to
	 * generate an interrupt, which we will ignore for now.
	 *
	 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000
	 * (so it counts a full 2^16 and repeats).  We will use this timer
	 * for our counting.
	 */
	if (i8254_cputimer_disable == 0)
		i8254_restore();

	kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name);

	/*
	 * When booting without verbose messages, it's pointless to run the
	 * calibrate_clocks() calibration code, when we don't use the
	 * results in any way. With bootverbose, we are at least printing
	 *  this information to the kernel log.
	 */
	if (i8254_cputimer_disable != 0 ||
	    (calibrate_timers_with_rtc == 0 && !bootverbose)) {
		goto skip_rtc_based;
	}

	freq = calibrate_clocks();
#ifdef CLK_CALIBRATION_LOOP
	if (bootverbose) {
		int c;

		cnpoll(TRUE);
		kprintf("Press a key on the console to "
			"abort clock calibration\n");
		while ((c = cncheckc()) == -1 || c == NOKEY)
			calibrate_clocks();
		cnpoll(FALSE);
	}
#endif

	/*
	 * Use the calibrated i8254 frequency if it seems reasonable.
	 * Otherwise use the default, and don't use the calibrated i586
	 * frequency.
	 */
	delta = freq > i8254_cputimer.freq ?
		freq - i8254_cputimer.freq : i8254_cputimer.freq - freq;
	if (delta < i8254_cputimer.freq / 100) {
		if (calibrate_timers_with_rtc == 0) {
			kprintf(
"hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n");
			freq = i8254_cputimer.freq;
		}
		/*
		 * NOTE:
		 * Interrupt timer's freq must be adjusted
		 * before we change the cputimer's frequency.
		 */
		i8254_cputimer_intr.freq = freq;
		cputimer_set_frequency(&i8254_cputimer, freq);
	} else {
		if (bootverbose)
			kprintf("%lu Hz differs from default of %lu Hz "
				"by more than 1%%\n",
			        freq, i8254_cputimer.freq);
		tsc_frequency = 0;
	}

	if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) {
		kprintf("hw.calibrate_timers_with_rtc not "
			"set - using old calibration method\n");
		tsc_frequency = 0;
	}

skip_rtc_based:
	if (tsc_present && tsc_frequency == 0) {
		u_int cnt;
		u_int64_t cputime_latency_tsc = 0, max = 0, min = 0;
		int i;

		for (i = 0; i < 10; i++) {
			/* Warm up */
			(void)sys_cputimer->count();
		}
		/* measure the typical rdtsc+count()+rdtsc latency */
		for (i = 0; i < 100; i++) {
			u_int64_t old_tsc, new_tsc;

			old_tsc = rdtsc_ordered();
			(void)sys_cputimer->count();
			new_tsc = rdtsc_ordered();
			cputime_latency_tsc += (new_tsc - old_tsc);
			if (max < (new_tsc - old_tsc))
				max = new_tsc - old_tsc;
			if (min == 0 || min > (new_tsc - old_tsc))
				min = new_tsc - old_tsc;
		}
		cputime_latency_tsc /= 100;
		kprintf(
		    "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n",
		    cputime_latency_tsc, min, max);
		/* XXX Instead of this, properly filter out outliers. */
		cputime_latency_tsc = min;

		if (calibrate_test > 0) {
			u_int64_t values[20], avg = 0;
			for (i = 1; i <= 20; i++) {
				u_int64_t freq;

				freq = do_calibrate_cputimer(i * 100 * 1000,
				    cputime_latency_tsc);
				values[i - 1] = freq;
			}
			/* Compute an average TSC for the 1s to 2s delays. */
			for (i = 10; i < 20; i++)
				avg += values[i];
			avg /= 10;
			for (i = 0; i < 20; i++) {
				kprintf("%ums: %lu (Diff from average: %ld)\n",
				    (i + 1) * 100, values[i],
				    (int64_t)(values[i] - avg));
			}
		}

		if (calibrate_tsc_fast > 0) {
			/* HPET would typically be >10MHz */
			if (sys_cputimer->freq >= 10000000)
				cnt = 200000;
			else
				cnt = 500000;
		} else {
			cnt = 1000000;
		}

		tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc);
		if (bootverbose && calibrate_timers_with_rtc) {
			kprintf("TSC clock: %jd Hz (Method B)\n",
			    (intmax_t)tsc_frequency);
		}
	}

done:
	if (tsc_present) {
		kprintf("TSC%s clock: %jd Hz\n",
		    tsc_invariant ? " invariant" : "",
		    (intmax_t)tsc_frequency);
	}
	/* |1 guarantees a non-zero result even if tsc_frequency == 0 */
	tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000;

	EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown,
			      NULL, SHUTDOWN_PRI_LAST);
}

/*
 * Sync the time of day back to the RTC on shutdown, but only if
 * we have already loaded it and have not crashed.
 */
static void
resettodr_on_shutdown(void *arg __unused)
{
	if (rtc_loaded && panicstr == NULL) {
		resettodr();
	}
}

/*
 * Initialize the time of day register, based on the time base which is, e.g.
 * from a filesystem.
1064 */ 1065 void 1066 inittodr(time_t base) 1067 { 1068 unsigned long sec, days; 1069 int year, month; 1070 int y, m; 1071 struct timespec ts; 1072 1073 if (base) { 1074 ts.tv_sec = base; 1075 ts.tv_nsec = 0; 1076 set_timeofday(&ts); 1077 } 1078 1079 /* Look if we have a RTC present and the time is valid */ 1080 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 1081 goto wrong_time; 1082 1083 /* wait for time update to complete */ 1084 /* If RTCSA_TUP is zero, we have at least 244us before next update */ 1085 crit_enter(); 1086 while (rtcin(RTC_STATUSA) & RTCSA_TUP) { 1087 crit_exit(); 1088 crit_enter(); 1089 } 1090 1091 days = 0; 1092 #ifdef USE_RTC_CENTURY 1093 year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; 1094 #else 1095 year = readrtc(RTC_YEAR) + 1900; 1096 if (year < 1970) 1097 year += 100; 1098 #endif 1099 if (year < 1970) { 1100 crit_exit(); 1101 goto wrong_time; 1102 } 1103 month = readrtc(RTC_MONTH); 1104 for (m = 1; m < month; m++) 1105 days += daysinmonth[m-1]; 1106 if ((month > 2) && LEAPYEAR(year)) 1107 days ++; 1108 days += readrtc(RTC_DAY) - 1; 1109 for (y = 1970; y < year; y++) 1110 days += DAYSPERYEAR + LEAPYEAR(y); 1111 sec = ((( days * 24 + 1112 readrtc(RTC_HRS)) * 60 + 1113 readrtc(RTC_MIN)) * 60 + 1114 readrtc(RTC_SEC)); 1115 /* sec now contains the number of seconds, since Jan 1 1970, 1116 in the local time zone */ 1117 1118 sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? 
adjkerntz : 0); 1119 1120 y = (int)(time_second - sec); 1121 if (y <= -2 || y >= 2) { 1122 /* badly off, adjust it */ 1123 ts.tv_sec = sec; 1124 ts.tv_nsec = 0; 1125 set_timeofday(&ts); 1126 } 1127 rtc_loaded = 1; 1128 crit_exit(); 1129 return; 1130 1131 wrong_time: 1132 kprintf("Invalid time in real time clock.\n"); 1133 kprintf("Check and reset the date immediately!\n"); 1134 } 1135 1136 /* 1137 * Write system time back to RTC 1138 */ 1139 void 1140 resettodr(void) 1141 { 1142 struct timeval tv; 1143 unsigned long tm; 1144 int m; 1145 int y; 1146 1147 if (disable_rtc_set) 1148 return; 1149 1150 microtime(&tv); 1151 tm = tv.tv_sec; 1152 1153 crit_enter(); 1154 /* Disable RTC updates and interrupts. */ 1155 writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); 1156 1157 /* Calculate local time to put in RTC */ 1158 1159 tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); 1160 1161 writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ 1162 writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ 1163 writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ 1164 1165 /* We have now the days since 01-01-1970 in tm */ 1166 writertc(RTC_WDAY, (tm+4)%7); /* Write back Weekday */ 1167 for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); 1168 tm >= m; 1169 y++, m = DAYSPERYEAR + LEAPYEAR(y)) 1170 tm -= m; 1171 1172 /* Now we have the years in y and the day-of-the-year in tm */ 1173 writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ 1174 #ifdef USE_RTC_CENTURY 1175 writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ 1176 #endif 1177 for (m = 0; ; m++) { 1178 int ml; 1179 1180 ml = daysinmonth[m]; 1181 if (m == 1 && LEAPYEAR(y)) 1182 ml++; 1183 if (tm < ml) 1184 break; 1185 tm -= ml; 1186 } 1187 1188 writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ 1189 writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ 1190 1191 /* Reenable RTC updates and interrupts. 
*/ 1192 writertc(RTC_STATUSB, rtc_statusb); 1193 crit_exit(); 1194 } 1195 1196 static int 1197 i8254_ioapic_trial(int irq, struct cputimer_intr *cti) 1198 { 1199 sysclock_t base; 1200 long lastcnt; 1201 1202 /* 1203 * Following code assumes the 8254 is the cpu timer, 1204 * so make sure it is. 1205 */ 1206 /*KKASSERT(sys_cputimer == &i8254_cputimer); (tested by CuteLarva) */ 1207 KKASSERT(cti == &i8254_cputimer_intr); 1208 1209 lastcnt = get_interrupt_counter(irq, mycpuid); 1210 1211 /* 1212 * Force an 8254 Timer0 interrupt and wait 1/100s for 1213 * it to happen, then see if we got it. 1214 */ 1215 kprintf("IOAPIC: testing 8254 interrupt delivery..."); 1216 1217 i8254_intr_reload(cti, sys_cputimer->fromus(2)); 1218 base = sys_cputimer->count(); 1219 while (sys_cputimer->count() - base < sys_cputimer->freq / 100) 1220 ; /* nothing */ 1221 1222 if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0) { 1223 kprintf(" failed\n"); 1224 return ENOENT; 1225 } else { 1226 kprintf(" success\n"); 1227 } 1228 return 0; 1229 } 1230 1231 /* 1232 * Start both clocks running. DragonFly note: the stat clock is no longer 1233 * used. Instead, 8254 based systimers are used for all major clock 1234 * interrupts. 1235 */ 1236 static void 1237 i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected) 1238 { 1239 void *clkdesc = NULL; 1240 int irq = 0, mixed_mode = 0, error; 1241 1242 KKASSERT(mycpuid == 0); 1243 1244 if (!selected && i8254_intr_disable) 1245 goto nointr; 1246 1247 /* 1248 * The stat interrupt mask is different without the 1249 * statistics clock. Also, don't set the interrupt 1250 * flag which would normally cause the RTC to generate 1251 * interrupts. 1252 */ 1253 rtc_statusb = RTCSB_24HR; 1254 1255 /* Finish initializing 8254 timer 0. 
*/ 1256 if (ioapic_enable) { 1257 irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE, 1258 INTR_POLARITY_HIGH); 1259 if (irq < 0) { 1260 mixed_mode_setup: 1261 error = ioapic_conf_legacy_extint(0); 1262 if (!error) { 1263 irq = machintr_legacy_intr_find(0, 1264 INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH); 1265 if (irq < 0) 1266 error = ENOENT; 1267 } 1268 1269 if (error) { 1270 if (!selected) { 1271 kprintf("IOAPIC: setup mixed mode for " 1272 "irq 0 failed: %d\n", error); 1273 goto nointr; 1274 } else { 1275 panic("IOAPIC: setup mixed mode for " 1276 "irq 0 failed: %d\n", error); 1277 } 1278 } 1279 mixed_mode = 1; 1280 } 1281 clkdesc = register_int(irq, clkintr, NULL, "clk", 1282 NULL, 1283 INTR_EXCL | INTR_CLOCK | 1284 INTR_NOPOLL | INTR_MPSAFE | 1285 INTR_NOENTROPY, 0); 1286 } else { 1287 register_int(0, clkintr, NULL, "clk", NULL, 1288 INTR_EXCL | INTR_CLOCK | 1289 INTR_NOPOLL | INTR_MPSAFE | 1290 INTR_NOENTROPY, 0); 1291 } 1292 1293 /* Initialize RTC. */ 1294 writertc(RTC_STATUSA, rtc_statusa); 1295 writertc(RTC_STATUSB, RTCSB_24HR); 1296 1297 if (ioapic_enable) { 1298 error = i8254_ioapic_trial(irq, cti); 1299 if (error) { 1300 if (mixed_mode) { 1301 if (!selected) { 1302 kprintf("IOAPIC: mixed mode for irq %d " 1303 "trial failed: %d\n", 1304 irq, error); 1305 goto nointr; 1306 } else { 1307 panic("IOAPIC: mixed mode for irq %d " 1308 "trial failed: %d\n", irq, error); 1309 } 1310 } else { 1311 kprintf("IOAPIC: warning 8254 is not connected " 1312 "to the correct pin, try mixed mode\n"); 1313 unregister_int(clkdesc, 0); 1314 goto mixed_mode_setup; 1315 } 1316 } 1317 } 1318 return; 1319 1320 nointr: 1321 i8254_nointr = 1; /* don't try to register again */ 1322 cputimer_intr_deregister(cti); 1323 } 1324 1325 void 1326 setstatclockrate(int newhz) 1327 { 1328 if (newhz == RTC_PROFRATE) 1329 rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; 1330 else 1331 rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; 1332 writertc(RTC_STATUSA, rtc_statusa); 1333 } 1334 1335 #if 0 1336 static 
unsigned 1337 tsc_get_timecount(struct timecounter *tc) 1338 { 1339 return (rdtsc()); 1340 } 1341 #endif 1342 1343 #ifdef KERN_TIMESTAMP 1344 #define KERN_TIMESTAMP_SIZE 16384 1345 static u_long tsc[KERN_TIMESTAMP_SIZE] ; 1346 SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc, 1347 sizeof(tsc), "LU", "Kernel timestamps"); 1348 void 1349 _TSTMP(u_int32_t x) 1350 { 1351 static int i; 1352 1353 tsc[i] = (u_int32_t)rdtsc(); 1354 tsc[i+1] = x; 1355 i = i + 2; 1356 if (i >= KERN_TIMESTAMP_SIZE) 1357 i = 0; 1358 tsc[i] = 0; /* mark last entry */ 1359 } 1360 #endif /* KERN_TIMESTAMP */ 1361 1362 /* 1363 * 1364 */ 1365 1366 static int 1367 hw_i8254_timestamp(SYSCTL_HANDLER_ARGS) 1368 { 1369 sysclock_t count; 1370 uint64_t tscval; 1371 char buf[32]; 1372 1373 crit_enter(); 1374 if (sys_cputimer == &i8254_cputimer) 1375 count = sys_cputimer->count(); 1376 else 1377 count = 0; 1378 if (tsc_present) 1379 tscval = rdtsc(); 1380 else 1381 tscval = 0; 1382 crit_exit(); 1383 ksnprintf(buf, sizeof(buf), "%016lx %016lx", count, tscval); 1384 return(SYSCTL_OUT(req, buf, strlen(buf) + 1)); 1385 } 1386 1387 struct tsc_mpsync_info { 1388 volatile int tsc_ready_cnt; 1389 volatile int tsc_done_cnt; 1390 volatile int tsc_command; 1391 volatile int unused01[5]; 1392 struct { 1393 uint64_t v; 1394 uint64_t unused02; 1395 } tsc_saved[MAXCPU]; 1396 } __cachealign; 1397 1398 #if 0 1399 static void 1400 tsc_mpsync_test_loop(struct tsc_mpsync_thr *info) 1401 { 1402 struct globaldata *gd = mycpu; 1403 tsc_uclock_t test_end, test_begin; 1404 u_int i; 1405 1406 if (bootverbose) { 1407 kprintf("cpu%d: TSC testing MP synchronization ...\n", 1408 gd->gd_cpuid); 1409 } 1410 1411 test_begin = rdtsc_ordered(); 1412 /* Run test for 100ms */ 1413 test_end = test_begin + (tsc_frequency / 10); 1414 1415 arg->tsc_mpsync = 1; 1416 arg->tsc_target = test_begin; 1417 1418 #define TSC_TEST_TRYMAX 1000000 /* Make sure we could stop */ 1419 #define TSC_TEST_TRYMIN 50000 1420 1421 for (i = 0; i < 
TSC_TEST_TRYMAX; ++i) { 1422 struct lwkt_cpusync cs; 1423 1424 crit_enter(); 1425 lwkt_cpusync_init(&cs, gd->gd_other_cpus, 1426 tsc_mpsync_test_remote, arg); 1427 lwkt_cpusync_interlock(&cs); 1428 cpu_pause(); 1429 arg->tsc_target = rdtsc_ordered(); 1430 cpu_mfence(); 1431 lwkt_cpusync_deinterlock(&cs); 1432 crit_exit(); 1433 cpu_pause(); 1434 1435 if (!arg->tsc_mpsync) { 1436 kprintf("cpu%d: TSC is not MP synchronized @%u\n", 1437 gd->gd_cpuid, i); 1438 break; 1439 } 1440 if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN) 1441 break; 1442 } 1443 1444 #undef TSC_TEST_TRYMIN 1445 #undef TSC_TEST_TRYMAX 1446 1447 if (arg->tsc_target == test_begin) { 1448 kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid); 1449 /* XXX disable TSC? */ 1450 tsc_invariant = 0; 1451 arg->tsc_mpsync = 0; 1452 return; 1453 } 1454 1455 if (arg->tsc_mpsync && bootverbose) { 1456 kprintf("cpu%d: TSC is MP synchronized after %u tries\n", 1457 gd->gd_cpuid, i); 1458 } 1459 } 1460 1461 #endif 1462 1463 #define TSC_TEST_COUNT 50000 1464 1465 static void 1466 tsc_mpsync_ap_thread(void *xinfo) 1467 { 1468 struct tsc_mpsync_info *info = xinfo; 1469 int cpu = mycpuid; 1470 int i; 1471 1472 /* 1473 * Tell main loop that we are ready and wait for initiation 1474 */ 1475 atomic_add_int(&info->tsc_ready_cnt, 1); 1476 while (info->tsc_command == 0) { 1477 lwkt_force_switch(); 1478 } 1479 1480 /* 1481 * Run test for 10000 loops or until tsc_done_cnt != 0 (another 1482 * cpu has finished its test), then increment done. 
1483 */ 1484 crit_enter(); 1485 for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) { 1486 info->tsc_saved[cpu].v = rdtsc_ordered(); 1487 } 1488 crit_exit(); 1489 atomic_add_int(&info->tsc_done_cnt, 1); 1490 1491 lwkt_exit(); 1492 } 1493 1494 static void 1495 tsc_mpsync_test(void) 1496 { 1497 int cpu; 1498 int try; 1499 1500 if (!tsc_invariant) { 1501 /* Not even invariant TSC */ 1502 return; 1503 } 1504 1505 if (ncpus == 1) { 1506 /* Only one CPU */ 1507 tsc_mpsync = 1; 1508 return; 1509 } 1510 1511 /* 1512 * Forcing can be used w/qemu to reduce contention 1513 */ 1514 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync); 1515 1516 if (tsc_mpsync == 0) { 1517 switch (cpu_vendor_id) { 1518 case CPU_VENDOR_INTEL: 1519 /* 1520 * Intel probably works 1521 */ 1522 break; 1523 1524 case CPU_VENDOR_AMD: 1525 /* 1526 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar 1527 * architectures) we have to watch out for 1528 * Erratum 778: 1529 * "Processor Core Time Stamp Counters May 1530 * Experience Drift" 1531 * This Erratum is only listed for cpus in Family 1532 * 15h < Model 30h and for 16h < Model 30h. 1533 * 1534 * AMD < Bulldozer probably doesn't work 1535 */ 1536 if (CPUID_TO_FAMILY(cpu_id) == 0x15 || 1537 CPUID_TO_FAMILY(cpu_id) == 0x16) { 1538 if (CPUID_TO_MODEL(cpu_id) < 0x30) 1539 return; 1540 } else if (CPUID_TO_FAMILY(cpu_id) < 0x17) { 1541 return; 1542 } 1543 break; 1544 1545 default: 1546 /* probably won't work */ 1547 return; 1548 } 1549 } else if (tsc_mpsync < 0) { 1550 kprintf("TSC MP synchronization test is disabled\n"); 1551 tsc_mpsync = 0; 1552 return; 1553 } 1554 1555 /* 1556 * Test even if forced to 1 above. If forced, we will use the TSC 1557 * even if the test fails. (set forced to -1 to disable entirely). 1558 */ 1559 kprintf("TSC testing MP synchronization ...\n"); 1560 1561 /* 1562 * Test TSC MP synchronization on APs. Try up to 4 times. 
1563 */ 1564 for (try = 0; try < 4; ++try) { 1565 struct tsc_mpsync_info info; 1566 uint64_t last; 1567 int64_t xdelta; 1568 int64_t delta; 1569 1570 bzero(&info, sizeof(info)); 1571 1572 for (cpu = 0; cpu < ncpus; ++cpu) { 1573 thread_t td; 1574 lwkt_create(tsc_mpsync_ap_thread, &info, &td, 1575 NULL, TDF_NOSTART, cpu, 1576 "tsc mpsync %d", cpu); 1577 lwkt_setpri_initial(td, curthread->td_pri); 1578 lwkt_schedule(td); 1579 } 1580 while (info.tsc_ready_cnt != ncpus) 1581 lwkt_force_switch(); 1582 1583 /* 1584 * All threads are ready, start the test and wait for 1585 * completion. 1586 */ 1587 info.tsc_command = 1; 1588 while (info.tsc_done_cnt != ncpus) 1589 lwkt_force_switch(); 1590 1591 /* 1592 * Process results 1593 */ 1594 last = info.tsc_saved[0].v; 1595 delta = 0; 1596 for (cpu = 0; cpu < ncpus; ++cpu) { 1597 xdelta = (int64_t)(info.tsc_saved[cpu].v - last); 1598 last = info.tsc_saved[cpu].v; 1599 if (xdelta < 0) 1600 xdelta = -xdelta; 1601 delta += xdelta; 1602 1603 } 1604 1605 /* 1606 * Result from attempt. If its too wild just stop now. 1607 * Also break out if we succeed, no need to try further. 
1608 */ 1609 kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n", 1610 delta, ncpus, delta / ncpus, 1611 tsc_frequency / 100000); 1612 if (delta / ncpus > tsc_frequency / 100) 1613 break; 1614 if (delta / ncpus < tsc_frequency / 100000) { 1615 tsc_mpsync = 1; 1616 break; 1617 } 1618 } 1619 1620 if (tsc_mpsync) 1621 kprintf("TSC is MP synchronized\n"); 1622 else 1623 kprintf("TSC is not MP synchronized\n"); 1624 } 1625 SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL); 1626 1627 SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254"); 1628 SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0, 1629 "frequency"); 1630 SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD, 1631 0, 0, hw_i8254_timestamp, "A", ""); 1632 1633 SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD, 1634 &tsc_present, 0, "TSC Available"); 1635 SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD, 1636 &tsc_invariant, 0, "Invariant TSC"); 1637 SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD, 1638 &tsc_mpsync, 0, "TSC is synchronized across CPUs"); 1639 SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD, 1640 &tsc_frequency, 0, "TSC Frequency"); 1641