1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * Copyright (c) 2008 The DragonFly Project. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * William Jolitz and Don Ahn. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 34 * $FreeBSD: src/sys/i386/isa/clock.c,v 1.149.2.6 2002/11/02 04:41:50 iwasaki Exp $ 35 */ 36 37 /* 38 * Routines to handle clock hardware. 
39 */ 40 41 /* 42 * inittodr, settodr and support routines written 43 * by Christoph Robitschko <chmr@edvz.tu-graz.ac.at> 44 * 45 * reintroduced and updated by Chris Stenton <chris@gnome.co.uk> 8/10/94 46 */ 47 48 #if 0 49 #include "opt_clock.h" 50 #endif 51 52 #include <sys/param.h> 53 #include <sys/systm.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <sys/kernel.h> 57 #include <sys/bus.h> 58 #include <sys/sysctl.h> 59 #include <sys/cons.h> 60 #include <sys/kbio.h> 61 #include <sys/systimer.h> 62 #include <sys/globaldata.h> 63 #include <sys/machintr.h> 64 #include <sys/interrupt.h> 65 66 #include <sys/thread2.h> 67 68 #include <machine/clock.h> 69 #include <machine/cputypes.h> 70 #include <machine/frame.h> 71 #include <machine/ipl.h> 72 #include <machine/limits.h> 73 #include <machine/md_var.h> 74 #include <machine/psl.h> 75 #include <machine/segments.h> 76 #include <machine/smp.h> 77 #include <machine/specialreg.h> 78 #include <machine/intr_machdep.h> 79 80 #include <machine_base/apic/ioapic.h> 81 #include <machine_base/apic/ioapic_abi.h> 82 #include <machine_base/icu/icu.h> 83 #include <bus/isa/isa.h> 84 #include <bus/isa/rtc.h> 85 #include <machine_base/isa/timerreg.h> 86 87 SET_DECLARE(timecounter_init_set, const timecounter_init_t); 88 TIMECOUNTER_INIT(placeholder, NULL); 89 90 static void i8254_restore(void); 91 static void resettodr_on_shutdown(void *arg __unused); 92 93 /* 94 * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we 95 * can use a simple formula for leap years. 
96 */ 97 #define LEAPYEAR(y) ((u_int)(y) % 4 == 0) 98 #define DAYSPERYEAR (31+28+31+30+31+30+31+31+30+31+30+31) 99 100 #ifndef TIMER_FREQ 101 #define TIMER_FREQ 1193182 102 #endif 103 104 static uint8_t i8254_walltimer_sel; 105 static uint16_t i8254_walltimer_cntr; 106 107 int adjkerntz; /* local offset from GMT in seconds */ 108 int disable_rtc_set; /* disable resettodr() if != 0 */ 109 int tsc_present; 110 int tsc_invariant; 111 int tsc_mpsync; 112 int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ 113 int timer0_running; 114 tsc_uclock_t tsc_frequency; 115 tsc_uclock_t tsc_oneus_approx; /* always at least 1, approx only */ 116 117 enum tstate { RELEASED, ACQUIRED }; 118 enum tstate timer0_state; 119 enum tstate timer1_state; 120 enum tstate timer2_state; 121 122 int i8254_cputimer_disable; /* No need to initialize i8254 cputimer. */ 123 124 static int beeping = 0; 125 static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; 126 static u_char rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; 127 static u_char rtc_statusb = RTCSB_24HR | RTCSB_PINTR; 128 static int rtc_loaded; 129 130 static int i8254_cputimer_div; 131 132 static int i8254_nointr; 133 static int i8254_intr_disable = 1; 134 TUNABLE_INT("hw.i8254.intr_disable", &i8254_intr_disable); 135 136 static int calibrate_timers_with_rtc = 0; 137 TUNABLE_INT("hw.calibrate_timers_with_rtc", &calibrate_timers_with_rtc); 138 139 static int calibrate_tsc_fast = 1; 140 TUNABLE_INT("hw.calibrate_tsc_fast", &calibrate_tsc_fast); 141 142 static int calibrate_test; 143 TUNABLE_INT("hw.tsc_calibrate_test", &calibrate_test); 144 145 static struct callout sysbeepstop_ch; 146 147 static sysclock_t i8254_cputimer_count(void); 148 static void i8254_cputimer_construct(struct cputimer *cputimer, sysclock_t last); 149 static void i8254_cputimer_destruct(struct cputimer *cputimer); 150 151 static struct cputimer i8254_cputimer = { 152 .next = SLIST_ENTRY_INITIALIZER, 153 .name = "i8254", 154 .pri = 
CPUTIMER_PRI_8254, 155 .type = 0, /* determined later */ 156 .count = i8254_cputimer_count, 157 .fromhz = cputimer_default_fromhz, 158 .fromus = cputimer_default_fromus, 159 .construct = i8254_cputimer_construct, 160 .destruct = i8254_cputimer_destruct, 161 .freq = TIMER_FREQ 162 }; 163 164 static void i8254_intr_reload(struct cputimer_intr *, sysclock_t); 165 static void i8254_intr_config(struct cputimer_intr *, const struct cputimer *); 166 static void i8254_intr_initclock(struct cputimer_intr *, boolean_t); 167 168 static struct cputimer_intr i8254_cputimer_intr = { 169 .freq = TIMER_FREQ, 170 .reload = i8254_intr_reload, 171 .enable = cputimer_intr_default_enable, 172 .config = i8254_intr_config, 173 .restart = cputimer_intr_default_restart, 174 .pmfixup = cputimer_intr_default_pmfixup, 175 .initclock = i8254_intr_initclock, 176 .pcpuhand = NULL, 177 .next = SLIST_ENTRY_INITIALIZER, 178 .name = "i8254", 179 .type = CPUTIMER_INTR_8254, 180 .prio = CPUTIMER_INTR_PRIO_8254, 181 .caps = CPUTIMER_INTR_CAP_PS, 182 .priv = NULL 183 }; 184 185 /* 186 * Use this to lwkt_switch() when the scheduler clock is not 187 * yet running, otherwise lwkt_switch() won't do anything. 188 * XXX needs cleaning up in lwkt_thread.c 189 */ 190 static void 191 lwkt_force_switch(void) 192 { 193 crit_enter(); 194 lwkt_schedulerclock(curthread); 195 crit_exit(); 196 lwkt_switch(); 197 } 198 199 /* 200 * timer0 clock interrupt. Timer0 is in one-shot mode and has stopped 201 * counting as of this interrupt. We use timer1 in free-running mode (not 202 * generating any interrupts) as our main counter. Each cpu has timeouts 203 * pending. 204 * 205 * This code is INTR_MPSAFE and may be called without the BGL held. 206 */ 207 static void 208 clkintr(void *dummy, void *frame_arg) 209 { 210 static sysclock_t sysclock_count; /* NOTE! 
Must be static */ 211 struct globaldata *gd = mycpu; 212 struct globaldata *gscan; 213 int n; 214 215 /* 216 * SWSTROBE mode is a one-shot, the timer is no longer running 217 */ 218 timer0_running = 0; 219 220 /* 221 * XXX the dispatcher needs work. right now we call systimer_intr() 222 * directly or via IPI for any cpu with systimers queued, which is 223 * usually *ALL* of them. We need to use the LAPIC timer for this. 224 */ 225 sysclock_count = sys_cputimer->count(); 226 for (n = 0; n < ncpus; ++n) { 227 gscan = globaldata_find(n); 228 if (TAILQ_FIRST(&gscan->gd_systimerq) == NULL) 229 continue; 230 if (gscan != gd) { 231 lwkt_send_ipiq3(gscan, (ipifunc3_t)systimer_intr, 232 &sysclock_count, 1); 233 } else { 234 systimer_intr(&sysclock_count, 0, frame_arg); 235 } 236 } 237 } 238 239 240 /* 241 * NOTE! not MP safe. 242 */ 243 int 244 acquire_timer2(int mode) 245 { 246 if (timer2_state != RELEASED) 247 return (-1); 248 timer2_state = ACQUIRED; 249 250 /* 251 * This access to the timer registers is as atomic as possible 252 * because it is a single instruction. We could do better if we 253 * knew the rate. 254 */ 255 outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); 256 return (0); 257 } 258 259 int 260 release_timer2(void) 261 { 262 if (timer2_state != ACQUIRED) 263 return (-1); 264 outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); 265 timer2_state = RELEASED; 266 return (0); 267 } 268 269 #include "opt_ddb.h" 270 #ifdef DDB 271 #include <ddb/ddb.h> 272 273 DB_SHOW_COMMAND(rtc, rtc) 274 { 275 kprintf("%02x/%02x/%02x %02x:%02x:%02x, A = %02x, B = %02x, C = %02x\n", 276 rtcin(RTC_YEAR), rtcin(RTC_MONTH), rtcin(RTC_DAY), 277 rtcin(RTC_HRS), rtcin(RTC_MIN), rtcin(RTC_SEC), 278 rtcin(RTC_STATUSA), rtcin(RTC_STATUSB), rtcin(RTC_INTR)); 279 } 280 #endif /* DDB */ 281 282 /* 283 * Return the current cpu timer count as a 32 bit integer. 
284 */ 285 static 286 sysclock_t 287 i8254_cputimer_count(void) 288 { 289 static uint16_t cputimer_last; 290 uint16_t count; 291 sysclock_t ret; 292 293 clock_lock(); 294 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_LATCH); 295 count = (uint8_t)inb(i8254_walltimer_cntr); /* get countdown */ 296 count |= ((uint8_t)inb(i8254_walltimer_cntr) << 8); 297 count = -count; /* -> countup */ 298 if (count < cputimer_last) /* rollover */ 299 i8254_cputimer.base += 0x00010000; 300 ret = i8254_cputimer.base | count; 301 cputimer_last = count; 302 clock_unlock(); 303 return(ret); 304 } 305 306 /* 307 * This function is called whenever the system timebase changes, allowing 308 * us to calculate what is needed to convert a system timebase tick 309 * into an 8254 tick for the interrupt timer. If we can convert to a 310 * simple shift, multiplication, or division, we do so. Otherwise 64 311 * bit arithmatic is required every time the interrupt timer is reloaded. 312 */ 313 static void 314 i8254_intr_config(struct cputimer_intr *cti, const struct cputimer *timer) 315 { 316 int freq; 317 int div; 318 319 /* 320 * Will a simple divide do the trick? 321 */ 322 div = (timer->freq + (cti->freq / 2)) / cti->freq; 323 freq = cti->freq * div; 324 325 if (freq >= timer->freq - 1 && freq <= timer->freq + 1) 326 i8254_cputimer_div = div; 327 else 328 i8254_cputimer_div = 0; 329 } 330 331 /* 332 * Reload for the next timeout. It is possible for the reload value 333 * to be 0 or negative, indicating that an immediate timer interrupt 334 * is desired. For now make the minimum 2 ticks. 335 * 336 * We may have to convert from the system timebase to the 8254 timebase. 
337 */ 338 static void 339 i8254_intr_reload(struct cputimer_intr *cti, sysclock_t reload) 340 { 341 uint16_t count; 342 343 if (i8254_cputimer_div) 344 reload /= i8254_cputimer_div; 345 else 346 reload = (int64_t)reload * cti->freq / sys_cputimer->freq; 347 348 if ((int)reload < 2) 349 reload = 2; 350 351 clock_lock(); 352 if (timer0_running) { 353 outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); /* count-down timer */ 354 count = (uint8_t)inb(TIMER_CNTR0); /* lsb */ 355 count |= ((uint8_t)inb(TIMER_CNTR0) << 8); /* msb */ 356 if (reload < count) { 357 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 358 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */ 359 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */ 360 } 361 } else { 362 timer0_running = 1; 363 if (reload > 0xFFFF) 364 reload = 0; /* full count */ 365 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 366 outb(TIMER_CNTR0, (uint8_t)reload); /* lsb */ 367 outb(TIMER_CNTR0, (uint8_t)(reload >> 8)); /* msb */ 368 } 369 clock_unlock(); 370 } 371 372 /* 373 * DELAY(usec) - Spin for the specified number of microseconds. 374 * DRIVERSLEEP(usec) - Spin for the specified number of microseconds, 375 * but do a thread switch in the loop 376 * 377 * Relies on timer 1 counting down from (cputimer_freq / hz) 378 * Note: timer had better have been programmed before this is first used! 379 */ 380 static void 381 DODELAY(int n, int doswitch) 382 { 383 ssysclock_t delta, ticks_left; 384 sysclock_t prev_tick, tick; 385 386 #ifdef DELAYDEBUG 387 int getit_calls = 1; 388 int n1; 389 static int state = 0; 390 391 if (state == 0) { 392 state = 1; 393 for (n1 = 1; n1 <= 10000000; n1 *= 10) 394 DELAY(n1); 395 state = 2; 396 } 397 if (state == 1) 398 kprintf("DELAY(%d)...", n); 399 #endif 400 /* 401 * Guard against the timer being uninitialized if we are called 402 * early for console i/o. 
403 */ 404 if (timer0_state == RELEASED && i8254_cputimer_disable == 0) 405 i8254_restore(); 406 407 /* 408 * Read the counter first, so that the rest of the setup overhead is 409 * counted. Then calculate the number of hardware timer ticks 410 * required, rounding up to be sure we delay at least the requested 411 * number of microseconds. 412 */ 413 prev_tick = sys_cputimer->count(); 414 ticks_left = ((u_int)n * (int64_t)sys_cputimer->freq + 999999) / 415 1000000; 416 417 /* 418 * Loop until done. 419 */ 420 while (ticks_left > 0) { 421 tick = sys_cputimer->count(); 422 #ifdef DELAYDEBUG 423 ++getit_calls; 424 #endif 425 delta = tick - prev_tick; 426 prev_tick = tick; 427 if (delta < 0) 428 delta = 0; 429 ticks_left -= delta; 430 if (doswitch && ticks_left > 0) 431 lwkt_switch(); 432 cpu_pause(); 433 } 434 #ifdef DELAYDEBUG 435 if (state == 1) 436 kprintf(" %d calls to getit() at %d usec each\n", 437 getit_calls, (n + 5) / getit_calls); 438 #endif 439 } 440 441 /* 442 * DELAY() never switches. 443 */ 444 void 445 DELAY(int n) 446 { 447 DODELAY(n, 0); 448 } 449 450 /* 451 * Returns non-zero if the specified time period has elapsed. Call 452 * first with last_clock set to 0. 453 */ 454 int 455 CHECKTIMEOUT(TOTALDELAY *tdd) 456 { 457 sysclock_t delta; 458 int us; 459 460 if (tdd->started == 0) { 461 if (timer0_state == RELEASED && i8254_cputimer_disable == 0) 462 i8254_restore(); 463 tdd->last_clock = sys_cputimer->count(); 464 tdd->started = 1; 465 return(0); 466 } 467 delta = sys_cputimer->count() - tdd->last_clock; 468 us = (u_int64_t)delta * (u_int64_t)1000000 / 469 (u_int64_t)sys_cputimer->freq; 470 tdd->last_clock += (u_int64_t)us * (u_int64_t)sys_cputimer->freq / 471 1000000; 472 tdd->us -= us; 473 return (tdd->us < 0); 474 } 475 476 477 /* 478 * DRIVERSLEEP() does not switch if called with a spinlock held or 479 * from a hard interrupt. 
480 */ 481 void 482 DRIVERSLEEP(int usec) 483 { 484 globaldata_t gd = mycpu; 485 486 if (gd->gd_intr_nesting_level || gd->gd_spinlocks) { 487 DODELAY(usec, 0); 488 } else { 489 DODELAY(usec, 1); 490 } 491 } 492 493 static void 494 sysbeepstop(void *chan) 495 { 496 outb(IO_PPI, inb(IO_PPI)&0xFC); /* disable counter2 output to speaker */ 497 beeping = 0; 498 release_timer2(); 499 } 500 501 int 502 sysbeep(int pitch, int period) 503 { 504 if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) 505 return(-1); 506 if (sysbeep_enable == 0) 507 return(-1); 508 /* 509 * Nobody else is using timer2, we do not need the clock lock 510 */ 511 outb(TIMER_CNTR2, pitch); 512 outb(TIMER_CNTR2, (pitch>>8)); 513 if (!beeping) { 514 /* enable counter2 output to speaker */ 515 outb(IO_PPI, inb(IO_PPI) | 3); 516 beeping = period; 517 callout_reset(&sysbeepstop_ch, period, sysbeepstop, NULL); 518 } 519 return (0); 520 } 521 522 /* 523 * RTC support routines 524 */ 525 526 int 527 rtcin(int reg) 528 { 529 u_char val; 530 531 crit_enter(); 532 outb(IO_RTC, reg); 533 inb(0x84); 534 val = inb(IO_RTC + 1); 535 inb(0x84); 536 crit_exit(); 537 return (val); 538 } 539 540 static __inline void 541 writertc(u_char reg, u_char val) 542 { 543 crit_enter(); 544 inb(0x84); 545 outb(IO_RTC, reg); 546 inb(0x84); 547 outb(IO_RTC + 1, val); 548 inb(0x84); /* XXX work around wrong order in rtcin() */ 549 crit_exit(); 550 } 551 552 static __inline int 553 readrtc(int port) 554 { 555 return(bcd2bin(rtcin(port))); 556 } 557 558 static u_int 559 calibrate_clocks(void) 560 { 561 tsc_uclock_t old_tsc; 562 u_int tot_count; 563 sysclock_t count, prev_count; 564 int sec, start_sec, timeout; 565 566 if (bootverbose) 567 kprintf("Calibrating clock(s) ...\n"); 568 if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) 569 goto fail; 570 timeout = 100000000; 571 572 /* Read the mc146818A seconds counter. 
*/ 573 for (;;) { 574 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 575 sec = rtcin(RTC_SEC); 576 break; 577 } 578 if (--timeout == 0) 579 goto fail; 580 } 581 582 /* Wait for the mC146818A seconds counter to change. */ 583 start_sec = sec; 584 for (;;) { 585 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) { 586 sec = rtcin(RTC_SEC); 587 if (sec != start_sec) 588 break; 589 } 590 if (--timeout == 0) 591 goto fail; 592 } 593 594 /* Start keeping track of the i8254 counter. */ 595 prev_count = sys_cputimer->count(); 596 tot_count = 0; 597 598 if (tsc_present) 599 old_tsc = rdtsc(); 600 else 601 old_tsc = 0; /* shut up gcc */ 602 603 /* 604 * Wait for the mc146818A seconds counter to change. Read the i8254 605 * counter for each iteration since this is convenient and only 606 * costs a few usec of inaccuracy. The timing of the final reads 607 * of the counters almost matches the timing of the initial reads, 608 * so the main cause of inaccuracy is the varying latency from 609 * inside getit() or rtcin(RTC_STATUSA) to the beginning of the 610 * rtcin(RTC_SEC) that returns a changed seconds count. The 611 * maximum inaccuracy from this cause is < 10 usec on 486's. 612 */ 613 start_sec = sec; 614 for (;;) { 615 if (!(rtcin(RTC_STATUSA) & RTCSA_TUP)) 616 sec = rtcin(RTC_SEC); 617 count = sys_cputimer->count(); 618 tot_count += (int)(count - prev_count); 619 prev_count = count; 620 if (sec != start_sec) 621 break; 622 if (--timeout == 0) 623 goto fail; 624 } 625 626 /* 627 * Read the cpu cycle counter. The timing considerations are 628 * similar to those for the i8254 clock. 
629 */ 630 if (tsc_present) { 631 tsc_frequency = rdtsc() - old_tsc; 632 if (bootverbose) { 633 kprintf("TSC clock: %jd Hz (Method A)\n", 634 (intmax_t)tsc_frequency); 635 } 636 } 637 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 638 639 kprintf("i8254 clock: %u Hz\n", tot_count); 640 return (tot_count); 641 642 fail: 643 kprintf("failed, using default i8254 clock of %u Hz\n", 644 i8254_cputimer.freq); 645 return (i8254_cputimer.freq); 646 } 647 648 static void 649 i8254_restore(void) 650 { 651 timer0_state = ACQUIRED; 652 653 clock_lock(); 654 655 /* 656 * Timer0 is our fine-grained variable clock interrupt 657 */ 658 outb(TIMER_MODE, TIMER_SEL0 | TIMER_SWSTROBE | TIMER_16BIT); 659 outb(TIMER_CNTR0, 2); /* lsb */ 660 outb(TIMER_CNTR0, 0); /* msb */ 661 clock_unlock(); 662 663 if (!i8254_nointr) { 664 cputimer_intr_register(&i8254_cputimer_intr); 665 cputimer_intr_select(&i8254_cputimer_intr, 0); 666 } 667 668 /* 669 * Timer1 or timer2 is our free-running clock, but only if another 670 * has not been selected. 671 */ 672 cputimer_register(&i8254_cputimer); 673 cputimer_select(&i8254_cputimer, 0); 674 } 675 676 static void 677 i8254_cputimer_construct(struct cputimer *timer, sysclock_t oldclock) 678 { 679 int which; 680 681 /* 682 * Should we use timer 1 or timer 2 ? 
683 */ 684 which = 0; 685 TUNABLE_INT_FETCH("hw.i8254.walltimer", &which); 686 if (which != 1 && which != 2) 687 which = 2; 688 689 switch(which) { 690 case 1: 691 timer->name = "i8254_timer1"; 692 timer->type = CPUTIMER_8254_SEL1; 693 i8254_walltimer_sel = TIMER_SEL1; 694 i8254_walltimer_cntr = TIMER_CNTR1; 695 timer1_state = ACQUIRED; 696 break; 697 case 2: 698 timer->name = "i8254_timer2"; 699 timer->type = CPUTIMER_8254_SEL2; 700 i8254_walltimer_sel = TIMER_SEL2; 701 i8254_walltimer_cntr = TIMER_CNTR2; 702 timer2_state = ACQUIRED; 703 break; 704 } 705 706 timer->base = (oldclock + 0xFFFF) & ~0xFFFF; 707 708 clock_lock(); 709 outb(TIMER_MODE, i8254_walltimer_sel | TIMER_RATEGEN | TIMER_16BIT); 710 outb(i8254_walltimer_cntr, 0); /* lsb */ 711 outb(i8254_walltimer_cntr, 0); /* msb */ 712 outb(IO_PPI, inb(IO_PPI) | 1); /* bit 0: enable gate, bit 1: spkr */ 713 clock_unlock(); 714 } 715 716 static void 717 i8254_cputimer_destruct(struct cputimer *timer) 718 { 719 switch(timer->type) { 720 case CPUTIMER_8254_SEL1: 721 timer1_state = RELEASED; 722 break; 723 case CPUTIMER_8254_SEL2: 724 timer2_state = RELEASED; 725 break; 726 default: 727 break; 728 } 729 timer->type = 0; 730 } 731 732 static void 733 rtc_restore(void) 734 { 735 /* Restore all of the RTC's "status" (actually, control) registers. */ 736 writertc(RTC_STATUSB, RTCSB_24HR); 737 writertc(RTC_STATUSA, rtc_statusa); 738 writertc(RTC_STATUSB, rtc_statusb); 739 } 740 741 /* 742 * Restore all the timers. 743 * 744 * This function is called to resynchronize our core timekeeping after a 745 * long halt, e.g. from apm_default_resume() and friends. It is also 746 * called if after a BIOS call we have detected munging of the 8254. 747 * It is necessary because cputimer_count() counter's delta may have grown 748 * too large for nanouptime() and friends to handle, or (in the case of 8254 749 * munging) might cause the SYSTIMER code to prematurely trigger. 
750 */ 751 void 752 timer_restore(void) 753 { 754 crit_enter(); 755 if (i8254_cputimer_disable == 0) 756 i8254_restore(); /* restore timer_freq and hz */ 757 rtc_restore(); /* reenable RTC interrupts */ 758 crit_exit(); 759 } 760 761 #define MAX_MEASURE_RETRIES 100 762 763 static u_int64_t 764 do_measure(u_int64_t timer_latency, u_int64_t *latency, sysclock_t *time, 765 int *retries) 766 { 767 u_int64_t tsc1, tsc2; 768 u_int64_t threshold; 769 sysclock_t val; 770 int cnt = 0; 771 772 do { 773 if (cnt > MAX_MEASURE_RETRIES/2) 774 threshold = timer_latency << 1; 775 else 776 threshold = timer_latency + (timer_latency >> 2); 777 778 cnt++; 779 tsc1 = rdtsc_ordered(); 780 val = sys_cputimer->count(); 781 tsc2 = rdtsc_ordered(); 782 } while (timer_latency > 0 && cnt < MAX_MEASURE_RETRIES && 783 tsc2 - tsc1 > threshold); 784 785 *retries = cnt - 1; 786 *latency = tsc2 - tsc1; 787 *time = val; 788 return tsc1; 789 } 790 791 static u_int64_t 792 do_calibrate_cputimer(u_int usecs, u_int64_t timer_latency) 793 { 794 if (calibrate_tsc_fast) { 795 u_int64_t old_tsc1, start_lat1, new_tsc1, end_lat1; 796 u_int64_t old_tsc2, start_lat2, new_tsc2, end_lat2; 797 u_int64_t freq1, freq2; 798 sysclock_t start1, end1, start2, end2; 799 int retries1, retries2, retries3, retries4; 800 801 DELAY(1000); 802 old_tsc1 = do_measure(timer_latency, &start_lat1, &start1, 803 &retries1); 804 DELAY(20000); 805 old_tsc2 = do_measure(timer_latency, &start_lat2, &start2, 806 &retries2); 807 DELAY(usecs); 808 new_tsc1 = do_measure(timer_latency, &end_lat1, &end1, 809 &retries3); 810 DELAY(20000); 811 new_tsc2 = do_measure(timer_latency, &end_lat2, &end2, 812 &retries4); 813 814 old_tsc1 += start_lat1; 815 old_tsc2 += start_lat2; 816 freq1 = (new_tsc1 - old_tsc1) + (start_lat1 + end_lat1) / 2; 817 freq2 = (new_tsc2 - old_tsc2) + (start_lat2 + end_lat2) / 2; 818 end1 -= start1; 819 end2 -= start2; 820 /* This should in practice be safe from overflows. 
*/ 821 freq1 = (freq1 * sys_cputimer->freq) / end1; 822 freq2 = (freq2 * sys_cputimer->freq) / end2; 823 if (calibrate_test && (retries1 > 0 || retries2 > 0)) { 824 kprintf("%s: retries: %d, %d, %d, %d\n", 825 __func__, retries1, retries2, retries3, retries4); 826 } 827 if (calibrate_test) { 828 kprintf("%s: freq1=%ju freq2=%ju avg=%ju\n", 829 __func__, freq1, freq2, (freq1 + freq2) / 2); 830 } 831 return (freq1 + freq2) / 2; 832 } else { 833 u_int64_t old_tsc, new_tsc; 834 u_int64_t freq; 835 836 old_tsc = rdtsc_ordered(); 837 DELAY(usecs); 838 new_tsc = rdtsc(); 839 freq = new_tsc - old_tsc; 840 /* This should in practice be safe from overflows. */ 841 freq = (freq * 1000 * 1000) / usecs; 842 return freq; 843 } 844 } 845 846 /* 847 * Initialize 8254 timer 0 early so that it can be used in DELAY(). 848 */ 849 void 850 startrtclock(void) 851 { 852 const timecounter_init_t **list; 853 u_int delta, freq; 854 855 callout_init_mp(&sysbeepstop_ch); 856 857 /* 858 * Can we use the TSC? 859 * 860 * NOTE: If running under qemu, probably a good idea to force the 861 * TSC because we are not likely to detect it as being 862 * invariant or mpsyncd if you don't. This will greatly 863 * reduce SMP contention. 
864 */ 865 if (cpu_feature & CPUID_TSC) { 866 tsc_present = 1; 867 TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_invariant); 868 869 if ((cpu_vendor_id == CPU_VENDOR_INTEL || 870 cpu_vendor_id == CPU_VENDOR_AMD) && 871 cpu_exthigh >= 0x80000007) { 872 u_int regs[4]; 873 874 do_cpuid(0x80000007, regs); 875 if (regs[3] & 0x100) 876 tsc_invariant = 1; 877 } 878 } else { 879 tsc_present = 0; 880 } 881 882 /* 883 * Initial RTC state, don't do anything unexpected 884 */ 885 writertc(RTC_STATUSA, rtc_statusa); 886 writertc(RTC_STATUSB, RTCSB_24HR); 887 888 SET_FOREACH(list, timecounter_init_set) { 889 if ((*list)->configure != NULL) 890 (*list)->configure(); 891 } 892 893 /* 894 * If tsc_frequency is already initialized now, and a flag is set 895 * that i8254 timer is unneeded, we are done. 896 */ 897 if (tsc_frequency != 0 && i8254_cputimer_disable != 0) 898 goto done; 899 900 /* 901 * Set the 8254 timer0 in TIMER_SWSTROBE mode and cause it to 902 * generate an interrupt, which we will ignore for now. 903 * 904 * Set the 8254 timer1 in TIMER_RATEGEN mode and load 0x0000 905 * (so it counts a full 2^16 and repeats). We will use this timer 906 * for our counting. 907 */ 908 if (i8254_cputimer_disable == 0) 909 i8254_restore(); 910 911 kprintf("Using cputimer %s for TSC calibration\n", sys_cputimer->name); 912 913 /* 914 * When booting without verbose messages, it's pointless to run the 915 * calibrate_clocks() calibration code, when we don't use the 916 * results in any way. With bootverbose, we are at least printing 917 * this information to the kernel log. 
918 */ 919 if (i8254_cputimer_disable != 0 || 920 (calibrate_timers_with_rtc == 0 && !bootverbose)) { 921 goto skip_rtc_based; 922 } 923 924 freq = calibrate_clocks(); 925 #ifdef CLK_CALIBRATION_LOOP 926 if (bootverbose) { 927 int c; 928 929 cnpoll(TRUE); 930 kprintf("Press a key on the console to " 931 "abort clock calibration\n"); 932 while ((c = cncheckc()) == -1 || c == NOKEY) 933 calibrate_clocks(); 934 cnpoll(FALSE); 935 } 936 #endif 937 938 /* 939 * Use the calibrated i8254 frequency if it seems reasonable. 940 * Otherwise use the default, and don't use the calibrated i586 941 * frequency. 942 */ 943 delta = freq > i8254_cputimer.freq ? 944 freq - i8254_cputimer.freq : i8254_cputimer.freq - freq; 945 if (delta < i8254_cputimer.freq / 100) { 946 if (calibrate_timers_with_rtc == 0) { 947 kprintf( 948 "hw.calibrate_timers_with_rtc not set - using default i8254 frequency\n"); 949 freq = i8254_cputimer.freq; 950 } 951 /* 952 * NOTE: 953 * Interrupt timer's freq must be adjusted 954 * before we change the cuptimer's frequency. 
955 */ 956 i8254_cputimer_intr.freq = freq; 957 cputimer_set_frequency(&i8254_cputimer, freq); 958 } else { 959 if (bootverbose) 960 kprintf("%d Hz differs from default of %d Hz " 961 "by more than 1%%\n", 962 freq, i8254_cputimer.freq); 963 tsc_frequency = 0; 964 } 965 966 if (tsc_frequency != 0 && calibrate_timers_with_rtc == 0) { 967 kprintf("hw.calibrate_timers_with_rtc not " 968 "set - using old calibration method\n"); 969 tsc_frequency = 0; 970 } 971 972 skip_rtc_based: 973 if (tsc_present && tsc_frequency == 0) { 974 u_int cnt; 975 u_int64_t cputime_latency_tsc = 0, max = 0, min = 0; 976 int i; 977 978 for (i = 0; i < 10; i++) { 979 /* Warm up */ 980 (void)sys_cputimer->count(); 981 } 982 for (i = 0; i < 100; i++) { 983 u_int64_t old_tsc, new_tsc; 984 985 old_tsc = rdtsc_ordered(); 986 (void)sys_cputimer->count(); 987 new_tsc = rdtsc_ordered(); 988 cputime_latency_tsc += (new_tsc - old_tsc); 989 if (max < (new_tsc - old_tsc)) 990 max = new_tsc - old_tsc; 991 if (min == 0 || min > (new_tsc - old_tsc)) 992 min = new_tsc - old_tsc; 993 } 994 cputime_latency_tsc /= 100; 995 kprintf( 996 "Timer latency (in TSC ticks): %lu min=%lu max=%lu\n", 997 cputime_latency_tsc, min, max); 998 /* XXX Instead of this, properly filter out outliers. */ 999 cputime_latency_tsc = min; 1000 1001 if (calibrate_test > 0) { 1002 u_int64_t values[20], avg = 0; 1003 for (i = 1; i <= 20; i++) { 1004 u_int64_t freq; 1005 1006 freq = do_calibrate_cputimer(i * 100 * 1000, 1007 cputime_latency_tsc); 1008 values[i - 1] = freq; 1009 } 1010 /* Compute an average TSC for the 1s to 2s delays. 
*/ 1011 for (i = 10; i < 20; i++) 1012 avg += values[i]; 1013 avg /= 10; 1014 for (i = 0; i < 20; i++) { 1015 kprintf("%ums: %lu (Diff from average: %ld)\n", 1016 (i + 1) * 100, values[i], 1017 (int64_t)(values[i] - avg)); 1018 } 1019 } 1020 1021 if (calibrate_tsc_fast > 0) { 1022 /* HPET would typically be >10MHz */ 1023 if (sys_cputimer->freq >= 10000000) 1024 cnt = 200000; 1025 else 1026 cnt = 500000; 1027 } else { 1028 cnt = 1000000; 1029 } 1030 1031 tsc_frequency = do_calibrate_cputimer(cnt, cputime_latency_tsc); 1032 if (bootverbose && calibrate_timers_with_rtc) { 1033 kprintf("TSC clock: %jd Hz (Method B)\n", 1034 (intmax_t)tsc_frequency); 1035 } 1036 } 1037 1038 done: 1039 if (tsc_present) { 1040 kprintf("TSC%s clock: %jd Hz\n", 1041 tsc_invariant ? " invariant" : "", 1042 (intmax_t)tsc_frequency); 1043 } 1044 tsc_oneus_approx = ((tsc_frequency|1) + 999999) / 1000000; 1045 1046 EVENTHANDLER_REGISTER(shutdown_post_sync, resettodr_on_shutdown, 1047 NULL, SHUTDOWN_PRI_LAST); 1048 } 1049 1050 /* 1051 * Sync the time of day back to the RTC on shutdown, but only if 1052 * we have already loaded it and have not crashed. 1053 */ 1054 static void 1055 resettodr_on_shutdown(void *arg __unused) 1056 { 1057 if (rtc_loaded && panicstr == NULL) { 1058 resettodr(); 1059 } 1060 } 1061 1062 /* 1063 * Initialize the time of day register, based on the time base which is, e.g. 1064 * from a filesystem. 
 */
void
inittodr(time_t base)
{
	unsigned long	sec, days;
	int		year, month;
	int		y, m;
	struct timespec ts;

	/*
	 * If the caller supplied a base time (e.g. from the filesystem),
	 * install it first; the RTC value below overrides it only if the
	 * two disagree by 2 seconds or more.
	 */
	if (base) {
		ts.tv_sec = base;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}

	/* Look if we have a RTC present and the time is valid */
	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
		goto wrong_time;

	/* wait for time update to complete */
	/* If RTCSA_TUP is zero, we have at least 244us before next update */
	/*
	 * The crit section is dropped and re-acquired on each poll,
	 * presumably to open a window for pending interrupts while we
	 * spin — TODO confirm against DragonFly crit_enter() semantics.
	 */
	crit_enter();
	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
		crit_exit();
		crit_enter();
	}

	days = 0;
#ifdef USE_RTC_CENTURY
	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
#else
	/*
	 * Without a century byte, treat two-digit years below 70 as
	 * 20xx rather than 19xx.
	 */
	year = readrtc(RTC_YEAR) + 1900;
	if (year < 1970)
		year += 100;
#endif
	if (year < 1970) {
		crit_exit();
		goto wrong_time;
	}
	/*
	 * Convert the RTC's calendar date to days since Jan 1 1970:
	 * days in the completed months of this year, plus a leap day if
	 * we are past February in a leap year, plus the day-of-month,
	 * plus the days of all completed years (LEAPYEAR() yields 0/1).
	 */
	month = readrtc(RTC_MONTH);
	for (m = 1; m < month; m++)
		days += daysinmonth[m-1];
	if ((month > 2) && LEAPYEAR(year))
		days ++;
	days += readrtc(RTC_DAY) - 1;
	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	sec = ((( days * 24 +
		  readrtc(RTC_HRS)) * 60 +
		  readrtc(RTC_MIN)) * 60 +
		  readrtc(RTC_SEC));
	/* sec now contains the number of seconds, since Jan 1 1970,
	   in the local time zone */

	/* Convert local time to UTC (mirrors the subtraction in resettodr) */
	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Only adjust the system clock if it is off by 2s or more */
	y = (int)(time_second - sec);
	if (y <= -2 || y >= 2) {
		/* badly off, adjust it */
		ts.tv_sec = sec;
		ts.tv_nsec = 0;
		set_timeofday(&ts);
	}
	rtc_loaded = 1;
	crit_exit();
	return;

wrong_time:
	kprintf("Invalid time in real time clock.\n");
	kprintf("Check and reset the date immediately!\n");
}

/*
 * Write system time back to RTC
 */
void
resettodr(void)
{
	struct timeval tv;
	unsigned long tm;
	int m;
	int y;

	if (disable_rtc_set)
		return;

	microtime(&tv);
	tm = tv.tv_sec;

	crit_enter();
	/* Disable RTC updates and interrupts. */
	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);

	/* Calculate local time to put in RTC */

	tm -= tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);

	/* Peel seconds/minutes/hours off tm, writing each field as BCD */
	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */

	/* We have now the days since 01-01-1970 in tm */
	/* Jan 1 1970 was a Thursday, hence the +4 weekday offset */
	writertc(RTC_WDAY, (tm+4)%7);			/* Write back Weekday */
	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
	     tm >= m;
	     y++, m = DAYSPERYEAR + LEAPYEAR(y))
		tm -= m;

	/* Now we have the years in y and the day-of-the-year in tm */
	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
#ifdef USE_RTC_CENTURY
	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
#endif
	/* Find the month (m == 1 is February, gets the leap day) */
	for (m = 0; ; m++) {
		int ml;

		ml = daysinmonth[m];
		if (m == 1 && LEAPYEAR(y))
			ml++;
		if (tm < ml)
			break;
		tm -= ml;
	}

	writertc(RTC_MONTH, bin2bcd(m + 1));		/* Write back Month   */
	writertc(RTC_DAY, bin2bcd(tm + 1));		/* Write back Month Day */

	/* Reenable RTC updates and interrupts. */
	writertc(RTC_STATUSB, rtc_statusb);
	crit_exit();
}

/*
 * Verify that 8254 timer0 interrupts actually arrive on the given IOAPIC
 * irq: snapshot the interrupt counter, force a timer reload, busy-wait
 * 1/100s on the cputimer, and check whether the counter advanced.
 * Returns 0 on success, ENOENT if no interrupt was observed.
 */
static int
i8254_ioapic_trial(int irq, struct cputimer_intr *cti)
{
	sysclock_t base;
	long lastcnt;

	/*
	 * Following code assumes the 8254 is the cpu timer,
	 * so make sure it is.
	 */
	KKASSERT(sys_cputimer == &i8254_cputimer);
	KKASSERT(cti == &i8254_cputimer_intr);

	lastcnt = get_interrupt_counter(irq, mycpuid);

	/*
	 * Force an 8254 Timer0 interrupt and wait 1/100s for
	 * it to happen, then see if we got it.
	 */
	kprintf("IOAPIC: testing 8254 interrupt delivery\n");

	i8254_intr_reload(cti, 2);
	base = sys_cputimer->count();
	while (sys_cputimer->count() - base < sys_cputimer->freq / 100)
		;	/* nothing */

	if (get_interrupt_counter(irq, mycpuid) - lastcnt == 0)
		return ENOENT;
	return 0;
}

/*
 * Start both clocks running.  DragonFly note: the stat clock is no longer
 * used.  Instead, 8254 based systimers are used for all major clock
 * interrupts.
 *
 * In IOAPIC mode the irq 0 routing is validated with i8254_ioapic_trial();
 * if direct routing fails, mixed mode (ExtINT on pin 0) is tried, and if
 * that also fails we either fall back to no timer interrupt (when another
 * cputimer was selected) or panic (when the 8254 was the selected timer).
 */
static void
i8254_intr_initclock(struct cputimer_intr *cti, boolean_t selected)
{
	void *clkdesc = NULL;
	int irq = 0, mixed_mode = 0, error;

	/* Must run on the BSP */
	KKASSERT(mycpuid == 0);

	if (!selected && i8254_intr_disable)
		goto nointr;

	/*
	 * The stat interrupt mask is different without the
	 * statistics clock.  Also, don't set the interrupt
	 * flag which would normally cause the RTC to generate
	 * interrupts.
	 */
	rtc_statusb = RTCSB_24HR;

	/* Finish initializing 8254 timer 0. */
	if (ioapic_enable) {
		irq = machintr_legacy_intr_find(0, INTR_TRIGGER_EDGE,
			INTR_POLARITY_HIGH);
		if (irq < 0) {
mixed_mode_setup:
			/* No direct route for irq 0; try ExtINT mixed mode */
			error = ioapic_conf_legacy_extint(0);
			if (!error) {
				irq = machintr_legacy_intr_find(0,
				    INTR_TRIGGER_EDGE, INTR_POLARITY_HIGH);
				if (irq < 0)
					error = ENOENT;
			}

			if (error) {
				if (!selected) {
					kprintf("IOAPIC: setup mixed mode for "
						"irq 0 failed: %d\n", error);
					goto nointr;
				} else {
					panic("IOAPIC: setup mixed mode for "
					      "irq 0 failed: %d\n", error);
				}
			}
			mixed_mode = 1;
		}
		/* Keep the descriptor so the trial below can unregister it */
		clkdesc = register_int(irq, clkintr, NULL, "clk",
				       NULL,
				       INTR_EXCL | INTR_CLOCK |
				       INTR_NOPOLL | INTR_MPSAFE |
				       INTR_NOENTROPY, 0);
	} else {
		register_int(0, clkintr, NULL, "clk", NULL,
			     INTR_EXCL | INTR_CLOCK |
			     INTR_NOPOLL | INTR_MPSAFE |
			     INTR_NOENTROPY, 0);
	}

	/* Initialize RTC. */
	writertc(RTC_STATUSA, rtc_statusa);
	writertc(RTC_STATUSB, RTCSB_24HR);

	if (ioapic_enable) {
		error = i8254_ioapic_trial(irq, cti);
		if (error) {
			if (mixed_mode) {
				/* Mixed mode already tried — give up */
				if (!selected) {
					kprintf("IOAPIC: mixed mode for irq %d "
						"trial failed: %d\n",
						irq, error);
					goto nointr;
				} else {
					panic("IOAPIC: mixed mode for irq %d "
					      "trial failed: %d\n", irq, error);
				}
			} else {
				kprintf("IOAPIC: warning 8254 is not connected "
					"to the correct pin, try mixed mode\n");
				unregister_int(clkdesc, 0);
				goto mixed_mode_setup;
			}
		}
	}
	return;

nointr:
	i8254_nointr = 1; /* don't try to register again */
	cputimer_intr_deregister(cti);
}

/*
 * Select the RTC status register A divider for the requested stat clock
 * rate: the profiling rate gets RTCSA_PROF, anything else RTCSA_NOPROF.
 * The chosen value is cached in rtc_statusa and written to the RTC.
 */
void
setstatclockrate(int newhz)
{
	if (newhz == RTC_PROFRATE)
		rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF;
	else
		rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
	writertc(RTC_STATUSA, rtc_statusa);
}

#if 0
static
unsigned
tsc_get_timecount(struct timecounter *tc)
{
	return (rdtsc());
}
#endif

#ifdef KERN_TIMESTAMP
#define KERN_TIMESTAMP_SIZE 16384
/* Circular buffer of (tsc, tag) pairs, exported read-only via sysctl */
static u_long tsc[KERN_TIMESTAMP_SIZE] ;
SYSCTL_OPAQUE(_debug, OID_AUTO, timestamp, CTLFLAG_RD, tsc,
	sizeof(tsc), "LU", "Kernel timestamps");

/*
 * Record a kernel timestamp: store the low 32 bits of the TSC and the
 * caller-supplied tag x in the next two slots of the circular buffer,
 * zeroing the following slot to mark the current end of the log.
 * Note the static index i is not interlocked — assumes callers are
 * serialized; TODO confirm.
 */
void
_TSTMP(u_int32_t x)
{
	static int i;

	tsc[i] = (u_int32_t)rdtsc();
	tsc[i+1] = x;
	i = i + 2;
	if (i >= KERN_TIMESTAMP_SIZE)
		i = 0;
	tsc[i] = 0;	/* mark last entry */
}
#endif /* KERN_TIMESTAMP */

/*
 * Sysctl handler: report the current 8254 cputimer count (0 if the 8254
 * is not the system cputimer) and the raw TSC value (0 if no TSC) as a
 * single formatted string.
 */

static int
hw_i8254_timestamp(SYSCTL_HANDLER_ARGS)
{
	sysclock_t count;
	uint64_t tscval;
	char buf[32];

	/* Sample both counters inside one crit section for coherency */
	crit_enter();
	if (sys_cputimer == &i8254_cputimer)
		count = sys_cputimer->count();
	else
		count = 0;
	if (tsc_present)
		tscval = rdtsc();
	else
		tscval = 0;
	crit_exit();
	/*
	 * NOTE(review): "%08x" assumes sysclock_t is 32-bit; if sysclock_t
	 * is 64-bit on this platform the count is truncated/mismatched —
	 * verify against the sysclock_t definition.
	 */
	ksnprintf(buf, sizeof(buf), "%08x %016llx", count, (long long)tscval);
	return(SYSCTL_OUT(req, buf, strlen(buf) + 1));
}

/*
 * Shared state for the TSC MP synchronization test.  Cache-aligned;
 * the AP threads publish per-cpu TSC samples in tsc_saved[] while the
 * control thread drives the test via tsc_command and the counters.
 */
struct tsc_mpsync_info {
	volatile int		tsc_ready_cnt;	/* APs that reached the start barrier */
	volatile int		tsc_done_cnt;	/* APs that finished sampling */
	volatile int		tsc_command;	/* 0 = wait, non-zero = run test */
	volatile int		unused01[5];	/* pad */
	struct {
		uint64_t	v;		/* last TSC sample for this cpu */
		uint64_t	unused02;	/* pad */
	} tsc_saved[MAXCPU];
} __cachealign;

#if 0
/*
 * NOTE(review): dead code (#if 0).  The parameter is named "info" (and
 * typed struct tsc_mpsync_thr) but the body references "arg" throughout,
 * so this will not compile if re-enabled without fixing the naming.
 */
static void
tsc_mpsync_test_loop(struct tsc_mpsync_thr *info)
{
	struct globaldata *gd = mycpu;
	tsc_uclock_t test_end, test_begin;
	u_int i;

	if (bootverbose) {
		kprintf("cpu%d: TSC testing MP synchronization ...\n",
		    gd->gd_cpuid);
	}

	test_begin = rdtsc_ordered();
	/* Run test for 100ms */
	test_end = test_begin + (tsc_frequency / 10);

	arg->tsc_mpsync = 1;
	arg->tsc_target = test_begin;

#define TSC_TEST_TRYMAX		1000000	/* Make sure we could stop */
#define TSC_TEST_TRYMIN		50000

	for (i = 0; i < TSC_TEST_TRYMAX; ++i) {
		struct lwkt_cpusync cs;

		crit_enter();
		lwkt_cpusync_init(&cs, gd->gd_other_cpus,
		    tsc_mpsync_test_remote, arg);
		lwkt_cpusync_interlock(&cs);
		cpu_pause();
		arg->tsc_target = rdtsc_ordered();
		cpu_mfence();
		lwkt_cpusync_deinterlock(&cs);
		crit_exit();
		cpu_pause();

		if (!arg->tsc_mpsync) {
			kprintf("cpu%d: TSC is not MP synchronized @%u\n",
			    gd->gd_cpuid, i);
			break;
		}
		if (arg->tsc_target > test_end && i >= TSC_TEST_TRYMIN)
			break;
	}

#undef TSC_TEST_TRYMIN
#undef TSC_TEST_TRYMAX

	if (arg->tsc_target == test_begin) {
		kprintf("cpu%d: TSC does not tick?!\n", gd->gd_cpuid);
		/* XXX disable TSC? */
		tsc_invariant = 0;
		arg->tsc_mpsync = 0;
		return;
	}

	if (arg->tsc_mpsync && bootverbose) {
		kprintf("cpu%d: TSC is MP synchronized after %u tries\n",
		    gd->gd_cpuid, i);
	}
}

#endif

#define TSC_TEST_COUNT		50000

/*
 * Per-AP worker for the TSC MP synchronization test: wait at the start
 * barrier, then repeatedly sample the TSC into the shared structure.
 */
static void
tsc_mpsync_ap_thread(void *xinfo)
{
	struct tsc_mpsync_info *info = xinfo;
	int cpu = mycpuid;
	int i;

	/*
	 * Tell main loop that we are ready and wait for initiation
	 */
	atomic_add_int(&info->tsc_ready_cnt, 1);
	while (info->tsc_command == 0) {
		lwkt_force_switch();
	}

	/*
	 * Run the test for TSC_TEST_COUNT (50000) loops or until
	 * tsc_done_cnt != 0 (another cpu has finished its test), then
	 * increment done.
	 */
	crit_enter();
	for (i = 0; i < TSC_TEST_COUNT && info->tsc_done_cnt == 0; ++i) {
		info->tsc_saved[cpu].v = rdtsc_ordered();
	}
	crit_exit();
	atomic_add_int(&info->tsc_done_cnt, 1);

	lwkt_exit();
}

/*
 * Boot-time test (SI_BOOT2_FINISH_SMP) of whether the TSC is usable as
 * an MP-synchronized timebase.  Sets tsc_mpsync = 1 on success.  Skips
 * the test entirely for non-invariant TSCs and for known-bad CPU
 * families; hw.tsc_cputimer_force can force the result (1) or disable
 * the test (-1).
 */
static void
tsc_mpsync_test(void)
{
	int cpu;
	int try;

	if (!tsc_invariant) {
		/* Not even invariant TSC */
		return;
	}

	if (ncpus == 1) {
		/* Only one CPU */
		tsc_mpsync = 1;
		return;
	}

	/*
	 * Forcing can be used w/qemu to reduce contention
	 */
	TUNABLE_INT_FETCH("hw.tsc_cputimer_force", &tsc_mpsync);

	if (tsc_mpsync == 0) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_INTEL:
			/*
			 * Intel probably works
			 */
			break;

		case CPU_VENDOR_AMD:
			/*
			 * For AMD 15h and 16h (i.e. The Bulldozer and Jaguar
			 * architectures) we have to watch out for
			 * Erratum 778:
			 *     "Processor Core Time Stamp Counters May
			 *      Experience Drift"
			 * This Erratum is only listed for cpus in Family
			 * 15h < Model 30h and for 16h < Model 30h.
			 *
			 * AMD < Bulldozer probably doesn't work
			 */
			if (CPUID_TO_FAMILY(cpu_id) == 0x15 ||
			    CPUID_TO_FAMILY(cpu_id) == 0x16) {
				if (CPUID_TO_MODEL(cpu_id) < 0x30)
					return;
			} else if (CPUID_TO_FAMILY(cpu_id) < 0x17) {
				return;
			}
			break;

		default:
			/* probably won't work */
			return;
		}
	} else if (tsc_mpsync < 0) {
		kprintf("TSC MP synchronization test is disabled\n");
		tsc_mpsync = 0;
		return;
	}

	/*
	 * Test even if forced to 1 above.  If forced, we will use the TSC
	 * even if the test fails.  (set forced to -1 to disable entirely).
	 */
	kprintf("TSC testing MP synchronization ...\n");

	/*
	 * Test TSC MP synchronization on APs.  Try up to 4 times.
	 */
	for (try = 0; try < 4; ++try) {
		struct tsc_mpsync_info info;
		uint64_t last;
		int64_t xdelta;
		int64_t delta;

		bzero(&info, sizeof(info));

		/* One sampling thread per cpu, all scheduled together */
		for (cpu = 0; cpu < ncpus; ++cpu) {
			thread_t td;
			lwkt_create(tsc_mpsync_ap_thread, &info, &td,
				    NULL, TDF_NOSTART, cpu,
				    "tsc mpsync %d", cpu);
			lwkt_setpri_initial(td, curthread->td_pri);
			lwkt_schedule(td);
		}
		while (info.tsc_ready_cnt != ncpus)
			lwkt_force_switch();

		/*
		 * All threads are ready, start the test and wait for
		 * completion.
		 */
		info.tsc_command = 1;
		while (info.tsc_done_cnt != ncpus)
			lwkt_force_switch();

		/*
		 * Process results: sum the absolute deltas between
		 * successive cpus' final TSC samples.
		 */
		last = info.tsc_saved[0].v;
		delta = 0;
		for (cpu = 0; cpu < ncpus; ++cpu) {
			xdelta = (int64_t)(info.tsc_saved[cpu].v - last);
			last = info.tsc_saved[cpu].v;
			if (xdelta < 0)
				xdelta = -xdelta;
			delta += xdelta;

		}

		/*
		 * Result from attempt.  If its too wild just stop now.
		 * Also break out if we succeed, no need to try further.
		 * Success threshold: average delta under ~10us worth of
		 * TSC ticks; failure cutoff: over ~10ms worth.
		 */
		kprintf("TSC MPSYNC TEST %jd %d -> %jd (10uS=%jd)\n",
			delta, ncpus, delta / ncpus,
			tsc_frequency / 100000);
		if (delta / ncpus > tsc_frequency / 100)
			break;
		if (delta / ncpus < tsc_frequency / 100000) {
			tsc_mpsync = 1;
			break;
		}
	}

	if (tsc_mpsync)
		kprintf("TSC is MP synchronized\n");
	else
		kprintf("TSC is not MP synchronized\n");
}
SYSINIT(tsc_mpsync, SI_BOOT2_FINISH_SMP, SI_ORDER_ANY, tsc_mpsync_test, NULL);

/* Read-only sysctl exports for the 8254 and TSC state probed above */
SYSCTL_NODE(_hw, OID_AUTO, i8254, CTLFLAG_RW, 0, "I8254");
SYSCTL_UINT(_hw_i8254, OID_AUTO, freq, CTLFLAG_RD, &i8254_cputimer.freq, 0,
	    "frequency");
SYSCTL_PROC(_hw_i8254, OID_AUTO, timestamp, CTLTYPE_STRING|CTLFLAG_RD,
	    0, 0, hw_i8254_timestamp, "A", "");

SYSCTL_INT(_hw, OID_AUTO, tsc_present, CTLFLAG_RD,
	   &tsc_present, 0, "TSC Available");
SYSCTL_INT(_hw, OID_AUTO, tsc_invariant, CTLFLAG_RD,
	   &tsc_invariant, 0, "Invariant TSC");
SYSCTL_INT(_hw, OID_AUTO, tsc_mpsync, CTLFLAG_RD,
	   &tsc_mpsync, 0, "TSC is synchronized across CPUs");
SYSCTL_QUAD(_hw, OID_AUTO, tsc_frequency, CTLFLAG_RD,
	    &tsc_frequency, 0, "TSC Frequency");