/* $NetBSD: cpu.c,v 1.80 2011/02/02 12:26:42 bouyer Exp $ */

/*-
 * Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Bill Sommerfeld of RedBack Networks Inc, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1999 Stefan Grefen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by the NetBSD
 * Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.80 2011/02/02 12:26:42 bouyer Exp $"); 66 67 #include "opt_ddb.h" 68 #include "opt_mpbios.h" /* for MPDEBUG */ 69 #include "opt_mtrr.h" 70 71 #include "lapic.h" 72 #include "ioapic.h" 73 74 #ifdef i386 75 #include "npx.h" 76 #endif 77 78 #include <sys/param.h> 79 #include <sys/proc.h> 80 #include <sys/systm.h> 81 #include <sys/device.h> 82 #include <sys/kmem.h> 83 #include <sys/cpu.h> 84 #include <sys/atomic.h> 85 #include <sys/reboot.h> 86 87 #include <uvm/uvm.h> 88 89 #include <machine/cpufunc.h> 90 #include <machine/cpuvar.h> 91 #include <machine/pmap.h> 92 #include <machine/vmparam.h> 93 #include <machine/mpbiosvar.h> 94 #include <machine/pcb.h> 95 #include <machine/specialreg.h> 96 #include <machine/segments.h> 97 #include <machine/gdt.h> 98 #include <machine/mtrr.h> 99 #include <machine/pio.h> 100 #include <machine/cpu_counter.h> 101 102 #ifdef i386 103 #include <machine/tlog.h> 104 #endif 105 106 #include <machine/apicvar.h> 107 #include <machine/i82489reg.h> 108 #include <machine/i82489var.h> 109 110 #include <dev/ic/mc146818reg.h> 111 #include <i386/isa/nvram.h> 112 #include <dev/isa/isareg.h> 113 114 #include "tsc.h" 115 116 #if MAXCPUS > 32 117 #error cpu_info contains 32bit bitmasks 118 #endif 119 120 int cpu_match(device_t, cfdata_t, void *); 121 void cpu_attach(device_t, device_t, void *); 122 123 static bool cpu_suspend(device_t, const pmf_qual_t *); 124 static bool cpu_resume(device_t, const pmf_qual_t *); 125 static bool cpu_shutdown(device_t, int); 126 127 struct cpu_softc { 128 device_t sc_dev; /* device tree glue */ 129 struct cpu_info *sc_info; /* pointer to CPU info */ 130 bool sc_wasonline; 131 }; 132 133 int mp_cpu_start(struct cpu_info *, paddr_t); 134 void mp_cpu_start_cleanup(struct cpu_info *); 135 const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, 136 mp_cpu_start_cleanup }; 137 138 139 CFATTACH_DECL_NEW(cpu, sizeof(struct cpu_softc), 140 cpu_match, 
cpu_attach, NULL, NULL); 141 142 /* 143 * Statically-allocated CPU info for the primary CPU (or the only 144 * CPU, on uniprocessors). The CPU info list is initialized to 145 * point at it. 146 */ 147 #ifdef TRAPLOG 148 struct tlog tlog_primary; 149 #endif 150 struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = { 151 .ci_dev = 0, 152 .ci_self = &cpu_info_primary, 153 .ci_idepth = -1, 154 .ci_curlwp = &lwp0, 155 .ci_curldt = -1, 156 #ifdef TRAPLOG 157 .ci_tlog_base = &tlog_primary, 158 #endif /* !TRAPLOG */ 159 }; 160 161 struct cpu_info *cpu_info_list = &cpu_info_primary; 162 163 static void cpu_set_tss_gates(struct cpu_info *); 164 165 #ifdef i386 166 static void tss_init(struct i386tss *, void *, void *); 167 #endif 168 169 static void cpu_init_idle_lwp(struct cpu_info *); 170 171 uint32_t cpus_attached = 0; 172 uint32_t cpus_running = 0; 173 174 uint32_t cpu_feature[5]; /* X86 CPUID feature bits 175 * [0] basic features %edx 176 * [1] basic features %ecx 177 * [2] extended features %edx 178 * [3] extended features %ecx 179 * [4] VIA padlock features 180 */ 181 182 extern char x86_64_doubleflt_stack[]; 183 184 bool x86_mp_online; 185 paddr_t mp_trampoline_paddr = MP_TRAMPOLINE; 186 static vaddr_t cmos_data_mapping; 187 struct cpu_info *cpu_starting; 188 189 void cpu_hatch(void *); 190 static void cpu_boot_secondary(struct cpu_info *ci); 191 static void cpu_start_secondary(struct cpu_info *ci); 192 static void cpu_copy_trampoline(void); 193 194 /* 195 * Runs once per boot once multiprocessor goo has been detected and 196 * the local APIC on the boot processor has been mapped. 197 * 198 * Called from lapic_boot_init() (from mpbios_scan()). 
199 */ 200 void 201 cpu_init_first(void) 202 { 203 204 cpu_info_primary.ci_cpuid = lapic_cpu_number(); 205 cpu_copy_trampoline(); 206 207 cmos_data_mapping = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY); 208 if (cmos_data_mapping == 0) 209 panic("No KVA for page 0"); 210 pmap_kenter_pa(cmos_data_mapping, 0, VM_PROT_READ|VM_PROT_WRITE, 0); 211 pmap_update(pmap_kernel()); 212 } 213 214 int 215 cpu_match(device_t parent, cfdata_t match, void *aux) 216 { 217 218 return 1; 219 } 220 221 static void 222 cpu_vm_init(struct cpu_info *ci) 223 { 224 int ncolors = 2, i; 225 226 for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) { 227 struct x86_cache_info *cai; 228 int tcolors; 229 230 cai = &ci->ci_cinfo[i]; 231 232 tcolors = atop(cai->cai_totalsize); 233 switch(cai->cai_associativity) { 234 case 0xff: 235 tcolors = 1; /* fully associative */ 236 break; 237 case 0: 238 case 1: 239 break; 240 default: 241 tcolors /= cai->cai_associativity; 242 } 243 ncolors = max(ncolors, tcolors); 244 /* 245 * If the desired number of colors is not a power of 246 * two, it won't be good. Find the greatest power of 247 * two which is an even divisor of the number of colors, 248 * to preserve even coloring of pages. 249 */ 250 if (ncolors & (ncolors - 1) ) { 251 int try, picked = 1; 252 for (try = 1; try < ncolors; try *= 2) { 253 if (ncolors % try == 0) picked = try; 254 } 255 if (picked == 1) { 256 panic("desired number of cache colors %d is " 257 " > 1, but not even!", ncolors); 258 } 259 ncolors = picked; 260 } 261 } 262 263 /* 264 * Knowing the size of the largest cache on this CPU, re-color 265 * our pages. 
266 */ 267 if (ncolors <= uvmexp.ncolors) 268 return; 269 aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors); 270 uvm_page_recolor(ncolors); 271 } 272 273 274 void 275 cpu_attach(device_t parent, device_t self, void *aux) 276 { 277 struct cpu_softc *sc = device_private(self); 278 struct cpu_attach_args *caa = aux; 279 struct cpu_info *ci; 280 uintptr_t ptr; 281 int cpunum = caa->cpu_number; 282 static bool again; 283 284 sc->sc_dev = self; 285 286 if (cpus_attached == ~0) { 287 aprint_error(": increase MAXCPUS\n"); 288 return; 289 } 290 291 /* 292 * If we're an Application Processor, allocate a cpu_info 293 * structure, otherwise use the primary's. 294 */ 295 if (caa->cpu_role == CPU_ROLE_AP) { 296 if ((boothowto & RB_MD1) != 0) { 297 aprint_error(": multiprocessor boot disabled\n"); 298 if (!pmf_device_register(self, NULL, NULL)) 299 aprint_error_dev(self, 300 "couldn't establish power handler\n"); 301 return; 302 } 303 aprint_naive(": Application Processor\n"); 304 ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1, 305 KM_SLEEP); 306 ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); 307 ci->ci_curldt = -1; 308 #ifdef TRAPLOG 309 ci->ci_tlog_base = kmem_zalloc(sizeof(struct tlog), KM_SLEEP); 310 #endif 311 } else { 312 aprint_naive(": %s Processor\n", 313 caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot"); 314 ci = &cpu_info_primary; 315 if (cpunum != lapic_cpu_number()) { 316 /* XXX should be done earlier. 
*/ 317 uint32_t reg; 318 aprint_verbose("\n"); 319 aprint_verbose_dev(self, "running CPU at apic %d" 320 " instead of at expected %d", lapic_cpu_number(), 321 cpunum); 322 reg = i82489_readreg(LAPIC_ID); 323 i82489_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) | 324 (cpunum << LAPIC_ID_SHIFT)); 325 } 326 if (cpunum != lapic_cpu_number()) { 327 aprint_error_dev(self, "unable to reset apic id\n"); 328 } 329 } 330 331 ci->ci_self = ci; 332 sc->sc_info = ci; 333 ci->ci_dev = self; 334 ci->ci_acpiid = caa->cpu_id; 335 ci->ci_cpuid = caa->cpu_number; 336 ci->ci_func = caa->cpu_func; 337 338 /* Must be before mi_cpu_attach(). */ 339 cpu_vm_init(ci); 340 341 if (caa->cpu_role == CPU_ROLE_AP) { 342 int error; 343 344 error = mi_cpu_attach(ci); 345 if (error != 0) { 346 aprint_normal("\n"); 347 aprint_error_dev(self, 348 "mi_cpu_attach failed with %d\n", error); 349 return; 350 } 351 cpu_init_tss(ci); 352 } else { 353 KASSERT(ci->ci_data.cpu_idlelwp != NULL); 354 } 355 356 ci->ci_cpumask = (1 << cpu_index(ci)); 357 pmap_reference(pmap_kernel()); 358 ci->ci_pmap = pmap_kernel(); 359 ci->ci_tlbstate = TLBSTATE_STALE; 360 361 /* 362 * Boot processor may not be attached first, but the below 363 * must be done to allow booting other processors. 364 */ 365 if (!again) { 366 atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY); 367 /* Basic init. */ 368 cpu_intr_init(ci); 369 cpu_get_tsc_freq(ci); 370 cpu_init(ci); 371 cpu_set_tss_gates(ci); 372 pmap_cpu_init_late(ci); 373 if (caa->cpu_role != CPU_ROLE_SP) { 374 /* Enable lapic. */ 375 lapic_enable(); 376 lapic_set_lvt(); 377 lapic_calibrate_timer(ci); 378 } 379 /* Make sure DELAY() is initialized. */ 380 DELAY(1); 381 again = true; 382 } 383 384 /* further PCB init done later. 
*/ 385 386 switch (caa->cpu_role) { 387 case CPU_ROLE_SP: 388 atomic_or_32(&ci->ci_flags, CPUF_SP); 389 cpu_identify(ci); 390 x86_errata(); 391 x86_cpu_idle_init(); 392 break; 393 394 case CPU_ROLE_BP: 395 atomic_or_32(&ci->ci_flags, CPUF_BSP); 396 cpu_identify(ci); 397 x86_errata(); 398 x86_cpu_idle_init(); 399 break; 400 401 case CPU_ROLE_AP: 402 /* 403 * report on an AP 404 */ 405 cpu_intr_init(ci); 406 gdt_alloc_cpu(ci); 407 cpu_set_tss_gates(ci); 408 pmap_cpu_init_early(ci); 409 pmap_cpu_init_late(ci); 410 cpu_start_secondary(ci); 411 if (ci->ci_flags & CPUF_PRESENT) { 412 struct cpu_info *tmp; 413 414 cpu_identify(ci); 415 tmp = cpu_info_list; 416 while (tmp->ci_next) 417 tmp = tmp->ci_next; 418 419 tmp->ci_next = ci; 420 } 421 break; 422 423 default: 424 aprint_normal("\n"); 425 panic("unknown processor type??\n"); 426 } 427 428 pat_init(ci); 429 atomic_or_32(&cpus_attached, ci->ci_cpumask); 430 431 if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown)) 432 aprint_error_dev(self, "couldn't establish power handler\n"); 433 434 if (mp_verbose) { 435 struct lwp *l = ci->ci_data.cpu_idlelwp; 436 struct pcb *pcb = lwp_getpcb(l); 437 438 aprint_verbose_dev(self, 439 "idle lwp at %p, idle sp at %p\n", 440 l, 441 #ifdef i386 442 (void *)pcb->pcb_esp 443 #else 444 (void *)pcb->pcb_rsp 445 #endif 446 ); 447 } 448 } 449 450 /* 451 * Initialize the processor appropriately. 452 */ 453 454 void 455 cpu_init(struct cpu_info *ci) 456 { 457 458 lcr0(rcr0() | CR0_WP); 459 460 /* 461 * On a P6 or above, enable global TLB caching if the 462 * hardware supports it. 463 */ 464 if (cpu_feature[0] & CPUID_PGE) 465 lcr4(rcr4() | CR4_PGE); /* enable global TLB caching */ 466 467 /* 468 * If we have FXSAVE/FXRESTOR, use them. 469 */ 470 if (cpu_feature[0] & CPUID_FXSR) { 471 lcr4(rcr4() | CR4_OSFXSR); 472 473 /* 474 * If we have SSE/SSE2, enable XMM exceptions. 
475 */ 476 if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2)) 477 lcr4(rcr4() | CR4_OSXMMEXCPT); 478 } 479 480 #ifdef MTRR 481 /* 482 * On a P6 or above, initialize MTRR's if the hardware supports them. 483 */ 484 if (cpu_feature[0] & CPUID_MTRR) { 485 if ((ci->ci_flags & CPUF_AP) == 0) 486 i686_mtrr_init_first(); 487 mtrr_init_cpu(ci); 488 } 489 490 #ifdef i386 491 if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) { 492 /* 493 * Must be a K6-2 Step >= 7 or a K6-III. 494 */ 495 if (CPUID2FAMILY(ci->ci_signature) == 5) { 496 if (CPUID2MODEL(ci->ci_signature) > 8 || 497 (CPUID2MODEL(ci->ci_signature) == 8 && 498 CPUID2STEPPING(ci->ci_signature) >= 7)) { 499 mtrr_funcs = &k6_mtrr_funcs; 500 k6_mtrr_init_first(); 501 mtrr_init_cpu(ci); 502 } 503 } 504 } 505 #endif /* i386 */ 506 #endif /* MTRR */ 507 508 atomic_or_32(&cpus_running, ci->ci_cpumask); 509 510 if (ci != &cpu_info_primary) { 511 /* Synchronize TSC again, and check for drift. */ 512 wbinvd(); 513 atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 514 tsc_sync_ap(ci); 515 } else { 516 atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 517 } 518 } 519 520 void 521 cpu_boot_secondary_processors(void) 522 { 523 struct cpu_info *ci; 524 u_long i; 525 526 /* Now that we know the number of CPUs, patch the text segment. */ 527 x86_patch(false); 528 529 for (i=0; i < maxcpus; i++) { 530 ci = cpu_lookup(i); 531 if (ci == NULL) 532 continue; 533 if (ci->ci_data.cpu_idlelwp == NULL) 534 continue; 535 if ((ci->ci_flags & CPUF_PRESENT) == 0) 536 continue; 537 if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) 538 continue; 539 cpu_boot_secondary(ci); 540 } 541 542 x86_mp_online = true; 543 544 /* Now that we know about the TSC, attach the timecounter. */ 545 tsc_tc_init(); 546 547 /* Enable zeroing of pages in the idle loop if we have SSE2. 
*/ 548 vm_page_zero_enable = ((cpu_feature[0] & CPUID_SSE2) != 0); 549 } 550 551 static void 552 cpu_init_idle_lwp(struct cpu_info *ci) 553 { 554 struct lwp *l = ci->ci_data.cpu_idlelwp; 555 struct pcb *pcb = lwp_getpcb(l); 556 557 pcb->pcb_cr0 = rcr0(); 558 } 559 560 void 561 cpu_init_idle_lwps(void) 562 { 563 struct cpu_info *ci; 564 u_long i; 565 566 for (i = 0; i < maxcpus; i++) { 567 ci = cpu_lookup(i); 568 if (ci == NULL) 569 continue; 570 if (ci->ci_data.cpu_idlelwp == NULL) 571 continue; 572 if ((ci->ci_flags & CPUF_PRESENT) == 0) 573 continue; 574 cpu_init_idle_lwp(ci); 575 } 576 } 577 578 void 579 cpu_start_secondary(struct cpu_info *ci) 580 { 581 extern paddr_t mp_pdirpa; 582 u_long psl; 583 int i; 584 585 mp_pdirpa = pmap_init_tmp_pgtbl(mp_trampoline_paddr); 586 atomic_or_32(&ci->ci_flags, CPUF_AP); 587 ci->ci_curlwp = ci->ci_data.cpu_idlelwp; 588 if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) { 589 return; 590 } 591 592 /* 593 * Wait for it to become ready. Setting cpu_starting opens the 594 * initial gate and allows the AP to start soft initialization. 595 */ 596 KASSERT(cpu_starting == NULL); 597 cpu_starting = ci; 598 for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) { 599 #ifdef MPDEBUG 600 extern int cpu_trace[3]; 601 static int otrace[3]; 602 if (memcmp(otrace, cpu_trace, sizeof(otrace)) != 0) { 603 aprint_debug_dev(ci->ci_dev, "trace %02x %02x %02x\n", 604 cpu_trace[0], cpu_trace[1], cpu_trace[2]); 605 memcpy(otrace, cpu_trace, sizeof(otrace)); 606 } 607 #endif 608 i8254_delay(10); 609 } 610 611 if ((ci->ci_flags & CPUF_PRESENT) == 0) { 612 aprint_error_dev(ci->ci_dev, "failed to become ready\n"); 613 #if defined(MPDEBUG) && defined(DDB) 614 printf("dropping into debugger; continue from here to resume boot\n"); 615 Debugger(); 616 #endif 617 } else { 618 /* 619 * Synchronize time stamp counters. Invalidate cache and do 620 * twice to try and minimize possible cache effects. 
Disable 621 * interrupts to try and rule out any external interference. 622 */ 623 psl = x86_read_psl(); 624 x86_disable_intr(); 625 wbinvd(); 626 tsc_sync_bp(ci); 627 x86_write_psl(psl); 628 } 629 630 CPU_START_CLEANUP(ci); 631 cpu_starting = NULL; 632 } 633 634 void 635 cpu_boot_secondary(struct cpu_info *ci) 636 { 637 int64_t drift; 638 u_long psl; 639 int i; 640 641 atomic_or_32(&ci->ci_flags, CPUF_GO); 642 for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { 643 i8254_delay(10); 644 } 645 if ((ci->ci_flags & CPUF_RUNNING) == 0) { 646 aprint_error_dev(ci->ci_dev, "failed to start\n"); 647 #if defined(MPDEBUG) && defined(DDB) 648 printf("dropping into debugger; continue from here to resume boot\n"); 649 Debugger(); 650 #endif 651 } else { 652 /* Synchronize TSC again, check for drift. */ 653 drift = ci->ci_data.cpu_cc_skew; 654 psl = x86_read_psl(); 655 x86_disable_intr(); 656 wbinvd(); 657 tsc_sync_bp(ci); 658 x86_write_psl(psl); 659 drift -= ci->ci_data.cpu_cc_skew; 660 aprint_debug_dev(ci->ci_dev, "TSC skew=%lld drift=%lld\n", 661 (long long)ci->ci_data.cpu_cc_skew, (long long)drift); 662 tsc_sync_drift(drift); 663 } 664 } 665 666 /* 667 * The CPU ends up here when its ready to run 668 * This is called from code in mptramp.s; at this point, we are running 669 * in the idle pcb/idle stack of the new CPU. When this function returns, 670 * this processor will enter the idle loop and start looking for work. 671 */ 672 void 673 cpu_hatch(void *v) 674 { 675 struct cpu_info *ci = (struct cpu_info *)v; 676 struct pcb *pcb; 677 int s, i; 678 679 cpu_init_msrs(ci, true); 680 cpu_probe(ci); 681 682 ci->ci_data.cpu_cc_freq = cpu_info_primary.ci_data.cpu_cc_freq; 683 /* cpu_get_tsc_freq(ci); */ 684 685 KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0); 686 687 /* 688 * Synchronize time stamp counters. Invalidate cache and do twice 689 * to try and minimize possible cache effects. Note that interrupts 690 * are off at this point. 
691 */ 692 wbinvd(); 693 atomic_or_32(&ci->ci_flags, CPUF_PRESENT); 694 tsc_sync_ap(ci); 695 696 /* 697 * Wait to be brought online. Use 'monitor/mwait' if available, 698 * in order to make the TSC drift as much as possible. so that 699 * we can detect it later. If not available, try 'pause'. 700 * We'd like to use 'hlt', but we have interrupts off. 701 */ 702 while ((ci->ci_flags & CPUF_GO) == 0) { 703 if ((cpu_feature[1] & CPUID2_MONITOR) != 0) { 704 x86_monitor(&ci->ci_flags, 0, 0); 705 if ((ci->ci_flags & CPUF_GO) != 0) { 706 continue; 707 } 708 x86_mwait(0, 0); 709 } else { 710 for (i = 10000; i != 0; i--) { 711 x86_pause(); 712 } 713 } 714 } 715 716 /* Because the text may have been patched in x86_patch(). */ 717 wbinvd(); 718 x86_flush(); 719 720 KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); 721 722 #ifdef PAE 723 pd_entry_t * l3_pd = ci->ci_pae_l3_pdir; 724 for (i = 0 ; i < PDP_SIZE; i++) { 725 l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PG_V; 726 } 727 lcr3(ci->ci_pae_l3_pdirpa); 728 #else 729 lcr3(pmap_pdirpa(pmap_kernel(), 0)); 730 #endif 731 732 pcb = lwp_getpcb(curlwp); 733 pcb->pcb_cr3 = rcr3(); 734 pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); 735 lcr0(pcb->pcb_cr0); 736 737 cpu_init_idt(); 738 gdt_init_cpu(ci); 739 lapic_enable(); 740 lapic_set_lvt(); 741 lapic_initclocks(); 742 743 #ifdef i386 744 #if NNPX > 0 745 npxinit(ci); 746 #endif 747 #else 748 fpuinit(ci); 749 #endif 750 lldt(GSYSSEL(GLDT_SEL, SEL_KPL)); 751 ltr(ci->ci_tss_sel); 752 753 cpu_init(ci); 754 cpu_get_tsc_freq(ci); 755 756 s = splhigh(); 757 #ifdef i386 758 lapic_tpr = 0; 759 #else 760 lcr8(0); 761 #endif 762 x86_enable_intr(); 763 splx(s); 764 x86_errata(); 765 766 aprint_debug_dev(ci->ci_dev, "running\n"); 767 } 768 769 #if defined(DDB) 770 771 #include <ddb/db_output.h> 772 #include <machine/db_machdep.h> 773 774 /* 775 * Dump CPU information from ddb. 
776 */ 777 void 778 cpu_debug_dump(void) 779 { 780 struct cpu_info *ci; 781 CPU_INFO_ITERATOR cii; 782 783 db_printf("addr dev id flags ipis curlwp fpcurlwp\n"); 784 for (CPU_INFO_FOREACH(cii, ci)) { 785 db_printf("%p %s %ld %x %x %10p %10p\n", 786 ci, 787 ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev), 788 (long)ci->ci_cpuid, 789 ci->ci_flags, ci->ci_ipis, 790 ci->ci_curlwp, 791 ci->ci_fpcurlwp); 792 } 793 } 794 #endif 795 796 static void 797 cpu_copy_trampoline(void) 798 { 799 /* 800 * Copy boot code. 801 */ 802 extern u_char cpu_spinup_trampoline[]; 803 extern u_char cpu_spinup_trampoline_end[]; 804 805 vaddr_t mp_trampoline_vaddr; 806 807 mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 808 UVM_KMF_VAONLY); 809 810 pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr, 811 VM_PROT_READ | VM_PROT_WRITE, 0); 812 pmap_update(pmap_kernel()); 813 memcpy((void *)mp_trampoline_vaddr, 814 cpu_spinup_trampoline, 815 cpu_spinup_trampoline_end - cpu_spinup_trampoline); 816 817 pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE); 818 pmap_update(pmap_kernel()); 819 uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY); 820 } 821 822 #ifdef i386 823 static void 824 tss_init(struct i386tss *tss, void *stack, void *func) 825 { 826 KASSERT(curcpu()->ci_pmap == pmap_kernel()); 827 828 memset(tss, 0, sizeof *tss); 829 tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16); 830 tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 831 tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL); 832 tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL); 833 tss->tss_gs = tss->__tss_es = tss->__tss_ds = 834 tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL); 835 /* %cr3 contains the value associated to pmap_kernel */ 836 tss->tss_cr3 = rcr3(); 837 tss->tss_esp = (int)((char *)stack + USPACE - 16); 838 tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 839 tss->__tss_eflags = PSL_MBO | PSL_NT; /* XXX not needed? 
*/ 840 tss->__tss_eip = (int)func; 841 } 842 843 /* XXX */ 844 #define IDTVEC(name) __CONCAT(X, name) 845 typedef void (vector)(void); 846 extern vector IDTVEC(tss_trap08); 847 #ifdef DDB 848 extern vector Xintrddbipi; 849 extern int ddb_vec; 850 #endif 851 852 static void 853 cpu_set_tss_gates(struct cpu_info *ci) 854 { 855 struct segment_descriptor sd; 856 857 ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, 858 UVM_KMF_WIRED); 859 tss_init(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack, 860 IDTVEC(tss_trap08)); 861 setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1, 862 SDT_SYS386TSS, SEL_KPL, 0, 0); 863 ci->ci_gdt[GTRAPTSS_SEL].sd = sd; 864 setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL, 865 GSEL(GTRAPTSS_SEL, SEL_KPL)); 866 867 #if defined(DDB) 868 /* 869 * Set up separate handler for the DDB IPI, so that it doesn't 870 * stomp on a possibly corrupted stack. 871 * 872 * XXX overwriting the gate set in db_machine_init. 873 * Should rearrange the code so that it's set only once. 874 */ 875 ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, 876 UVM_KMF_WIRED); 877 tss_init(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, Xintrddbipi); 878 879 setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1, 880 SDT_SYS386TSS, SEL_KPL, 0, 0); 881 ci->ci_gdt[GIPITSS_SEL].sd = sd; 882 883 setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL, 884 GSEL(GIPITSS_SEL, SEL_KPL)); 885 #endif 886 } 887 #else 888 static void 889 cpu_set_tss_gates(struct cpu_info *ci) 890 { 891 892 } 893 #endif /* i386 */ 894 895 int 896 mp_cpu_start(struct cpu_info *ci, paddr_t target) 897 { 898 unsigned short dwordptr[2]; 899 int error; 900 901 /* 902 * Bootstrap code must be addressable in real mode 903 * and it must be page aligned. 904 */ 905 KASSERT(target < 0x10000 && target % PAGE_SIZE == 0); 906 907 /* 908 * "The BSP must initialize CMOS shutdown code to 0Ah ..." 
909 */ 910 911 outb(IO_RTC, NVRAM_RESET); 912 outb(IO_RTC+1, NVRAM_RESET_JUMP); 913 914 /* 915 * "and the warm reset vector (DWORD based at 40:67) to point 916 * to the AP startup code ..." 917 */ 918 919 dwordptr[0] = 0; 920 dwordptr[1] = target >> 4; 921 922 memcpy((uint8_t *)cmos_data_mapping + 0x467, dwordptr, 4); 923 924 if ((cpu_feature[0] & CPUID_APIC) == 0) { 925 aprint_error("mp_cpu_start: CPU does not have APIC\n"); 926 return ENODEV; 927 } 928 929 /* 930 * ... prior to executing the following sequence:". We'll also add in 931 * local cache flush, in case the BIOS has left the AP with its cache 932 * disabled. It may not be able to cope with MP coherency. 933 */ 934 wbinvd(); 935 936 if (ci->ci_flags & CPUF_AP) { 937 error = x86_ipi_init(ci->ci_cpuid); 938 if (error != 0) { 939 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n", 940 __func__); 941 return error; 942 } 943 i8254_delay(10000); 944 945 error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 946 if (error != 0) { 947 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n", 948 __func__); 949 return error; 950 } 951 i8254_delay(200); 952 953 error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 954 if (error != 0) { 955 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (3)\n", 956 __func__); 957 return error; 958 } 959 i8254_delay(200); 960 } 961 962 return 0; 963 } 964 965 void 966 mp_cpu_start_cleanup(struct cpu_info *ci) 967 { 968 /* 969 * Ensure the NVRAM reset byte contains something vaguely sane. 
970 */ 971 972 outb(IO_RTC, NVRAM_RESET); 973 outb(IO_RTC+1, NVRAM_RESET_RST); 974 } 975 976 #ifdef __x86_64__ 977 typedef void (vector)(void); 978 extern vector Xsyscall, Xsyscall32; 979 #endif 980 981 void 982 cpu_init_msrs(struct cpu_info *ci, bool full) 983 { 984 #ifdef __x86_64__ 985 wrmsr(MSR_STAR, 986 ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 987 ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48)); 988 wrmsr(MSR_LSTAR, (uint64_t)Xsyscall); 989 wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32); 990 wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C); 991 992 if (full) { 993 wrmsr(MSR_FSBASE, 0); 994 wrmsr(MSR_GSBASE, (uint64_t)ci); 995 wrmsr(MSR_KERNELGSBASE, 0); 996 } 997 #endif /* __x86_64__ */ 998 999 if (cpu_feature[2] & CPUID_NOX) 1000 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); 1001 } 1002 1003 void 1004 cpu_offline_md(void) 1005 { 1006 int s; 1007 1008 s = splhigh(); 1009 #ifdef i386 1010 #if NNPX > 0 1011 npxsave_cpu(true); 1012 #endif 1013 #else 1014 fpusave_cpu(true); 1015 #endif 1016 splx(s); 1017 } 1018 1019 /* XXX joerg restructure and restart CPUs individually */ 1020 static bool 1021 cpu_suspend(device_t dv, const pmf_qual_t *qual) 1022 { 1023 struct cpu_softc *sc = device_private(dv); 1024 struct cpu_info *ci = sc->sc_info; 1025 int err; 1026 1027 if (ci->ci_flags & CPUF_PRIMARY) 1028 return true; 1029 if (ci->ci_data.cpu_idlelwp == NULL) 1030 return true; 1031 if ((ci->ci_flags & CPUF_PRESENT) == 0) 1032 return true; 1033 1034 sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE); 1035 1036 if (sc->sc_wasonline) { 1037 mutex_enter(&cpu_lock); 1038 err = cpu_setstate(ci, false); 1039 mutex_exit(&cpu_lock); 1040 1041 if (err) 1042 return false; 1043 } 1044 1045 return true; 1046 } 1047 1048 static bool 1049 cpu_resume(device_t dv, const pmf_qual_t *qual) 1050 { 1051 struct cpu_softc *sc = device_private(dv); 1052 struct cpu_info *ci = sc->sc_info; 1053 int err = 0; 1054 1055 if (ci->ci_flags & CPUF_PRIMARY) 1056 return true; 1057 if 
(ci->ci_data.cpu_idlelwp == NULL) 1058 return true; 1059 if ((ci->ci_flags & CPUF_PRESENT) == 0) 1060 return true; 1061 1062 if (sc->sc_wasonline) { 1063 mutex_enter(&cpu_lock); 1064 err = cpu_setstate(ci, true); 1065 mutex_exit(&cpu_lock); 1066 } 1067 1068 return err == 0; 1069 } 1070 1071 static bool 1072 cpu_shutdown(device_t dv, int how) 1073 { 1074 return cpu_suspend(dv, NULL); 1075 } 1076 1077 void 1078 cpu_get_tsc_freq(struct cpu_info *ci) 1079 { 1080 uint64_t last_tsc; 1081 1082 if (cpu_hascounter()) { 1083 last_tsc = cpu_counter_serializing(); 1084 i8254_delay(100000); 1085 ci->ci_data.cpu_cc_freq = 1086 (cpu_counter_serializing() - last_tsc) * 10; 1087 } 1088 } 1089 1090 void 1091 x86_cpu_idle_mwait(void) 1092 { 1093 struct cpu_info *ci = curcpu(); 1094 1095 KASSERT(ci->ci_ilevel == IPL_NONE); 1096 1097 x86_monitor(&ci->ci_want_resched, 0, 0); 1098 if (__predict_false(ci->ci_want_resched)) { 1099 return; 1100 } 1101 x86_mwait(0, 0); 1102 } 1103 1104 void 1105 x86_cpu_idle_halt(void) 1106 { 1107 struct cpu_info *ci = curcpu(); 1108 1109 KASSERT(ci->ci_ilevel == IPL_NONE); 1110 1111 x86_disable_intr(); 1112 if (!__predict_false(ci->ci_want_resched)) { 1113 x86_stihlt(); 1114 } else { 1115 x86_enable_intr(); 1116 } 1117 } 1118 1119 /* 1120 * Loads pmap for the current CPU. 1121 */ 1122 void 1123 cpu_load_pmap(struct pmap *pmap) 1124 { 1125 #ifdef PAE 1126 int i, s; 1127 struct cpu_info *ci; 1128 1129 s = splvm(); /* just to be safe */ 1130 ci = curcpu(); 1131 pd_entry_t *l3_pd = ci->ci_pae_l3_pdir; 1132 for (i = 0 ; i < PDP_SIZE; i++) { 1133 l3_pd[i] = pmap->pm_pdirpa[i] | PG_V; 1134 } 1135 splx(s); 1136 tlbflush(); 1137 #else /* PAE */ 1138 lcr3(pmap_pdirpa(pmap, 0)); 1139 #endif /* PAE */ 1140 } 1141