/*	$NetBSD: x86_machdep.c,v 1.44 2010/10/21 11:17:54 yamt Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * Copyright (c) 2005, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.44 2010/10/21 11:17:54 yamt Exp $");

#include "opt_modular.h"
#include "opt_physmem.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kcore.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/extent.h>

#include <x86/cpuvar.h>
#include <x86/cputypes.h>
#include <x86/machdep.h>
#include <x86/nmi.h>
#include <x86/pio.h>

#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>

#include <machine/bootinfo.h>
#include <machine/vmparam.h>

#include <uvm/uvm_extern.h>

void (*x86_cpu_idle)(void);
static bool x86_cpu_idle_ipi;
static char x86_cpu_idle_text[16];

int check_pa_acc(paddr_t, vm_prot_t);

/* --------------------------------------------------------------------- */

/*
 * Main bootinfo structure.  This is filled in by the bootstrap process
 * done in locore.S based on the information passed by the boot loader.
 */
struct bootinfo bootinfo;

/* --------------------------------------------------------------------- */

static kauth_listener_t x86_listener;

/*
 * Given the type of a bootinfo entry, looks for a matching item inside
 * the bootinfo structure.  If found, returns a pointer to it (which must
 * then be cast to the appropriate bootinfo_* type); otherwise, returns
 * NULL.
 */
void *
lookup_bootinfo(int type)
{
	bool found;
	int i;
	struct btinfo_common *bic;

	bic = (struct btinfo_common *)(bootinfo.bi_data);
	found = FALSE;
	for (i = 0; i < bootinfo.bi_nentries && !found; i++) {
		if (bic->type == type)
			found = TRUE;
		else
			bic = (struct btinfo_common *)
			    ((uint8_t *)bic + bic->len);
	}

	return found ? bic : NULL;
}
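/*
 * Sketch of a typical caller (illustrative only): the result is cast to
 * the specific record that was requested.  Here iomem_ex stands for the
 * port's I/O-memory extent map, which is created in the MD machdep code.
 *
 *	struct btinfo_memmap *bim;
 *
 *	bim = lookup_bootinfo(BTINFO_MEMMAP);
 *	if (bim != NULL && bim->num > 0)
 *		initx86_parse_memmap(bim, iomem_ex);
 */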
/*
 * check_pa_acc: check if the given physical address is accessible.
 * Addresses that fall within managed RAM are always allowed; anything
 * else is subject to a KAUTH_MACHDEP_UNMANAGEDMEM authorization check.
 */
int
check_pa_acc(paddr_t pa, vm_prot_t prot)
{
	extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
	extern int mem_cluster_cnt;
	int i;

	for (i = 0; i < mem_cluster_cnt; i++) {
		const phys_ram_seg_t *seg = &mem_clusters[i];
		paddr_t lstart = seg->start;

		if (lstart <= pa && pa - lstart <= seg->size) {
			return 0;
		}
	}

	return kauth_authorize_machdep(kauth_cred_get(),
	    KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL);
}

#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
 */
void
module_init_md(void)
{
	struct btinfo_modulelist *biml;
	struct bi_modulelist_entry *bi, *bimax;

	biml = lookup_bootinfo(BTINFO_MODULELIST);
	if (biml == NULL) {
		aprint_debug("No module info at boot\n");
		return;
	}

	bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
	bimax = bi + biml->num;
	for (; bi < bimax; bi++) {
		if (bi->type != BI_MODULE_ELF) {
			aprint_debug("Skipping non-ELF module\n");
			continue;
		}
		aprint_debug("Prep module path=%s len=%d pa=%x\n", bi->path,
		    bi->len, bi->base);
		KASSERT(trunc_page(bi->base) == bi->base);
		(void)module_prime((void *)((uintptr_t)bi->base + KERNBASE),
		    bi->len);
	}
}
#endif	/* MODULAR */
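/*
 * cpu_need_resched: flag the LWP running on the target CPU for
 * rescheduling and, where necessary, poke that CPU with an IPI (or with
 * the preemption soft interrupt when kernel preemption is requested).
 */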
void
cpu_need_resched(struct cpu_info *ci, int flags)
{
	struct cpu_info *cur;
	lwp_t *l;

	KASSERT(kpreempt_disabled());
	cur = curcpu();
	l = ci->ci_data.cpu_onproc;
	ci->ci_want_resched |= flags;

	if (__predict_false((l->l_pflag & LP_INTR) != 0)) {
		/*
		 * No point doing anything, it will switch soon.
		 * Also here to prevent an assertion failure in
		 * kpreempt() due to preemption being set on a
		 * soft interrupt LWP.
		 */
		return;
	}

	if (l == ci->ci_data.cpu_idlelwp) {
		if (ci == cur)
			return;
#ifndef XEN /* XXX review when Xen gets MP support */
		if (x86_cpu_idle_ipi != false)
			x86_send_ipi(ci, 0);
#endif
		return;
	}

	if ((flags & RESCHED_KPREEMPT) != 0) {
#ifdef __HAVE_PREEMPTION
		atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE);
		if (ci == cur) {
			softint_trigger(1 << SIR_PREEMPT);
		} else {
			x86_send_ipi(ci, X86_IPI_KPREEMPT);
		}
#endif
	} else {
		aston(l, X86_AST_PREEMPT);
		if (ci == cur) {
			return;
		}
		if ((flags & RESCHED_IMMED) != 0) {
			x86_send_ipi(ci, 0);
		}
	}
}

void
cpu_signotify(struct lwp *l)
{

	KASSERT(kpreempt_disabled());
	aston(l, X86_AST_GENERIC);
	if (l->l_cpu != curcpu())
		x86_send_ipi(l->l_cpu, 0);
}

void
cpu_need_proftick(struct lwp *l)
{

	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	aston(l, X86_AST_GENERIC);
}

bool
cpu_intr_p(void)
{
	int idepth;

	kpreempt_disable();
	idepth = curcpu()->ci_idepth;
	kpreempt_enable();
	/* ci_idepth is -1 when no hardware interrupt is being serviced. */
	return (idepth >= 0);
}

#ifdef __HAVE_PREEMPTION
/*
 * Called to check MD conditions that would prevent preemption, and to
 * arrange for those conditions to be rechecked later.
 */
bool
cpu_kpreempt_enter(uintptr_t where, int s)
{
	struct cpu_info *ci;
	struct pcb *pcb;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	l = curlwp;
	ci = curcpu();

	/*
	 * If SPL raised, can't go.  Note this implies that spin
	 * mutexes at IPL_NONE are _not_ valid to use.
	 */
	if (s > IPL_PREEMPT) {
		softint_trigger(1 << SIR_PREEMPT);
		aston(l, X86_AST_PREEMPT);	/* paranoid */
		return false;
	}

	/* Must save cr2 or it could be clobbered. */
	pcb = lwp_getpcb(l);
	pcb->pcb_cr2 = rcr2();

	return true;
}

/*
 * Called after returning from a kernel preemption, and called with
 * preemption disabled.
 */
void
cpu_kpreempt_exit(uintptr_t where)
{
	extern char x86_copyfunc_start, x86_copyfunc_end;
	struct pcb *pcb;

	KASSERT(kpreempt_disabled());

	/*
	 * If we interrupted any of the copy functions we must reload
	 * the pmap when resuming, as they cannot tolerate it being
	 * swapped out.
	 */
	if (where >= (uintptr_t)&x86_copyfunc_start &&
	    where < (uintptr_t)&x86_copyfunc_end) {
		pmap_load();
	}

	/* Restore cr2 only after the pmap, as pmap_load can block. */
	pcb = lwp_getpcb(curlwp);
	lcr2(pcb->pcb_cr2);
}

/*
 * Return true if preemption is disabled for MD reasons.  Must be called
 * with preemption disabled, and thus is only for diagnostic checks.
 */
bool
cpu_kpreempt_disabled(void)
{

	return curcpu()->ci_ilevel > IPL_NONE;
}
#endif	/* __HAVE_PREEMPTION */
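/*
 * Export the name of the idle mechanism in use (see x86_cpu_idle_set())
 * as the read-only machdep.idle-mechanism sysctl string.
 */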
SYSCTL_SETUP(sysctl_machdep_cpu_idle, "sysctl machdep cpu_idle")
{
	const struct sysctlnode *mnode, *node;

	sysctl_createv(NULL, 0, NULL, &mnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	sysctl_createv(NULL, 0, &mnode, &node,
	    CTLFLAG_PERMANENT, CTLTYPE_STRING, "idle-mechanism",
	    SYSCTL_DESCR("Mechanism used for the idle loop."),
	    NULL, 0, x86_cpu_idle_text, 0,
	    CTL_CREATE, CTL_EOL);
}

/*
 * Pick the default idle mechanism: MWAIT where CPUID advertises
 * MONITOR/MWAIT and the CPU is not an AMD part, otherwise HLT; under
 * Xen, always use the hypervisor's idle entry point.
 */
void
x86_cpu_idle_init(void)
{

#ifndef XEN
	if ((cpu_feature[1] & CPUID2_MONITOR) == 0 ||
	    cpu_vendor == CPUVENDOR_AMD)
		x86_cpu_idle_set(x86_cpu_idle_halt, "halt", true);
	else
		x86_cpu_idle_set(x86_cpu_idle_mwait, "mwait", false);
#else
	x86_cpu_idle_set(x86_cpu_idle_xen, "xen", false);
#endif
}

void
x86_cpu_idle_get(void (**func)(void), char *text, size_t len)
{

	*func = x86_cpu_idle;

	(void)strlcpy(text, x86_cpu_idle_text, len);
}

void
x86_cpu_idle_set(void (*func)(void), const char *text, bool ipi)
{

	x86_cpu_idle = func;
	x86_cpu_idle_ipi = ipi;

	(void)strlcpy(x86_cpu_idle_text, text, sizeof(x86_cpu_idle_text));
}
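/*
 * Physical memory map handling.  The routines below record the usable
 * RAM segments in mem_clusters[] -- either from the BIOS-provided
 * memory map or from the legacy base/extended memory counts -- and
 * later hand them to UVM.
 */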
#ifndef XEN

#define KBTOB(x)	((size_t)(x) * 1024UL)
#define MBTOB(x)	((size_t)(x) * 1024UL * 1024UL)

extern paddr_t avail_start, avail_end;

/*
 * add_mem_cluster: record one memory map segment in seg_clusters[] if it
 * describes usable RAM, reserving its range in the iomem extent map.
 * Returns the (possibly updated) cluster count.
 */
static int
add_mem_cluster(phys_ram_seg_t *seg_clusters, int seg_cluster_cnt,
	struct extent *iomem_ex,
	uint64_t seg_start, uint64_t seg_end, uint32_t type)
{
	uint64_t new_physmem = 0;
	phys_ram_seg_t *cluster;
	int i;

#ifdef i386
#ifdef PAE
#define TOPLIMIT	0x1000000000ULL		/* 64GB */
#else
#define TOPLIMIT	0x100000000ULL		/* 4GB */
#endif
#else
#define TOPLIMIT	0x100000000000ULL	/* 16TB */
#endif

	if (seg_end > TOPLIMIT) {
		aprint_verbose("WARNING: skipping large memory map entry: "
		    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n",
		    seg_start,
		    (seg_end - seg_start),
		    type);
		return seg_cluster_cnt;
	}

	/*
	 * XXX Chop the last page off the size so that
	 * XXX it can fit in avail_end.
	 */
	if (seg_end == TOPLIMIT)
		seg_end -= PAGE_SIZE;

	if (seg_end <= seg_start)
		return seg_cluster_cnt;

	for (i = 0; i < seg_cluster_cnt; i++) {
		cluster = &seg_clusters[i];
		if ((cluster->start == round_page(seg_start))
		    && (cluster->size == trunc_page(seg_end) - cluster->start))
		{
#ifdef DEBUG_MEMLOAD
			printf("WARNING: skipping duplicate segment entry\n");
#endif
			return seg_cluster_cnt;
		}
	}

	/*
	 * Allocate the physical addresses used by RAM
	 * from the iomem extent map.  This is done before
	 * the addresses are page rounded just to make
	 * sure we get them all.
	 */
	if (seg_start < 0x100000000ULL) {
		uint64_t io_end;

		if (seg_end > 0x100000000ULL)
			io_end = 0x100000000ULL;
		else
			io_end = seg_end;

		if (iomem_ex != NULL && extent_alloc_region(iomem_ex, seg_start,
		    io_end - seg_start, EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE MEMORY SEGMENT "
			    "(0x%"PRIx64"/0x%"PRIx64"/0x%x) FROM "
			    "IOMEM EXTENT MAP!\n",
			    seg_start, seg_end - seg_start, type);
			return seg_cluster_cnt;
		}
	}

	/*
	 * If it's not free memory, skip it.
	 */
	if (type != BIM_Memory)
		return seg_cluster_cnt;

	/* XXX XXX XXX */
	if (seg_cluster_cnt >= VM_PHYSSEG_MAX)
		panic("%s: too many memory segments (increase VM_PHYSSEG_MAX)",
		    __func__);

#ifdef PHYSMEM_MAX_ADDR
	if (seg_start >= MBTOB(PHYSMEM_MAX_ADDR))
		return seg_cluster_cnt;
	if (seg_end > MBTOB(PHYSMEM_MAX_ADDR))
		seg_end = MBTOB(PHYSMEM_MAX_ADDR);
#endif

	seg_start = round_page(seg_start);
	seg_end = trunc_page(seg_end);

	if (seg_start == seg_end)
		return seg_cluster_cnt;

	cluster = &seg_clusters[seg_cluster_cnt];
	cluster->start = seg_start;
	if (iomem_ex != NULL)
		new_physmem = physmem + atop(seg_end - seg_start);

#ifdef PHYSMEM_MAX_SIZE
	if (iomem_ex != NULL) {
		if (physmem >= atop(MBTOB(PHYSMEM_MAX_SIZE)))
			return seg_cluster_cnt;
		if (new_physmem > atop(MBTOB(PHYSMEM_MAX_SIZE))) {
			seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) - ptoa(physmem);
			new_physmem = atop(MBTOB(PHYSMEM_MAX_SIZE));
		}
	}
#endif

	cluster->size = seg_end - seg_start;

	if (iomem_ex != NULL) {
		if (avail_end < seg_end)
			avail_end = seg_end;
		physmem = new_physmem;
	}
	seg_cluster_cnt++;

	return seg_cluster_cnt;
}
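/*
 * initx86_parse_memmap: walk the BIOS memory map passed in via bootinfo
 * and feed each usable segment to add_mem_cluster(), splitting entries
 * that overlap the ISA "Compatibility Holes" below 1MB.
 */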
int
initx86_parse_memmap(struct btinfo_memmap *bim, struct extent *iomem_ex)
{
	uint64_t seg_start, seg_end;
	uint64_t addr, size;
	uint32_t type;
	int x;

	KASSERT(bim != NULL);
	KASSERT(bim->num > 0);

#ifdef DEBUG_MEMLOAD
	printf("BIOS MEMORY MAP (%d ENTRIES):\n", bim->num);
#endif
	for (x = 0; x < bim->num; x++) {
		addr = bim->entry[x].addr;
		size = bim->entry[x].size;
		type = bim->entry[x].type;
#ifdef DEBUG_MEMLOAD
		printf("    addr 0x%"PRIx64"  size 0x%"PRIx64"  type 0x%x\n",
		    addr, size, type);
#endif

		/*
		 * If the segment is not memory, skip it.
		 */
		switch (type) {
		case BIM_Memory:
		case BIM_ACPI:
		case BIM_NVS:
			break;
		default:
			continue;
		}

		/*
		 * If the segment is smaller than a page, skip it.
		 */
		if (size < NBPG)
			continue;

		seg_start = addr;
		seg_end = addr + size;

		/*
		 * Avoid Compatibility Holes.
		 * XXX Holes within memory space that allow access
		 * XXX to be directed to the PC-compatible frame buffer
		 * XXX (0xa0000-0xbffff), to adapter ROM space
		 * XXX (0xc0000-0xdffff), and to system BIOS space
		 * XXX (0xe0000-0xfffff).
		 * XXX Some laptops (for example, the Toshiba Satellite
		 * XXX 2550X) report this area as usable RAM, which causes
		 * XXX problems, so we avoid it.
		 */
		if (seg_start < 0x100000 && seg_end > 0xa0000) {
			printf("WARNING: memory map entry overlaps "
			    "with ``Compatibility Holes'': "
			    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n", seg_start,
			    seg_end - seg_start, type);
			mem_cluster_cnt = add_mem_cluster(
			    mem_clusters, mem_cluster_cnt, iomem_ex,
			    seg_start, 0xa0000, type);
			mem_cluster_cnt = add_mem_cluster(
			    mem_clusters, mem_cluster_cnt, iomem_ex,
			    0x100000, seg_end, type);
		} else
			mem_cluster_cnt = add_mem_cluster(
			    mem_clusters, mem_cluster_cnt, iomem_ex,
			    seg_start, seg_end, type);
	}

	return 0;
}

/*
 * initx86_fake_memmap: no BIOS memory map was supplied, so synthesise
 * two clusters from the legacy base/extended memory sizes.
 */
int
initx86_fake_memmap(struct extent *iomem_ex)
{
	phys_ram_seg_t *cluster;
	KASSERT(mem_cluster_cnt == 0);

	/*
	 * Allocate the physical addresses used by RAM from the iomem
	 * extent map.  This is done before the addresses are
	 * page rounded just to make sure we get them all.
	 */
	if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
	    EX_NOWAIT))
	{
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

	cluster = &mem_clusters[0];
	cluster->start = 0;
	cluster->size = trunc_page(KBTOB(biosbasemem));
	physmem += atop(cluster->size);

	if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
	    EX_NOWAIT))
	{
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

#if NISADMA > 0
	/*
	 * Some motherboards/BIOSes remap the 384K of RAM that would
	 * normally be covered by the ISA hole to the end of memory
	 * so that it can be used.  However, on a 16M system, this
	 * would cause bounce buffers to be allocated and used.
	 * This is not desirable behaviour, as more than 384K of
	 * bounce buffers might be allocated.  As a work-around,
	 * we round memory down to the nearest 1M boundary if
	 * we're using any isadma devices and the remapped memory
	 * is what puts us over 16M.
	 */
	if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
		char pbuf[9];

		format_bytes(pbuf, sizeof(pbuf),
		    biosextmem - (15*1024));
		printf("Warning: ignoring %s of remapped memory\n",
		    pbuf);
		biosextmem = (15*1024);
	}
#endif
	cluster = &mem_clusters[1];
	cluster->start = IOM_END;
	cluster->size = trunc_page(KBTOB(biosextmem));
	physmem += atop(cluster->size);

	mem_cluster_cnt = 2;

	avail_end = IOM_END + trunc_page(KBTOB(biosextmem));

	return 0;
}

#ifdef amd64
extern vaddr_t kern_end;
extern vaddr_t module_start, module_end;
#endif
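/*
 * initx86_load_memmap: hand the recorded memory clusters to UVM with
 * uvm_page_physload(), placing memory below 16MB (and, where enabled,
 * below 4GB) on lower-priority freelists so that DMA-able pages are not
 * consumed first.
 */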
int
initx86_load_memmap(paddr_t first_avail)
{
	uint64_t seg_start, seg_end;
	uint64_t seg_start1, seg_end1;
	int first16q, x;
#ifdef VM_FREELIST_FIRST4G
	int first4gq;
#endif

	/*
	 * If we have 16M of RAM or less, just put it all on
	 * the default free list.  Otherwise, put the first
	 * 16M of RAM on a lower priority free list (so that
	 * all of the ISA DMA'able memory won't be eaten up
	 * first-off).
	 */
#define ADDR_16M	(16 * 1024 * 1024)

	if (avail_end <= ADDR_16M)
		first16q = VM_FREELIST_DEFAULT;
	else
		first16q = VM_FREELIST_FIRST16;

#ifdef VM_FREELIST_FIRST4G
	/*
	 * If we have 4G of RAM or less, just put it all on
	 * the default free list.  Otherwise, put the first
	 * 4G of RAM on a lower priority free list (so that
	 * all of the 32bit PCI DMA'able memory won't be eaten up
	 * first-off).
	 */
#define ADDR_4G		(4ULL * 1024 * 1024 * 1024)
	if (avail_end <= ADDR_4G)
		first4gq = VM_FREELIST_DEFAULT;
	else
		first4gq = VM_FREELIST_FIRST4G;
#endif /* defined(VM_FREELIST_FIRST4G) */

	/* Make sure the end of the space used by the kernel is rounded. */
	first_avail = round_page(first_avail);

#ifdef amd64
	kern_end = KERNBASE + first_avail;
	module_start = kern_end;
	module_end = KERNBASE + NKL2_KIMG_ENTRIES * NBPD_L2;
#endif

	/*
	 * Now, load the memory clusters (which have already been
	 * rounded and truncated) into the VM system.
	 *
	 * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
	 * IS LOADED AT IOM_END (1M).
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		const phys_ram_seg_t *cluster = &mem_clusters[x];

		seg_start = cluster->start;
		seg_end = cluster->start + cluster->size;
		seg_start1 = 0;
		seg_end1 = 0;

		/*
		 * Skip memory before our available starting point.
		 */
		if (seg_end <= avail_start)
			continue;

		if (avail_start >= seg_start && avail_start < seg_end) {
			if (seg_start != 0)
				panic("init_x86_64: memory doesn't start at 0");
			seg_start = avail_start;
			if (seg_start == seg_end)
				continue;
		}

		/*
		 * If this segment contains the kernel, split it
		 * in two, around the kernel.
		 */
		if (seg_start <= IOM_END && first_avail <= seg_end) {
			seg_start1 = first_avail;
			seg_end1 = seg_end;
			seg_end = IOM_END;
			KASSERT(seg_end < seg_end1);
		}

		/* First hunk */
		if (seg_start != seg_end) {
			if (seg_start < ADDR_16M &&
			    first16q != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end > ADDR_16M)
					tmp = ADDR_16M;
				else
					tmp = seg_end;

				if (tmp != seg_start) {
#ifdef DEBUG_MEMLOAD
					printf("loading first16q 0x%"PRIx64
					    "-0x%"PRIx64
					    " (0x%"PRIx64"-0x%"PRIx64")\n",
					    seg_start, tmp,
					    (uint64_t)atop(seg_start),
					    (uint64_t)atop(tmp));
#endif
					uvm_page_physload(atop(seg_start),
					    atop(tmp), atop(seg_start),
					    atop(tmp), first16q);
				}
				seg_start = tmp;
			}

#ifdef VM_FREELIST_FIRST4G
			if (seg_start < ADDR_4G &&
			    first4gq != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end > ADDR_4G)
					tmp = ADDR_4G;
				else
					tmp = seg_end;

				if (tmp != seg_start) {
#ifdef DEBUG_MEMLOAD
					printf("loading first4gq 0x%"PRIx64
					    "-0x%"PRIx64
					    " (0x%"PRIx64"-0x%"PRIx64")\n",
					    seg_start, tmp,
					    (uint64_t)atop(seg_start),
					    (uint64_t)atop(tmp));
#endif
					uvm_page_physload(atop(seg_start),
					    atop(tmp), atop(seg_start),
					    atop(tmp), first4gq);
				}
				seg_start = tmp;
			}
#endif /* defined(VM_FREELIST_FIRST4G) */

			if (seg_start != seg_end) {
#ifdef DEBUG_MEMLOAD
				printf("loading default 0x%"PRIx64"-0x%"PRIx64
				    " (0x%"PRIx64"-0x%"PRIx64")\n",
				    seg_start, seg_end,
				    (uint64_t)atop(seg_start),
				    (uint64_t)atop(seg_end));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(seg_end), atop(seg_start),
				    atop(seg_end), VM_FREELIST_DEFAULT);
			}
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			if (seg_start1 < ADDR_16M &&
			    first16q != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end1 > ADDR_16M)
					tmp = ADDR_16M;
				else
					tmp = seg_end1;

				if (tmp != seg_start1) {
#ifdef DEBUG_MEMLOAD
					printf("loading first16q 0x%"PRIx64
					    "-0x%"PRIx64
					    " (0x%"PRIx64"-0x%"PRIx64")\n",
					    seg_start1, tmp,
					    (uint64_t)atop(seg_start1),
					    (uint64_t)atop(tmp));
#endif
					uvm_page_physload(atop(seg_start1),
					    atop(tmp), atop(seg_start1),
					    atop(tmp), first16q);
				}
				seg_start1 = tmp;
			}

#ifdef VM_FREELIST_FIRST4G
			if (seg_start1 < ADDR_4G &&
			    first4gq != VM_FREELIST_DEFAULT) {
				uint64_t tmp;

				if (seg_end1 > ADDR_4G)
					tmp = ADDR_4G;
				else
					tmp = seg_end1;

				if (tmp != seg_start1) {
#ifdef DEBUG_MEMLOAD
					printf("loading first4gq 0x%"PRIx64
					    "-0x%"PRIx64
					    " (0x%"PRIx64"-0x%"PRIx64")\n",
					    seg_start1, tmp,
					    (uint64_t)atop(seg_start1),
					    (uint64_t)atop(tmp));
#endif
					uvm_page_physload(atop(seg_start1),
					    atop(tmp), atop(seg_start1),
					    atop(tmp), first4gq);
				}
				seg_start1 = tmp;
			}
#endif /* defined(VM_FREELIST_FIRST4G) */

			if (seg_start1 != seg_end1) {
#ifdef DEBUG_MEMLOAD
				printf("loading default 0x%"PRIx64"-0x%"PRIx64
				    " (0x%"PRIx64"-0x%"PRIx64")\n",
				    seg_start1, seg_end1,
				    (uint64_t)atop(seg_start1),
				    (uint64_t)atop(seg_end1));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(seg_end1), atop(seg_start1),
				    atop(seg_end1), VM_FREELIST_DEFAULT);
			}
		}
	}

	return 0;
}
#endif
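/*
 * x86_reset: reset the machine the hard way -- first via the keyboard
 * controller, then via the Reset Control register (0xcf9), and finally
 * via the Fast A20/Init register (port 0x92).
 */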
void
x86_reset(void)
{
	uint8_t b;

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Attempt to force a reset via the Reset Control register at
	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
	 * transitions from 0 to 1.  Bit 1 selects the type of reset
	 * to attempt: 0 selects a "soft" reset, and 1 selects a
	 * "hard" reset.  We try a "hard" reset.  The first write sets
	 * bit 1 to select a "hard" reset and clears bit 2.  The
	 * second write forces a 0 -> 1 transition in bit 2 to trigger
	 * a reset.
	 */
	outb(0xcf9, 0x2);
	outb(0xcf9, 0x6);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */

	/*
	 * Attempt to force a reset via the Fast A20 and Init register
	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
	 * Bit 0 asserts INIT# when set to 1.  We preserve the other
	 * bits while setting bit 0, and we must clear bit 0 first if
	 * it isn't already clear.
	 */
	b = inb(0x92);
	if (b != 0xff) {
		if ((b & 0x1) != 0)
			outb(0x92, b & 0xfe);
		outb(0x92, b | 0x1);
		DELAY(500000);	/* wait 0.5 sec to see if that did it */
	}
}
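/*
 * Default machdep kauth listener: allow the ioperm/LDT/MTRR requests
 * listed below outright and defer everything else to other listeners.
 */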
static int
x86_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	switch (action) {
	case KAUTH_MACHDEP_IOPERM_GET:
	case KAUTH_MACHDEP_LDT_GET:
	case KAUTH_MACHDEP_LDT_SET:
	case KAUTH_MACHDEP_MTRR_GET:
		result = KAUTH_RESULT_ALLOW;
		break;

	default:
		break;
	}

	return result;
}

void
machdep_init(void)
{

	x86_listener = kauth_listen_scope(KAUTH_SCOPE_MACHDEP,
	    x86_listener_cb, NULL);
}

/*
 * x86_startup: x86 common startup routine
 *
 * called by cpu_startup.
 */
void
x86_startup(void)
{

#if !defined(XEN)
	nmi_init();
#endif /* !defined(XEN) */
}