1/* $NetBSD: locore.S,v 1.108 2016/07/25 16:03:38 maxv Exp $ */ 2 3/* 4 * Copyright-o-rama! 5 */ 6 7/* 8 * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc. 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to The NetBSD Foundation 12 * by Charles M. Hannum and by Maxime Villard. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36/* 37 * Copyright (c) 2007 Manuel Bouyer. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. 
Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 49 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 50 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 51 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 52 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 53 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 54 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 55 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 57 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 * 59 */ 60 61/* 62 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 63 * 64 * Permission to use, copy, modify, and distribute this software for any 65 * purpose with or without fee is hereby granted, provided that the above 66 * copyright notice and this permission notice appear in all copies. 67 * 68 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 69 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 70 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 71 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 72 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 73 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 74 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 75 */ 76 77/* 78 * Copyright (c) 2001 Wasabi Systems, Inc. 
79 * All rights reserved. 80 * 81 * Written by Frank van der Linden for Wasabi Systems, Inc. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed for the NetBSD Project by 94 * Wasabi Systems, Inc. 95 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 96 * or promote products derived from this software without specific prior 97 * written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 101 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 102 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 103 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 104 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 105 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 106 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 107 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 108 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 109 * POSSIBILITY OF SUCH DAMAGE. 110 */ 111 112/*- 113 * Copyright (c) 1990 The Regents of the University of California. 114 * All rights reserved. 
115 * 116 * This code is derived from software contributed to Berkeley by 117 * William Jolitz. 118 * 119 * Redistribution and use in source and binary forms, with or without 120 * modification, are permitted provided that the following conditions 121 * are met: 122 * 1. Redistributions of source code must retain the above copyright 123 * notice, this list of conditions and the following disclaimer. 124 * 2. Redistributions in binary form must reproduce the above copyright 125 * notice, this list of conditions and the following disclaimer in the 126 * documentation and/or other materials provided with the distribution. 127 * 3. Neither the name of the University nor the names of its contributors 128 * may be used to endorse or promote products derived from this software 129 * without specific prior written permission. 130 * 131 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 132 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 133 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 134 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 135 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 136 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 137 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 138 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 139 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 140 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 141 * SUCH DAMAGE. 
 *
 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
 */

/* Override user-land alignment before including asm.h */
#define ALIGN_DATA	.align	8
#define ALIGN_TEXT	.align	16,0x90
#define _ALIGN_TEXT	ALIGN_TEXT

#include <machine/asm.h>

#include "opt_copy_symtab.h"
#include "opt_ddb.h"
#include "opt_ddbparam.h"
#include "opt_modular.h"
#include "opt_realmem.h"

#include "opt_compat_netbsd.h"
#include "opt_compat_netbsd32.h"
#include "opt_compat_ibcs2.h"
#include "opt_xen.h"

#include "assym.h"
#include "lapic.h"
#include "ioapic.h"
#include "ksyms.h"

#include <sys/errno.h>
#include <sys/syscall.h>

#include <machine/pte.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/trap.h>
#include <machine/bootinfo.h>
#include <machine/frameasm.h>
#include <machine/cputypes.h>

#if NLAPIC > 0
#include <machine/i82489reg.h>
#endif

/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
#include <dev/isa/isareg.h>

/* Convert a kernel virtual address to its load-time physical address. */
#define _RELOC(x)	((x) - KERNBASE)
#define RELOC(x)	_RELOC(_C_LABEL(x))

/* 32bit version of PG_NX */
#define PG_NX32	0x80000000

#if L2_SLOT_KERNBASE > 0
#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
#else
#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
#endif

#if L3_SLOT_KERNBASE > 0
#define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
#else
#define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
#endif

/*
 * Byte offsets of the proc0 bootstrap areas inside the BOOTSTRAP TABLES
 * region: the PML4 page, then the proc0 stack (UPAGES), then the
 * L3/L2/L1 page table pages.  TABLESIZE is the total region size.
 */
#define PROC0_PML4_OFF	0
#define PROC0_STK_OFF	(PROC0_PML4_OFF + 1 * PAGE_SIZE)
#define PROC0_PTP3_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
#define PROC0_PTP2_OFF	(PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE)
#define PROC0_PTP1_OFF	(PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE)
#define TABLESIZE \
    ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \
    * PAGE_SIZE)

/*
 * fillkpt - Fill in a kernel page table
 *	eax = pte (page frame | control | status)
 *	ebx = page table address
 *	ecx = number of pages to map
 *
 * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0.
 * Note: 'loop' consumes %ecx; 1/2 are assembler-local labels.
 */
#define fillkpt	\
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je	2f			; \
1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
	movl	%eax,(%ebx)		;	/* store phys addr */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
	loop	1b			; \
2:	;

/*
 * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
 * Clobbers %ebp internally (saved/restored around the loop).
 */
#define fillkpt_nox	\
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je	2f			; \
	pushl	%ebp			; \
	movl	RELOC(nox_flag),%ebp	; \
1:	movl	%ebp,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: NX */	\
	movl	%eax,(%ebx)		;	/* store phys addr */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
	loop	1b			; \
	popl	%ebp			; \
2:	;

/*
 * fillkpt_blank - Fill in a kernel page table with blank entries
 *	ebx = page table address
 *	ecx = number of pages to map
 */
#define fillkpt_blank	\
	cmpl	$0,%ecx			;	/* zero-sized? */	\
	je	2f			; \
1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
	movl	$0,(%ebx)		;	/* lower 32 bits: 0 */	\
	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
	loop	1b			; \
2:	;

/*
 * killkpt - Destroy a kernel page table (long mode)
 *	rbx = page table address
 *	rcx = number of pages to destroy
 */
#define killkpt \
1:	movq	$0,(%rbx)	; \
	addq	$PDE_SIZE,%rbx	; \
	loop	1b		;


#ifdef XEN
#define __ASSEMBLY__
#include <xen/xen-public/elfnote.h>
#include <xen/xen-public/xen.h>

/*
 * ELFNOTE - emit an ELF note into section .note.<name>; used below to
 * build the __xen_guest notes parsed by the Xen domain builder.
 */
#define ELFNOTE(name, type, desctype, descdata...)
\
.pushsection .note.name			;	\
  .align 4				;	\
  .long 2f - 1f		/* namesz */	;	\
  .long 4f - 3f		/* descsz */	;	\
  .long type				;	\
1:.asciz #name				;	\
2:.align 4				;	\
3:desctype descdata			;	\
4:.align 4				;	\
.popsection

/*
 * Xen guest identifier and loader selection
 */
.section __xen_guest
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "NetBSD")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "4.99")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .quad,  KERNBASE)
	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .quad,  KERNBASE)
	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .quad,  start)
	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad,  hypercall_page)
	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .quad,  HYPERVISOR_VIRT_START)
	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "")
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  PG_V, PG_V)
	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
#if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,     .asciz, "yes")
#endif
#endif	/* XEN */

/*
 * Initialization
 */
	.data

#if NLAPIC > 0
	/*
	 * local_apic and lapic_tpr share one page-aligned page:
	 * the first LAPIC_TPRI bytes, then the remainder of the page.
	 */
	.align	PAGE_SIZE
	.globl	_C_LABEL(local_apic)
	.globl	_C_LABEL(lapic_tpr)

	.type	_C_LABEL(local_apic), @object
LABEL(local_apic)
	.space	LAPIC_TPRI
END(local_apic)
	.type	_C_LABEL(lapic_tpr), @object
LABEL(lapic_tpr)
	.space	PAGE_SIZE-LAPIC_TPRI
END(lapic_tpr)
#endif	/* NLAPIC > 0 */

	.globl	_C_LABEL(tablesize)
	.globl	_C_LABEL(nox_flag)
	.globl	_C_LABEL(cputype)
	.globl	_C_LABEL(cpuid_level)
	.globl	_C_LABEL(esym)
	.globl	_C_LABEL(eblob)
	.globl	_C_LABEL(atdevbase)
	.globl	_C_LABEL(PDPpaddr)
	.globl	_C_LABEL(boothowto)
	.globl	_C_LABEL(bootinfo)
	.globl	_C_LABEL(biosbasemem)
	.globl	_C_LABEL(biosextmem)
	.globl	_C_LABEL(gdtstore)

	.type	_C_LABEL(tablesize), @object
_C_LABEL(tablesize):	.long	TABLESIZE
END(tablesize)
	.type	_C_LABEL(nox_flag), @object
LABEL(nox_flag)		.long	0	/* 32bit NOX flag, set if supported */
END(nox_flag)
	.type	_C_LABEL(cputype), @object
LABEL(cputype)		.long	0	/* are we 80486, Pentium, or.. */
END(cputype)
	.type	_C_LABEL(cpuid_level), @object
LABEL(cpuid_level)	.long	-1	/* max. level accepted by cpuid instr */
END(cpuid_level)
	.type	_C_LABEL(esym), @object
LABEL(esym)		.quad	0	/* ptr to end of syms */
END(esym)
	.type	_C_LABEL(eblob), @object
LABEL(eblob)		.quad	0	/* ptr to end of modules */
END(eblob)
	.type	_C_LABEL(atdevbase), @object
LABEL(atdevbase)	.quad	0	/* location of start of iomem in virt */
END(atdevbase)
	.type	_C_LABEL(PDPpaddr), @object
LABEL(PDPpaddr)		.quad	0	/* paddr of PTD, for libkvm */
END(PDPpaddr)
	.type	_C_LABEL(biosbasemem), @object
#ifndef REALBASEMEM
LABEL(biosbasemem)	.long	0	/* base memory reported by BIOS */
#else
LABEL(biosbasemem)	.long	REALBASEMEM
#endif
END(biosbasemem)
	.type	_C_LABEL(biosextmem), @object
#ifndef REALEXTMEM
LABEL(biosextmem)	.long	0	/* extended memory reported by BIOS */
#else
LABEL(biosextmem)	.long	REALEXTMEM
#endif
END(biosextmem)

#ifndef XEN
	.globl	gdt64_lo
	.globl	gdt64_hi

#define GDT64_LIMIT gdt64_end-gdt64_start-1
/* Temporary gdt64, with base address in low memory */
	.type	_C_LABEL(gdt64_lo), @object
LABEL(gdt64_lo)
	.word	GDT64_LIMIT
	.quad	_RELOC(gdt64_start)
END(gdt64_lo)
.align 64

/* Temporary gdt64, with base address in high memory */
	.type	_C_LABEL(gdt64_hi), @object
LABEL(gdt64_hi)
	.word	GDT64_LIMIT
	.quad	gdt64_start
END(gdt64_hi)
.align 64
#undef GDT64_LIMIT

	.type	_C_LABEL(gdt64_start), @object
_C_LABEL(gdt64_start):
	.quad 0x0000000000000000	/* always empty */
	.quad 0x00af9a000000ffff	/* kernel CS */
	.quad 0x00cf92000000ffff	/* kernel DS */
END(gdt64_start)
gdt64_end:

	/* Far-jump descriptor (offset + selector) used to enter long mode. */
	.type	_C_LABEL(farjmp64), @object
_C_LABEL(farjmp64):
	.long	_RELOC(longmode)
	.word	GSEL(GCODE_SEL, SEL_KPL)
END(farjmp64)

#endif /* !XEN */

	/* Space for the temporary stack; it grows down from tmpstk. */
	.size	tmpstk, tmpstk - .
	.space	512
tmpstk:

	.globl _C_LABEL(cpu_private)
	.comm _C_LABEL(cpu_private),PAGE_SIZE,PAGE_SIZE

/*
 * Some hackage to deal with 64bit symbols in 32 bit mode.
 * This may not be needed if things are cleaned up a little.
 */

	.text
	.globl	_C_LABEL(kernel_text)
	.set	_C_LABEL(kernel_text),KERNTEXTOFF

ENTRY(start)
#ifndef XEN
	.code32

	/* Warm boot */
	movw	$0x1234,0x472

	/*
	 * Load parameters from the stack (32 bits):
	 * boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
	 * We are not interested in 'bootdev'.
	 */

	/* Load 'boothowto' */
	movl	4(%esp),%eax
	movl	%eax,RELOC(boothowto)

	/* Load 'bootinfo' */
	movl	12(%esp),%eax
	testl	%eax,%eax		/* bootinfo = NULL? */
	jz	bootinfo_finished

	movl	(%eax),%ebx		/* number of entries */
	movl	$RELOC(bootinfo),%ebp
	movl	%ebp,%edx
	addl	$BOOTINFO_MAXSIZE,%ebp	/* %ebp = end of dest buffer */
	movl	%ebx,(%edx)
	addl	$4,%edx

bootinfo_entryloop:
	testl	%ebx,%ebx		/* no remaining entries? */
	jz	bootinfo_finished

	addl	$4,%eax
	movl	(%eax),%ecx		/* address of entry */
	pushl	%edi
	pushl	%esi
	pushl	%eax

	movl	(%ecx),%eax		/* btinfo_common::len (size of entry) */
	movl	%edx,%edi
	addl	(%ecx),%edx		/* update dest pointer */
	cmpl	%ebp,%edx		/* beyond bootinfo+BOOTINFO_MAXSIZE? */
	jg	bootinfo_overflow

	movl	%ecx,%esi
	movl	%eax,%ecx

	/*
	 * If any modules were loaded, record where they end. We'll need to
	 * skip over them.
	 */
	cmpl	$BTINFO_MODULELIST,4(%esi)	/* btinfo_common::type */
	jne	0f

	pushl	12(%esi)		/* btinfo_modulelist::endpa */
	popl	RELOC(eblob)
	/* 64bit add of KERNBASE to the 32bit physical end address */
	addl	$KERNBASE_LO,RELOC(eblob)
	adcl	$KERNBASE_HI,RELOC(eblob)+4

0:
	rep
	movsb				/* copy esi -> edi */
	popl	%eax
	popl	%esi
	popl	%edi
	subl	$1,%ebx			/* decrement the # of entries */
	jmp	bootinfo_entryloop

bootinfo_overflow:
	/*
	 * Cleanup for overflow case. Pop the registers, and correct the number
	 * of entries.
	 */
	popl	%eax
	popl	%esi
	popl	%edi
	movl	$RELOC(bootinfo),%ebp
	movl	%ebp,%edx
	subl	%ebx,(%edx)		/* correct the number of entries */

bootinfo_finished:
	/* Load 'esym' */
	movl	16(%esp),%eax
	testl	%eax,%eax		/* esym = NULL? */
	jz	1f

	addl	$KERNBASE_LO,%eax

1:
	movl	$RELOC(esym),%ebp
	movl	%eax,(%ebp)
	movl	$KERNBASE_HI,4(%ebp)

	/* Load 'biosextmem' */
	movl	$RELOC(biosextmem),%ebp
	movl	(%ebp),%eax
	testl	%eax,%eax		/* already set? */
	jnz	biosextmem_finished

	movl	20(%esp),%eax
	movl	%eax,(%ebp)

biosextmem_finished:
	/* Load 'biosbasemem' */
	movl	$RELOC(biosbasemem),%ebp
	movl	(%ebp),%eax
	testl	%eax,%eax		/* already set? */
	jnz	biosbasemem_finished

	movl	24(%esp),%eax
	movl	%eax,(%ebp)

biosbasemem_finished:
	/*
	 * Done with the parameters!
	 */

	/* First, reset the PSL. */
	pushl	$PSL_MBO
	popfl

	xorl	%eax,%eax
	cpuid
	movl	%eax,RELOC(cpuid_level)

	/*
	 * Finished with old stack; load new %esp now instead of later so we
	 * can trace this code without having to worry about the trace trap
	 * clobbering the memory test or the zeroing of the bss+bootstrap page
	 * tables.
	 *
	 * The boot program should check:
	 * text+data <= &stack_variable - more_space_for_stack
	 * text+data+bss+pad+space_for_page_tables <= end_of_memory
	 *
	 * XXX: the gdt is in the carcass of the boot program so clearing
	 * the rest of memory is still not possible.
	 */
	movl	$RELOC(tmpstk),%esp

	/*
	 * Retrieve the NX/XD flag. We use the 32bit version of PG_NX.
	 */
	movl	$0x80000001,%eax
	cpuid
	andl	$CPUID_NOX,%edx
	jz	no_NOX
	movl	$PG_NX32,RELOC(nox_flag)
no_NOX:

/*
 * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will
 * be referred to as: L4 -> L3 -> L2 -> L1.
 *
 * Virtual address space of the kernel:
 * +------+--------+------+-----+--------+---------------------+----------
 * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 ->
 * +------+--------+------+-----+--------+---------------------+----------
 *                                  (1)      (2)        (3)
 *
 * --------------+-----+-----+----+-------------+
 * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM |
 * --------------+-----+-----+----+-------------+
 *  (4)
 *
 * PROC0 STK is obviously not linked as a page level. It just happens to be
 * caught between L4 and L3.
 *
 * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES.
 *
 * Important note: the kernel segments are properly 4k-aligned
 * (see kern.ldscript), so there's no need to enforce alignment.
 */

	/* Find end of kernel image; brings us on (1). */
	movl	$RELOC(end),%edi

#if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
	/* Save the symbols (if loaded); brings us on (2). */
	movl	RELOC(esym),%eax
	testl	%eax,%eax
	jz	1f
	subl	$KERNBASE_LO,%eax	/* XXX */
	movl	%eax,%edi
1:
#endif
	/* Skip over any modules/blobs; brings us on (3). */
	movl	RELOC(eblob),%eax
	testl	%eax,%eax
	jz	1f
	subl	$KERNBASE_LO,%eax	/* XXX */
	movl	%eax,%edi
1:

	/* We are on (3). Align up for BOOTSTRAP TABLES. */
	movl	%edi,%esi
	addl	$PGOFSET,%esi
	andl	$~PGOFSET,%esi

	/* We are on the BOOTSTRAP TABLES. Save L4's physical address. */
	movl	$RELOC(PDPpaddr),%ebp
	movl	%esi,(%ebp)
	movl	$0,4(%ebp)

	/* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
	movl	%esi,%edi
	xorl	%eax,%eax
	cld
	movl	$TABLESIZE,%ecx
	shrl	$2,%ecx
	rep
	stosl				/* copy eax -> edi */

/*
 * Build the page tables and levels. We go from L1 to L4, and link the levels
 * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
 * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
 */
	/*
	 * Build L1.
	 */
	leal	(PROC0_PTP1_OFF)(%esi),%ebx

	/* Skip the first MB. */
	movl	$(KERNTEXTOFF_LO - KERNBASE_LO),%ecx
	shrl	$PGSHIFT,%ecx
	fillkpt_blank

	/* Map the kernel text RX. */
	movl	$(KERNTEXTOFF_LO - KERNBASE_LO),%eax	/* start of TEXT */
	movl	$RELOC(__rodata_start),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KR),%eax
	fillkpt

	/* Map the kernel rodata R. */
	movl	$RELOC(__rodata_start),%eax
	movl	$RELOC(__data_start),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KR),%eax
	fillkpt_nox

	/* Map the kernel data+bss RW. */
	movl	$RELOC(__data_start),%eax
	movl	$RELOC(__kernel_end),%ecx
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KW),%eax
	fillkpt_nox

	/* Map [SYMS]+[PRELOADED MODULES] RW. */
	movl	$RELOC(__kernel_end),%eax
	movl	%esi,%ecx		/* start of BOOTSTRAP TABLES */
	subl	%eax,%ecx
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KW),%eax
	fillkpt_nox

	/* Map the BOOTSTRAP TABLES RW. */
	movl	%esi,%eax		/* start of BOOTSTRAP TABLES */
	movl	$TABLESIZE,%ecx		/* length of BOOTSTRAP TABLES */
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KW),%eax
	fillkpt_nox

	/* We are on (4). Map ISA I/O MEM RW. */
	movl	$IOM_BEGIN,%eax
	movl	$IOM_SIZE,%ecx	/* size of ISA I/O MEM */
	shrl	$PGSHIFT,%ecx
	orl	$(PG_V|PG_KW/*|PG_N*/),%eax
	fillkpt_nox

	/*
	 * Build L2. Linked to L1.
	 */
	leal	(PROC0_PTP2_OFF)(%esi),%ebx
	leal	(PROC0_PTP1_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
	fillkpt

#if L2_SLOT_KERNBASE > 0
	/* If needed, set up level 2 entries for actual kernel mapping */
	leal	(PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP1_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
	fillkpt
#endif

	/*
	 * Build L3. Linked to L2.
	 */
	leal	(PROC0_PTP3_OFF)(%esi),%ebx
	leal	(PROC0_PTP2_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$NKL3_KIMG_ENTRIES,%ecx
	fillkpt

#if L3_SLOT_KERNBASE > 0
	/* If needed, set up level 3 entries for actual kernel mapping */
	leal	(PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP2_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$NKL3_KIMG_ENTRIES,%ecx
	fillkpt
#endif

	/*
	 * Build L4 for identity mapping. Linked to L3.
	 */
	leal	(PROC0_PML4_OFF)(%esi),%ebx
	leal	(PROC0_PTP3_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$NKL4_KIMG_ENTRIES,%ecx
	fillkpt

	/* Set up L4 entries for actual kernel mapping */
	leal	(PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PTP3_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$NKL4_KIMG_ENTRIES,%ecx
	fillkpt

	/* Install recursive top level PDE (one entry) */
	leal	(PROC0_PML4_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx
	leal	(PROC0_PML4_OFF)(%esi),%eax
	orl	$(PG_V|PG_KW),%eax
	movl	$1,%ecx
	fillkpt_nox

	/*
	 * Startup checklist:
	 * 1. Enable PAE (and SSE while here).
	 */
	movl	%cr4,%eax
	orl	$(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax
	movl	%eax,%cr4

	/*
	 * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions,
	 * and NOX if available.
	 */
	movl	$MSR_EFER,%ecx
	rdmsr
	xorl	%eax,%eax	/* XXX */
	orl	$(EFER_LME|EFER_SCE),%eax
	movl	RELOC(nox_flag),%ebx
	cmpl	$0,%ebx
	je	skip_NOX
	orl	$(EFER_NXE),%eax
skip_NOX:
	wrmsr

	/*
	 * 3. Load %cr3 with pointer to PML4.
	 */
	movl	%esi,%eax
	movl	%eax,%cr3

	/*
	 * 4. Enable paging and the rest of it.
	 */
	movl	%cr0,%eax
	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
	movl	%eax,%cr0
	jmp	compat
compat:

	/*
	 * 5. Not quite done yet, we're now in a compatibility segment, in
	 * legacy mode. We must jump to a long mode segment. Need to set up
	 * a temporary GDT with a long mode segment in it to do that.
	 */
	movl	$RELOC(gdt64_lo),%eax
	lgdt	(%eax)
	movl	$RELOC(farjmp64),%eax
	ljmp	*(%eax)

	.code64
longmode:
	/*
	 * 6. Finally, we're in long mode. However, we're still in the identity
	 * mapped area (could not jump out of that earlier because it would
	 * have been a > 32bit jump). We can do that now, so here we go.
	 */
	movabsq	$longmode_hi,%rax
	jmp	*%rax

longmode_hi:

	/*
	 * We left the identity mapped area. Base address of
	 * the temporary gdt64 should now be in high memory.
	 */
	movq	$RELOC(gdt64_hi),%rax
	lgdt	(%rax)

	/*
	 * We have arrived. There's no need anymore for the identity mapping in
	 * low memory, remove it.
	 */
	movq	$KERNBASE,%r8

#if L2_SLOT_KERNBASE > 0
	movq	$(NKL2_KIMG_ENTRIES+1),%rcx
	leaq	(PROC0_PTP2_OFF)(%rsi),%rbx	/* old, phys address */
	addq	%r8,%rbx			/* new, virt address */
	killkpt
#endif

#if L3_SLOT_KERNBASE > 0
	movq	$NKL3_KIMG_ENTRIES,%rcx
	leaq	(PROC0_PTP3_OFF)(%rsi),%rbx	/* old, phys address */
	addq	%r8,%rbx			/* new, virt address */
	killkpt
#endif

	movq	$NKL4_KIMG_ENTRIES,%rcx
	leaq	(PROC0_PML4_OFF)(%rsi),%rbx	/* old, phys address of PML4 */
	addq	%r8,%rbx			/* new, virt address of PML4 */
	killkpt

	/* Relocate atdevbase. */
	movq	$(TABLESIZE+KERNBASE),%rdx
	addq	%rsi,%rdx
	movq	%rdx,_C_LABEL(atdevbase)(%rip)

	/* Set up bootstrap stack. */
	leaq	(PROC0_STK_OFF)(%rsi),%rax
	addq	%r8,%rax
	movq	%rax,(_C_LABEL(lwp0)+L_PCB)(%rip)	/* XXX L_PCB != uarea */
	leaq	(USPACE-FRAMESIZE)(%rax),%rsp
	movq	%rsi,PCB_CR3(%rax)	/* pcb->pcb_cr3 */
	xorq	%rbp,%rbp		/* mark end of frames */

	xorw	%ax,%ax
	movw	%ax,%gs
	movw	%ax,%fs

	/* XXX merge these */
	leaq	(TABLESIZE+IOM_SIZE)(%rsi),%rdi

#else	/* XEN */
	/* First, reset the PSL. */
	pushq	$2
	popfq

	cld

	/*
	 * Xen info:
	 * - %rsi -> start_info struct
	 * - %rsp -> stack, *theoretically* the last used page
	 *   by Xen bootstrap
	 */
	movq	%rsi, %rbx

	/* Clear BSS. */
	xorq	%rax,%rax
	movq	$_C_LABEL(__bss_start),%rdi
	movq	$_C_LABEL(_end),%rcx
	subq	%rdi,%rcx
	rep
	stosb

	/* Copy start_info to a safe place */
	movq	%rbx,%rsi
	movq	$_C_LABEL(start_info_union),%rdi
	movq	$64,%rcx
	rep
	movsq

	/*
	 * Memory layout at start of the day:
	 * - Kernel image
	 * - Page frames list
	 * - start_info struct. we copied it, so it can be recycled.
	 * - xenstore
	 * - console
	 * - Xen bootstrap page tables
	 * - kernel stack. provided by Xen
	 * - guaranteed 512kB padding
	 *
	 * As we want to rebuild our page tables and place our stack
	 * in proc0 struct, all data starting from after console can be
	 * discarded after we've done a little setup.
	 */

	/*
	 * We want our own page tables, let's rebuild them
	 * We will reclaim xen space afterward INCLUDING stack
	 * so let's change it to a temporary one
	 */

	movq	$tmpstk, %rax
	subq	$8, %rax
	movq	%rax, %rsp

	xorl	%eax,%eax
	cpuid
	movl	%eax,_C_LABEL(cpuid_level)

	movq	$cpu_info_primary, %rdi
	movq	%rdi, CPU_INFO_SELF(%rdi)	/* ci->ci_self = ci */
	movq	$1, %rsi
	call	cpu_init_msrs	/* cpu_init_msrs(ci, true); */

	call	xen_pmap_bootstrap

	/*
	 * First avail returned by xen_pmap_bootstrap in %rax
	 */
	movq	%rax, %rsi
	movq	%rsi,(_C_LABEL(lwp0)+L_PCB)	/* XXX L_PCB != uarea */

	/*
	 * Set new stack and clear segments
	 */
	leaq	(USPACE-FRAMESIZE)(%rsi),%rsp
	xorq	%rbp,%rbp

	xorw	%ax,%ax
	movw	%ax,%gs
	movw	%ax,%fs

	/*
	 * Set first_avail after proc0
	 */
	movq	%rsi,%rdi
	addq	$USPACE,%rdi
	subq	$KERNBASE,%rdi	/* init_x86_64 wants a physical address */
#endif	/* XEN */

	call	_C_LABEL(init_x86_64)
	call	_C_LABEL(main)
END(start)

#if defined(XEN)
/* space for the hypercall call page */
#define HYPERCALL_PAGE_OFFSET 0x1000
.org HYPERCALL_PAGE_OFFSET
ENTRY(hypercall_page)
.skip 0x1000
END(hypercall_page)
#endif /* XEN */

/*
 * int setjmp(label_t *)
 *
 * Used primarily by DDB.
 */
ENTRY(setjmp)
	/*
	 * Only save registers that must be preserved across function
	 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15)
	 * and %rip.
	 */
	movq	%rdi,%rax		/* %rax = label buffer */
	movq	%rbx,(%rax)
	movq	%rsp,8(%rax)
	movq	%rbp,16(%rax)
	movq	%r12,24(%rax)
	movq	%r13,32(%rax)
	movq	%r14,40(%rax)
	movq	%r15,48(%rax)
	movq	(%rsp),%rdx		/* caller's return address */
	movq	%rdx,56(%rax)
	xorl	%eax,%eax		/* return 0 on direct call */
	ret
END(setjmp)

/*
 * int longjmp(label_t *)
 *
 * Used primarily by DDB.
 */
ENTRY(longjmp)
	movq	%rdi,%rax		/* %rax = label buffer */
	movq	(%rax),%rbx
	movq	8(%rax),%rsp
	movq	16(%rax),%rbp
	movq	24(%rax),%r12
	movq	32(%rax),%r13
	movq	40(%rax),%r14
	movq	48(%rax),%r15
	movq	56(%rax),%rdx		/* saved %rip */
	movq	%rdx,(%rsp)		/* overwrite return address */
	movl	$1,%eax			/* setjmp caller sees return 1 */
	ret
END(longjmp)

/*
 * void dumpsys(void)
 *
 * Mimic cpu_switchto() for postmortem debugging.
 */
ENTRY(dumpsys)
	/* Build a fake switch frame. */
	pushq	%rbx
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	/* Save a context. */
	movq	$dumppcb, %rax
	movq	%rsp, PCB_RSP(%rax)
	movq	%rbp, PCB_RBP(%rax)

	call	_C_LABEL(dodumpsys)

	addq	$(5*8), %rsp	/* sizeof(switchframe) - sizeof(%rip) */
	ret
END(dumpsys)

/*
 * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
 *			    bool returning)
 *
 * 1. if (oldlwp != NULL), save its context.
 * 2. then, restore context of newlwp.
 *
 * Note that the stack frame layout is known to "struct switchframe" in
 * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
 * it for a new lwp.
1068 */ 1069ENTRY(cpu_switchto) 1070 pushq %rbx 1071 pushq %r12 1072 pushq %r13 1073 pushq %r14 1074 pushq %r15 1075 1076 movq %rdi,%r13 /* oldlwp */ 1077 movq %rsi,%r12 /* newlwp */ 1078 1079 testq %r13,%r13 /* oldlwp = NULL ? */ 1080 jz skip_save 1081 1082 /* Save old context. */ 1083 movq L_PCB(%r13),%rax 1084 movq %rsp,PCB_RSP(%rax) 1085 movq %rbp,PCB_RBP(%rax) 1086skip_save: 1087 1088 /* Switch to newlwp's stack. */ 1089 movq L_PCB(%r12),%r14 1090#ifdef XEN /* XXX debug code */ 1091 cmpq $0,PCB_RSP(%r14) 1092 jne 999f 1093 callq _C_LABEL(cpu_Debugger); 1094999: 1095#endif 1096 movq PCB_RSP(%r14),%rsp 1097 movq PCB_RBP(%r14),%rbp 1098 1099 /* 1100 * Set curlwp. This must be globally visible in order to permit 1101 * non-interlocked mutex release. 1102 */ 1103 movq %r12,%rcx 1104 xchgq %rcx,CPUVAR(CURLWP) 1105 1106 /* Skip the rest if returning to a pinned LWP. */ 1107 testb %dl,%dl /* returning = true ? */ 1108 jnz switch_return 1109 1110 /* Switch ring0 stack */ 1111#ifndef XEN 1112 movq PCB_RSP0(%r14),%rax 1113 movq %rax,CPUVAR(RSP0) 1114#else 1115 movq %r14,%rdi 1116 callq _C_LABEL(x86_64_switch_context); 1117#endif 1118 1119 /* Don't bother with the rest if switching to a system process. */ 1120 testl $LW_SYSTEM,L_FLAG(%r12) 1121 jnz switch_return 1122 1123 /* Is this process using RAS (restartable atomic sequences)? */ 1124 movq L_PROC(%r12),%rdi 1125 cmpq $0,P_RASLIST(%rdi) 1126 je no_RAS 1127 1128 /* Handle restartable atomic sequences (RAS). */ 1129 movq L_MD_REGS(%r12),%rbx 1130 movq TF_RIP(%rbx),%rsi 1131 call _C_LABEL(ras_lookup) 1132 cmpq $-1,%rax 1133 je no_RAS 1134 movq %rax,TF_RIP(%rbx) 1135no_RAS: 1136 1137 /* 1138 * Restore cr0 including FPU state (may have CR0_TS set). Note that 1139 * IPL_SCHED prevents from FPU interrupt altering the LWP's saved cr0. 
1140 */ 1141#ifndef XEN 1142 movl $IPL_HIGH,CPUVAR(ILEVEL) 1143 movl PCB_CR0(%r14),%ecx /* has CR0_TS clear */ 1144 movq %cr0,%rdx 1145 1146 /* 1147 * If our floating point registers are on a different CPU, 1148 * set CR0_TS so we'll trap rather than reuse bogus state. 1149 */ 1150 cmpq CPUVAR(FPCURLWP),%r12 1151 je skip_TS 1152 orq $CR0_TS,%rcx 1153skip_TS: 1154 1155 /* Reloading CR0 is very expensive - avoid if possible. */ 1156 cmpq %rdx,%rcx 1157 je skip_CR0 1158 movq %rcx,%cr0 1159skip_CR0: 1160 1161 /* The 32bit LWPs are handled differently. */ 1162 testl $PCB_COMPAT32,PCB_FLAGS(%r14) 1163 jne lwp_64bit 1164 1165lwp_32bit: 1166 /* Zero out %fs/%gs registers. */ 1167 xorq %rax,%rax 1168 movw %ax,%fs 1169 CLI(cx) 1170 SWAPGS 1171 movw %ax,%gs 1172 SWAPGS 1173 STI(cx) 1174 1175 /* Zero out GDT descriptors. */ 1176 movq CPUVAR(GDT),%rcx 1177 movq %rax,(GUFS_SEL*8)(%rcx) 1178 movq %rax,(GUGS_SEL*8)(%rcx) 1179 1180 /* Reload 64-bit %fs/%gs MSRs. */ 1181 movl $MSR_FSBASE,%ecx 1182 movl PCB_FS(%r14),%eax 1183 movl 4+PCB_FS(%r14),%edx 1184 wrmsr 1185 movl $MSR_KERNELGSBASE,%ecx 1186 movl PCB_GS(%r14),%eax 1187 movl 4+PCB_GS(%r14),%edx 1188 wrmsr 1189 1190 jmp switch_return 1191 1192lwp_64bit: 1193 /* Reload %fs/%gs GDT descriptors. */ 1194 movq CPUVAR(GDT),%rcx 1195 movq PCB_FS(%r14),%rax 1196 movq %rax,(GUFS_SEL*8)(%rcx) 1197 movq PCB_GS(%r14),%rax 1198 movq %rax,(GUGS_SEL*8)(%rcx) 1199 1200 /* Reload %fs and %gs */ 1201 movq L_MD_REGS(%r12),%rbx 1202 movw TF_FS(%rbx),%fs 1203 CLI(ax) 1204 SWAPGS 1205 movw TF_GS(%rbx),%gs 1206 SWAPGS 1207 STI(ax) 1208#else 1209 movq %r12,%rdi 1210 callq _C_LABEL(x86_64_tls_switch) 1211#endif 1212 1213switch_return: 1214 /* Return to the new LWP, returning 'oldlwp' in %rax. */ 1215 movq %r13,%rax 1216 popq %r15 1217 popq %r14 1218 popq %r13 1219 popq %r12 1220 popq %rbx 1221 ret 1222END(cpu_switchto) 1223 1224/* 1225 * void savectx(struct pcb *pcb); 1226 * 1227 * Update pcb, saving current processor state. 
 */
ENTRY(savectx)
	/* Save stack pointers. */
	movq	%rsp,PCB_RSP(%rdi)
	movq	%rbp,PCB_RBP(%rdi)
	ret
END(savectx)

/*
 * 32-bit 'syscall' from userland is not supported; bounce straight back.
 */
IDTVEC(syscall32)
	sysret		/* go away please */
IDTVEC_END(syscall32)

/*
 * syscall()
 *
 * syscall insn entry.
 * This currently isn't much faster, but it can be made faster in the future.
 * (Actually we've already saved a few 100 clocks by not loading the trap gate)
 */
IDTVEC(syscall)
#ifndef XEN
	/*
	 * The user %rip is in %rcx and the user %flags in %r11. The kernel %cs
	 * and %ss are loaded, but nothing else is.
	 *
	 * The 'swapgs' instruction gives us access to cpu-specific memory where
	 * we can save a user register and then read the LWP's kernel stack
	 * pointer.
	 *
	 * This code doesn't seem to set %ds, this may not matter since it is
	 * ignored in 64bit mode, OTOH the syscall instruction sets %ss and that
	 * is ignored as well.
	 */
	swapgs
	movq	%r15,CPUVAR(SCRATCH)	/* free up %r15 as a scratch register */
	movq	CPUVAR(CURLWP),%r15
	movq	L_PCB(%r15),%r15
	movq	PCB_RSP0(%r15),%r15	/* LWP's kernel stack pointer */

	/*
	 * Make stack look like an 'int nn' frame.  SP(x) addresses trapframe
	 * member x relative to the top of the kernel stack while %rsp still
	 * points at the user stack.
	 */
#define SP(x)	(x)-(TF_SS+8)(%r15)
	movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)	/* user %ss */
	movq	%rsp,SP(TF_RSP)		/* User space rsp */

	movq	%r11,SP(TF_RFLAGS)	/* old rflags from syscall insn */
	movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)
	movq	%rcx,SP(TF_RIP)		/* syscall saves rip in rcx */

	leaq	SP(0),%rsp		/* %rsp now valid after frame */
	movq	CPUVAR(SCRATCH),%r15	/* recover the user's %r15 */
#undef SP

	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
	movq	$T_ASTFLT,TF_TRAPNO(%rsp)

	movw	%es,TF_ES(%rsp)
	sti				/* frame is valid; interrupts OK now */
	INTR_SAVE_GPRS
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	$(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
#else
	/* Xen already switched to kernel stack */
	pushq	%rsi
	STI(si)
	popq	%rsi
	addq	$0x10,%rsp	/* gap to match cs:rip */
	pushq	$2		/* error code */
	pushq	$T_ASTFLT
	subq	$TF_REGSIZE,%rsp
	INTR_SAVE_GPRS
	movw	%fs,TF_FS(%rsp)
	movw	%gs,TF_GS(%rsp)
	movw	%es,TF_ES(%rsp)
	movw	$(LSEL(LUDATA_SEL, SEL_UPL)),TF_DS(%rsp)
#endif

do_syscall:
	movq	CPUVAR(CURLWP),%r14	/* %r14 = curlwp across the call */
	incq	CPUVAR(NSYSCALL)	/* count it atomically */
	movq	%rsp,L_MD_REGS(%r14)	/* save pointer to frame */
	movq	L_PROC(%r14),%r15
	andl	$~MDL_IRET,L_MD_FLAGS(%r14)	/* Allow sysret return */
	movq	%rsp,%rdi		/* Pass frame as arg0 */
	call	*P_MD_SYSCALL(%r15)	/* emulation-specific syscall handler */
.Lsyscall_checkast:
	/*
	 * Disable interrupts to avoid new ASTs (etc) being added and
	 * to ensure we don't take an interrupt with some of the user
	 * registers loaded.
	 */
	CLI(si)
	/* Check for ASTs on exit to user mode. */
	movl	L_MD_ASTPENDING(%r14),%eax
	orl	CPUVAR(WANT_PMAPLOAD),%eax
	jnz	9f

#ifdef DIAGNOSTIC
	cmpl	$IPL_NONE,CPUVAR(ILEVEL)
	jne	spl_error
#endif

	/* ZF from this testl is consumed by the 'jnz 2f' below. */
	testl	$(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
	INTR_RESTORE_GPRS
	movw	TF_ES(%rsp),%es
	SWAPGS				/* back to the user %gs base */
	jnz	2f			/* must return via iret, not sysret */
#ifndef XEN
	movq	TF_RIP(%rsp),%rcx	/* %rip for sysret */
	movq	TF_RFLAGS(%rsp),%r11	/* %flags for sysret */
	movw	TF_DS(%rsp),%ds
	movq	TF_RSP(%rsp),%rsp
	sysretq
#else
	movw	TF_DS(%rsp),%ds
	addq	$TF_RIP,%rsp
	pushq	$256	/* VGCF_IN_SYSCALL */
	jmp	HYPERVISOR_iret
#endif

/*
 * If the syscall might have modified some registers, or we are a 32bit
 * process we must return to user with an 'iret' instruction.
 * If the iret faults in kernel (assumed due to illegal register values)
 * then a SIGSEGV will be signalled.
 */
2:
	movw	TF_DS(%rsp),%ds
	addq	$TF_RIP,%rsp		/* leave only the iret frame */
	iretq

#ifdef DIAGNOSTIC
	/* Report SPL error */
spl_error:
	movabsq	$4f,%rdi
	movl	TF_RAX(%rsp),%esi
	movl	TF_RDI(%rsp),%edx
	movl	%ebx,%ecx	/* NOTE(review): %ebx is whatever the handler left; verify intent */
	movl	CPUVAR(ILEVEL),%r8d
	xorq	%rax,%rax	/* variadic call: no vector args in %al */
	call	_C_LABEL(printf)
	movl	$IPL_NONE,%edi
	call	_C_LABEL(spllower)
	jmp	.Lsyscall_checkast
4:	.asciz	"WARNING: SPL NOT LOWERED ON SYSCALL %d %d EXIT %x %x\n"
#endif

/* AST pending or pmap load needed */
9:
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jz	10f
	STI(si)
	call	_C_LABEL(do_pmap_load)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
10:
	CLEAR_ASTPENDING(%r14)
	STI(si)
	/* Pushed T_ASTFLT into tf_trapno on entry. */
	movq	%rsp,%rdi
	call	_C_LABEL(trap)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
IDTVEC_END(syscall)

/*
 * void lwp_trampoline(void);
 *
 * This is a trampoline function, run by newly created LWPs, in order to
 * do additional setup in their context.  The registers used here
 * (%rbp, %rax, %r13, %r12) are set up by cpu_lwp_fork() in the new
 * LWP's switchframe.
 */
NENTRY(lwp_trampoline)
	movq	%rbp,%rsi
	movq	%rbp,%r14	/* for .Lsyscall_checkast */
	movq	%rax,%rdi
	xorq	%rbp,%rbp	/* terminate the frame-pointer chain */
	call	_C_LABEL(lwp_startup)
	movq	%r13,%rdi	/* argument for the LWP's entry function */
	call	*%r12		/* ... which is in %r12 */
	jmp	.Lsyscall_checkast	/* return to user as from a syscall */
END(lwp_trampoline)

/*
 * oosyscall()
 *
 * Old call gate entry for syscall. only needed if we're
 * going to support running old i386 NetBSD 1.0 or ibcs2 binaries, etc,
 * on NetBSD/amd64.
 * The 64bit call gate can't request that arguments be copied from the
 * user stack (which the i386 code uses to get a gap for the flags).
 * push/pop are <read>:<modify_sp>:<write> cycles.
 */
IDTVEC(oosyscall)
	/* Set rflags in trap frame.
	 */
	pushq	(%rsp)		/* move user's %eip */
	pushq	16(%rsp)	/* and %cs */
	popq	8(%rsp)
	pushfq			/* insert kernel rflags into the gap */
	popq	16(%rsp)
	pushq	$7		/* size of instruction for restart */
	jmp	osyscall1
IDTVEC_END(oosyscall)

/*
 * osyscall()
 *
 * Trap gate entry for int $0x80 syscall, also used by sigreturn.
 */
IDTVEC(osyscall)
#ifdef XEN
	/* Xen pushes %rip/%r11 info; recover it and drop it from the stack. */
	movq (%rsp),%rcx
	movq 8(%rsp),%r11
	addq $0x10,%rsp
#endif
	pushq	$2		/* size of instruction for restart */
osyscall1:
	pushq	$T_ASTFLT	/* trap # for doing ASTs */
	INTRENTRY
	STI(si)
	jmp	do_syscall	/* shared tail with the 'syscall' entry above */
IDTVEC_END(osyscall)

/*
 * bool sse2_idlezero_page(void *pg)
 *
 * Zero a page without polluting the cache.  Preemption must be
 * disabled by the caller.  Abort if a preemption is pending.
 * Returns true if the page is zeroed, false if not.
 */
ENTRY(sse2_idlezero_page)
	pushq	%rbp
	movq	%rsp,%rbp
	movl	$(PAGE_SIZE/64), %ecx	/* loop count: one iteration per 64 bytes */
	xorq	%rax, %rax
	.align	16
1:
	/* Bail out between cache lines if a kernel preemption is wanted. */
	testl	$RESCHED_KPREEMPT, CPUVAR(RESCHED)
	jnz	2f
	/* Non-temporal stores: bypass the cache. */
	movnti	%rax, 0(%rdi)
	movnti	%rax, 8(%rdi)
	movnti	%rax, 16(%rdi)
	movnti	%rax, 24(%rdi)
	movnti	%rax, 32(%rdi)
	movnti	%rax, 40(%rdi)
	movnti	%rax, 48(%rdi)
	movnti	%rax, 56(%rdi)
	addq	$64, %rdi
	decl	%ecx
	jnz	1b
	sfence			/* order the non-temporal stores */
	incl	%eax		/* return true */
	popq	%rbp
	ret
2:
	sfence
	popq	%rbp
	ret			/* %eax still 0: return false */
END(sse2_idlezero_page)

/*
 * void pagezero(vaddr_t va)
 *
 * Zero a page without polluting the cache.
 */

ENTRY(pagezero)
	/* %rdi = end of page; %rdx counts from -PAGE_SIZE up to 0. */
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorq	%rax,%rax
1:
	/* Non-temporal stores: bypass the cache. */
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	movnti	%rax,32(%rdi,%rdx)
	movnti	%rax,40(%rdi,%rdx)
	movnti	%rax,48(%rdi,%rdx)
	movnti	%rax,56(%rdi,%rdx)
	addq	$64,%rdx
	jne	1b		/* until %rdx wraps to zero */
	sfence			/* order the non-temporal stores */
	ret
END(pagezero)