xref: /openbsd/sys/arch/amd64/amd64/locore0.S (revision 8998e210)
1/*	$OpenBSD: locore0.S,v 1.23 2024/05/12 16:49:38 guenther Exp $	*/
2/*	$NetBSD: locore.S,v 1.13 2004/03/25 18:33:17 drochner Exp $	*/
3
4/*
5 * Copyright-o-rama!
6 */
7
8/*
9 * Copyright (c) 2001 Wasabi Systems, Inc.
10 * All rights reserved.
11 *
12 * Written by Frank van der Linden for Wasabi Systems, Inc.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 *    must display the following acknowledgement:
24 *      This product includes software developed for the NetBSD Project by
25 *      Wasabi Systems, Inc.
26 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
27 *    or promote products derived from this software without specific prior
28 *    written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
32 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
33 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
34 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43
44/*-
45 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
46 * All rights reserved.
47 *
48 * This code is derived from software contributed to The NetBSD Foundation
49 * by Charles M. Hannum.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions
53 * are met:
54 * 1. Redistributions of source code must retain the above copyright
55 *    notice, this list of conditions and the following disclaimer.
56 * 2. Redistributions in binary form must reproduce the above copyright
57 *    notice, this list of conditions and the following disclaimer in the
58 *    documentation and/or other materials provided with the distribution.
59 *
60 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
62 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
63 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
64 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
65 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
66 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
67 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
68 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
70 * POSSIBILITY OF SUCH DAMAGE.
71 */
72
73/*-
74 * Copyright (c) 1990 The Regents of the University of California.
75 * All rights reserved.
76 *
77 * This code is derived from software contributed to Berkeley by
78 * William Jolitz.
79 *
80 * Redistribution and use in source and binary forms, with or without
81 * modification, are permitted provided that the following conditions
82 * are met:
83 * 1. Redistributions of source code must retain the above copyright
84 *    notice, this list of conditions and the following disclaimer.
85 * 2. Redistributions in binary form must reproduce the above copyright
86 *    notice, this list of conditions and the following disclaimer in the
87 *    documentation and/or other materials provided with the distribution.
88 * 3. Neither the name of the University nor the names of its contributors
89 *    may be used to endorse or promote products derived from this software
90 *    without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 *	@(#)locore.s	7.3 (Berkeley) 5/13/91
105 */
106
107#include "assym.h"
108#include "lapic.h"
109#include "ksyms.h"
110
111#include <machine/param.h>
112#include <machine/segments.h>
113#include <machine/specialreg.h>
114
115/*
116 * override user-land alignment before including asm.h
117 */
118#define	ALIGN_DATA	.align	8,0xcc
119
120#include <machine/asm.h>
121
122/* XXX temporary kluge; these should not be here */
123/* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
124#include <dev/isa/isareg.h>
125
126#define	_RELOC(x)	((x) - KERNBASE)
127#define	RELOC(x)	_RELOC(x)
128
129/*
130 * Some hackage to deal with 64bit symbols in 32 bit mode.
131 * This may not be needed if things are cleaned up a little.
132 */
133
134	.text
135	.globl	kernel_text
136	.set	kernel_text,KERNTEXTOFF
137
138	.code32
139
140	.globl	start
141start:	movw	$0x1234,0x472			# warm boot
142
143	/*
144	 * Load parameters from stack
145	 * (howto, bootdev, bootapiver, esym, extmem (unused), cnvmem, ac, av)
146	 */
147	movl	4(%esp),%eax
148	movl	%eax, RELOC(boothowto)
149	movl	8(%esp),%eax
150	movl	%eax, RELOC(bootdev)
151
152	/*
153	 * Syms are placed after last load and bss of the kernel.
154	 * XXX Boot ignores 2MB roundup of _end, so esyms can be < _end.
155	 */
156	movl	16(%esp), %eax
157	testl	%eax,%eax
158	jz	1f
159	addl	$(KERNBASE & 0xffffffff),%eax
160	movl	$RELOC(esym),%ebp
161	movl	%eax,(%ebp)
162	movl	$(KERNBASE >> 32),4(%ebp)
1631:
164	movl	24(%esp), %eax
165	movl	%eax, RELOC(biosbasemem)
166
167	movl	12(%esp), %eax
168	movl	%eax, RELOC(bootapiver)
169
170	/*
171	 * Copy the boot arguments to bootinfo[] in machdep.c.
172	 *
173	 * We are passed the size of the data /boot passed to us in
174	 * 28(%esp). We copy up to bootinfo_size bytes of data into
175	 * bootinfo and report back how much we copied in bootinfo_size.
176	 *
177	 * machdep.c can then take action if bootinfo_size >= bootinfo[]
178	 * (which would meant that we may have been passed too much data).
179	 */
180	movl	28(%esp), %eax
181	movl	%eax, %ecx
182	cmpl	RELOC(bootinfo_size), %ecx	/* Too much? */
183	jb	bi_size_ok
184	movl	RELOC(bootinfo_size), %ecx	/* Only copy this much */
185bi_size_ok:
186	movl	%eax, RELOC(bootinfo_size)	/* Report full amount */
187
188	movl	$RELOC(bootinfo), %edi		/* Destination */
189	movl	32(%esp), %esi			/* Source */
190	rep movsb				/* Copy this many bytes */
191
192	/* First, reset the PSL. */
193	pushl	$PSL_MBO
194	popfl
195
196	xorl	%eax,%eax
197	cpuid
198	movl	%eax,RELOC(cpuid_level)
199	movl	$RELOC(cpu_vendor),%ebp
200	movl	%ebx,(%ebp)
201	movl	%edx,4(%ebp)
202	movl	%ecx,8(%ebp)
203	movl	$0, 12(%ebp)
204
205	/*
206	 * Determine if CPU has meltdown. Certain Intel CPUs do not properly
207	 * respect page permissions when speculatively loading data into
208	 * the cache ("Meltdown" CVE). These CPUs must utilize a secondary
209	 * sanitized page table lacking kernel mappings when executing user
210	 * processes, and may not use PG_G global PTEs for kernel VAs.
211	 */
212	movl	$0x1, RELOC(cpu_meltdown)	/* assume insecure at first */
213	movl	$0x0, RELOC(pg_g_kern)
214
215	cmpl	$0x756e6547, %ebx	# "Genu"
216	jne	.Lcpu_secure
217	cmpl	$0x6c65746e, %ecx	# "ntel"
218	jne	.Lcpu_secure
219	cmpl	$0x49656e69, %edx	# "ineI"
220	jne	.Lcpu_secure
221
222	/*
223	 * Intel CPU, now check if IA32_ARCH_CAPABILITIES is supported and
224	 * if it says this CPU is safe.
225	 */
226	cmpl	$0x7,	%eax
227	jl	.Lcpu_check_finished
228
229	movl	$0x7,	%eax
230	xorl	%ecx,%ecx
231	cpuid
232	testl	$SEFF0EDX_ARCH_CAP, %edx
233	jz	.Lcpu_check_finished
234
235	/* IA32_ARCH_CAPABILITIES MSR available, use it to check CPU security */
236	movl	$MSR_ARCH_CAPABILITIES, %ecx
237	rdmsr
238	testl	$ARCH_CAP_RDCL_NO, %eax
239	jz	.Lcpu_check_finished
240
241.Lcpu_secure:
242	movl	$0x0, RELOC(cpu_meltdown)
243	movl	$PG_G, RELOC(pg_g_kern)
244
245.Lcpu_check_finished:
246	movl	$1,%eax
247	cpuid
248	movl	%eax,RELOC(cpu_id)
249	movl	%ebx,RELOC(cpu_ebxfeature)
250	movl	%ecx,RELOC(cpu_ecxfeature)
251	movl	%edx,RELOC(cpu_feature)
252
253	movl	$0x80000001, %eax
254	cpuid
255	andl	$CPUID_NXE, %edx	/* other bits may clash */
256	jz	cont
257
258	/*
259	 * We have NX, set pg_nx accordingly.
260	 * NX bit is bit 63 (bit 31 of the second 32 bit dword) - need
261	 * to use 32 bit registers here
262	 */
263	pushl	%edx
264	movl	RELOC((pg_nx + 4)), %edx	/* Second dword */
265	orl	$0x80000000, %edx		/* Bit 31 (really 63) */
266	movl	%edx, RELOC((pg_nx + 4))
267	popl	%edx
268cont:
269	orl	%edx, RELOC(cpu_feature)
270
271	/*
272	 * Finished with old stack; load new %esp now instead of later so we
273	 * can trace this code without having to worry about the trace trap
274	 * clobbering the memory test or the zeroing of the bss+bootstrap page
275	 * tables.
276	 *
277	 * The boot program should check:
278	 *	text+data <= &stack_variable - more_space_for_stack
279	 *	text+data+bss+pad+space_for_page_tables <= end_of_memory
280	 * Oops, the gdt is in the carcass of the boot program so clearing
281	 * the rest of memory is still not possible.
282	 */
283	movl	$RELOC(tmpstk),%esp
284
285/*
286 * Virtual address space of kernel:
287 *
288 * text | data | bss | [syms] | page dir | proc0 kstack | L1 ptp | L2 ptp | L3
289 *			      0          1       2      3
290 */
291
292#if L2_SLOT_KERNBASE > 0
293#define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1))
294#else
295#define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1)
296#endif
297
298#if L3_SLOT_KERNBASE > 0
299#define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES)
300#else
301#define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES
302#endif
303
304
305#define PROC0_PML4_OFF	0
306#define PROC0_STK_OFF	(PROC0_PML4_OFF + NBPG)
307#define PROC0_PTP3_OFF	(PROC0_STK_OFF + UPAGES * NBPG)
308#define PROC0_PTP2_OFF	(PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * NBPG)
309#define PROC0_PTP1_OFF	(PROC0_PTP2_OFF + TABLE_L3_ENTRIES * NBPG)
310#define	PROC0_DMP3_OFF	(PROC0_PTP1_OFF + TABLE_L2_ENTRIES * NBPG)
311#define PROC0_DMP2_OFF	(PROC0_DMP3_OFF + NDML3_ENTRIES * NBPG)
312#define TABLESIZE \
313    ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES + \
314	NDML3_ENTRIES + NDML2_ENTRIES + 3) * NBPG)
315
316#define fillkpt \
3171:	movl	%eax,(%ebx)	;	/* store phys addr */ \
318	movl	$0,4(%ebx)	;	/* upper 32 bits 0 */ \
319	addl	$8,%ebx		;	/* next pte/pde */ \
320	addl	$NBPG,%eax	;	/* next phys page */ \
321	loop	1b		;	/* till finished */
322
323
324#define fillkpt_nx \
325	pushl	%ebp				;	/* save */ \
3261:	movl	%eax,(%ebx)			;	/* store phys addr */ \
327	movl	RELOC((pg_nx + 4)), %ebp	;	/* NX bit? */ \
328	movl	%ebp,4(%ebx)			;	/* upper 32 bits */ \
329	addl	$8,%ebx				;	/* next pte/pde */ \
330	addl	$NBPG,%eax			;	/* next phys page */ \
331	loop	1b				;	/* till finished */ \
332	popl	%ebp
333
334	/* Find end of kernel image. */
335	movl	$RELOC(end),%edi
336#if (NKSYMS || defined(DDB))
337	/* Save the symbols (if loaded). */
338	movl	RELOC(esym),%eax
339	testl	%eax,%eax
340	jz	1f
341	subl	$(KERNBASE & 0xffffffff),%eax	/* XXX */
342	/* Page tables must be after symbols and after kernel image. */
343	cmpl	%eax,%edi
344	jg	1f
345	movl	%eax,%edi
3461:
347#endif
348	/* Clear tables */
349	movl	%edi,%esi
350	addl	$PGOFSET,%esi
351	andl	$~PGOFSET,%esi
352
353	movl	%esi,%edi
354	xorl	%eax,%eax
355	cld
356	movl	$TABLESIZE,%ecx
357	shrl	$2,%ecx
358	rep
359	stosl
360
361	leal	(PROC0_PTP1_OFF)(%esi), %ebx
362
363	/*
364	 * Compute etext - KERNBASE. This can't be > 4G, or we can't deal
365	 * with it anyway, since we can't load it in 32 bit mode. So use
366	 * the bottom 32 bits.
367	 */
368	movl	$RELOC(etext),%edx
369	addl	$PGOFSET,%edx
370	andl	$~PGOFSET,%edx
371
372	/*
373	 * Skip the first 16 MB.
374	 */
375	movl	$(KERNTEXTOFF - KERNBASE),%eax
376	movl	%eax,%ecx
377	shrl	$(PGSHIFT-3),%ecx	/* ((n >> PGSHIFT) << 3) for # pdes */
378	addl	%ecx,%ebx
379
380	/* Map kernel text RO, X */
381	movl	%edx,%ecx
382	subl	%eax,%ecx
383	shrl	$PGSHIFT,%ecx
384	orl	$(PG_V|PG_KR),%eax
385	fillkpt
386
387	/* Map .rodata RO, NX */
388	movl	$RELOC(__rodata_start), %eax
389	movl	$RELOC(erodata), %ecx
390	addl	$PGOFSET, %ecx
391	andl	$~PGOFSET, %ecx
392	subl	%eax, %ecx
393	shrl	$PGSHIFT, %ecx
394	orl	$(PG_V|PG_KR), %eax
395	fillkpt_nx
396
397	/* Map the data and BSS sections RW, NX */
398	movl	$RELOC(__data_start), %eax
399	movl	$RELOC(__kernel_bss_end),%ecx
400	addl	$PGOFSET, %ecx
401	andl	$~PGOFSET, %ecx
402	subl	%eax, %ecx
403	shrl	$PGSHIFT,%ecx
404	orl	$(PG_V|PG_KW), %eax
405	fillkpt_nx
406
407	/* Map "hole" at end of BSS RO, NX */
408	movl	$RELOC(__kernel_bss_end), %eax
409	movl	$RELOC(end), %ecx
410	addl	$PGOFSET, %ecx
411	andl	$~PGOFSET, %ecx
412	cmpl	%eax, %ecx
413	je	map_syms
414	subl	%eax, %ecx
415	shrl	$PGSHIFT, %ecx
416	orl	$(PG_V|PG_KR), %eax
417	fillkpt_nx
418
419map_syms:
420	/* Map symbol space RO, NX */
421	movl	$RELOC(end), %eax
422	movl	%esi, %ecx
423	addl	$PGOFSET, %ecx
424	andl	$~PGOFSET, %ecx
425	cmpl	%eax, %ecx
426	je	map_tables
427	subl	%eax, %ecx
428	shrl	$PGSHIFT, %ecx
429	orl	$(PG_V|PG_KR), %eax
430	fillkpt_nx
431
432map_tables:
433	/* Map the bootstrap tables RW, NX */
434	movl	%esi, %edx
435	leal	(PG_V|PG_KW)(%edx),%eax
436	movl	$TABLESIZE,%ecx
437	shrl	$PGSHIFT,%ecx
438	fillkpt_nx
439
440	/* Map ISA I/O mem (later atdevbase) RW, NX */
441	movl	$(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax
442	movl	$(IOM_SIZE>>PGSHIFT),%ecx
443	fillkpt_nx
444
445	/* Set up level 2 pages (RWX) */
446	leal	(PROC0_PTP2_OFF)(%esi),%ebx
447	leal	(PROC0_PTP1_OFF)(%esi),%eax
448	orl	$(PG_V|PG_KW), %eax
449	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
450	fillkpt
451
452#if L2_SLOT_KERNBASE > 0
453	/* If needed, set up L2 entries for actual kernel mapping (RWX) */
454	leal	(PROC0_PTP2_OFF+ L2_SLOT_KERNBASE*8)(%esi),%ebx
455	leal	(PROC0_PTP1_OFF)(%esi),%eax
456	orl	$(PG_V|PG_KW), %eax
457	movl	$(NKL2_KIMG_ENTRIES+1),%ecx
458	fillkpt
459#endif
460
461	/* Set up level 3 pages (RWX) */
462	leal	(PROC0_PTP3_OFF)(%esi),%ebx
463	leal	(PROC0_PTP2_OFF)(%esi),%eax
464	orl	$(PG_V|PG_KW), %eax
465	movl	$NKL3_KIMG_ENTRIES,%ecx
466	fillkpt
467
468#if L3_SLOT_KERNBASE > 0
469	/* If needed, set up L3 entries for actual kernel mapping (RWX) */
470	leal	(PROC0_PTP3_OFF+ L3_SLOT_KERNBASE*8)(%esi),%ebx
471	leal	(PROC0_PTP2_OFF)(%esi),%eax
472	orl	$(PG_V|PG_KW), %eax
473	movl	$NKL3_KIMG_ENTRIES,%ecx
474	fillkpt
475#endif
476
477	/* Set up top level entries for identity mapping (RWX) */
478	leal	(PROC0_PML4_OFF)(%esi),%ebx
479	leal	(PROC0_PTP3_OFF)(%esi),%eax
480	orl	$(PG_V|PG_KW), %eax
481	movl	$NKL4_KIMG_ENTRIES,%ecx
482	fillkpt
483
484	/* Set up top level entries for actual kernel mapping (RWX) */
485	leal	(PROC0_PML4_OFF + L4_SLOT_KERNBASE*8)(%esi),%ebx
486	leal	(PROC0_PTP3_OFF)(%esi),%eax
487	orl	$(PG_V|PG_KW), %eax
488	movl	$NKL4_KIMG_ENTRIES,%ecx
489	fillkpt
490
491	/*
492	 * Map the first 4 GB with the direct map. We'll map the rest
493	 * in pmap_bootstrap. But we always need the first 4GB during
494	 * bootstrap. The direct map is mapped RW, NX. We also change
495	 * the permissions on the 2MB pages corresponding to the kernel
496	 * PAs to RO to prevent someone writing to the kernel area
497	 * via the direct map.
498	 */
499	leal	(PROC0_DMP2_OFF)(%esi), %ebx
500	xorl	%eax, %eax
501	movl	$(NDML2_ENTRIES * NPDPG), %ecx
5021:	orl	$(PG_V|PG_KW|PG_PS), %eax
503	orl	RELOC(pg_g_kern), %eax
504	cmpl	$__kernel_phys_base, %eax
505	jl	store_pte
506	cmpl	$__kernel_phys_end, %eax
507	jg	store_pte
508	andl	$(~PG_KW), %eax
509store_pte:
510	movl	%eax, (%ebx)
511	pushl	%ebp
512	movl	RELOC((pg_nx + 4)), %ebp
513	movl	%ebp, 4(%ebx)
514	popl	%ebp
515	addl	$8, %ebx
516	addl	$NBPD_L2, %eax
517	loop	1b
518
519	leal	(PROC0_DMP3_OFF)(%esi), %ebx
520	leal	(PROC0_DMP2_OFF)(%esi), %eax
521	orl	$(PG_V|PG_KW), %eax
522	movl	$NDML2_ENTRIES, %ecx
523	fillkpt_nx
524
525	leal	(PROC0_PML4_OFF + PDIR_SLOT_DIRECT * 8)(%esi), %ebx
526	leal	(PROC0_DMP3_OFF)(%esi), %eax
527	orl	$(PG_V|PG_KW), %eax
528	movl	$NDML3_ENTRIES, %ecx
529	fillkpt_nx
530
531	/* Install recursive top level PDE */
532	leal	(PROC0_PML4_OFF + PDIR_SLOT_PTE*8)(%esi),%ebx
533	leal	(PROC0_PML4_OFF)(%esi),%eax
534	orl	$(PG_V|PG_KW),%eax
535	movl	%eax,(%ebx)
536	pushl	%ebp
537	movl	RELOC((pg_nx + 4)), %ebp
538	movl	%ebp, 4(%ebx)
539	popl	%ebp
540
541	/*
542	 * Startup checklist:
543	 * 1. Enable PAE (and SSE while here).
544	 */
545	movl	%cr4,%eax
546	orl	$(CR4_DEFAULT),%eax
547	movl	%eax,%cr4
548
549	/*
550	 * 2. Set Long Mode Enable in EFER. Also enable the
551	 *    syscall extensions and NX (if available).
552	 */
553	movl	$MSR_EFER,%ecx
554	rdmsr
555	xorl	%eax,%eax	/* XXX */
556	orl	$(EFER_LME|EFER_SCE),%eax
557	movl	RELOC((pg_nx + 4)), %ebx
558	cmpl	$0, %ebx
559	je	write_efer
560	orl	$(EFER_NXE), %eax
561write_efer:
562	wrmsr
563
564	/*
565	 * 3. Load %cr3 with pointer to PML4.
566	 */
567	movl	%esi,%eax
568	movl	%eax,%cr3
569
570	/*
571	 * 4. Enable paging and the rest of it.
572	 */
573	movl	%cr0,%eax
574	orl	$CR0_DEFAULT,%eax
575	movl	%eax,%cr0
576	jmp	compat
577compat:
578
579	/*
580	 * 5.
581	 * Not quite done yet, we're now in a compatibility segment,
582	 * in legacy mode. We must jump to a long mode segment.
583	 * Need to set up a temporary GDT with a long mode segment
584	 * in it to do that.
585	 */
586
587	movl	$RELOC(gdt64),%eax
588	lgdt	(%eax)
589	movl	$RELOC(farjmp64),%eax
590	ljmp	*(%eax)
591
592.code64
593longmode:
594	/*
595	 * 6.
596	 * Finally, we're in long mode. However, we're still
597	 * in the identity mapped area (could not jump out
598	 * of that earlier because it would have been a > 32bit
599	 * jump). We can do that now, so here we go.
600	 */
601	movabsq	$longmode_hi,%rax
602	jmp	*%rax
603longmode_hi:
604	/*
605	 * We have arrived.
606	 * There's no need anymore for the identity mapping in low
607	 * memory, remove it.
608	 */
609	movq	$KERNBASE,%r8
610
611#if L2_SLOT_KERNBASE > 0
612	movq	$(NKL2_KIMG_ENTRIES+1),%rcx
613	leaq	(PROC0_PTP2_OFF)(%rsi),%rbx
614	addq	%r8, %rbx
6151:	movq	$0 ,(%rbx)
616	addq	$8,%rbx
617	loop	1b
618#endif
619
620#if L3_SLOT_KERNBASE > 0
621	movq	$NKL3_KIMG_ENTRIES,%rcx
622	leaq	(PROC0_PTP3_OFF)(%rsi),%rbx
623	addq	%r8, %rbx
6241:	movq	$0 ,(%rbx)
625	addq	$8,%rbx
626	loop	1b
627#endif
628
629	movq	$NKL4_KIMG_ENTRIES,%rcx
630	leaq	(PROC0_PML4_OFF)(%rsi),%rbx	# old, phys address of PML4
631	addq	%r8, %rbx			# new, virtual address of PML4
6321:	movq	$0, (%rbx)
633	addq	$8,%rbx
634	loop	1b
635
636	/* Relocate atdevbase. */
637	movq	$(TABLESIZE+KERNBASE),%rdx
638	addq	%rsi,%rdx
639	movq	%rdx,atdevbase(%rip)
640
641	/* Record start of symbols */
642	movq	$__kernel_bss_end, ssym(%rip)
643
644	/* Set up bootstrap stack. */
645	leaq	(PROC0_STK_OFF)(%rsi),%rax
646	addq	%r8,%rax
647	movq	%rax,proc0paddr(%rip)
648	leaq	(USPACE-FRAMESIZE)(%rax),%rsp
649
650	/*
651	 * Set proc0's %cr3 to bootstrap page tables. Will be overwritten when
652	 * pmap_randomize is called later.
653	 */
654	movq	%rsi,PCB_CR3(%rax)	# pcb->pcb_cr3
655
656	xorq	%rbp,%rbp		# mark end of frames
657
658	xorw	%ax,%ax
659	movw	%ax,%gs
660	movw	%ax,%fs
661
662	leaq	TABLESIZE(%rsi),%rdi
663	subq	$(NBPG*3), %rdi
664
665	/* XXX merge these */
666	call	init_x86_64
667	call	main
668
669	.section .codepatch,"a"
670	.align	8, 0xcc
671	.globl codepatch_begin
672codepatch_begin:
673	.previous
674
675	.section .codepatchend,"a"
676	.globl codepatch_end
677codepatch_end:
678	.previous
679
680	.data
681	.globl	gdt64
682gdt64:
683	.word	gdt64_end-gdt64_start-1
684	.quad	_RELOC(gdt64_start)
685	.align 64, 0xcc
686
687gdt64_start:
688	.quad 0x0000000000000000	/* always empty */
689	.quad 0x00af9a000000ffff	/* kernel CS */
690	.quad 0x00cf92000000ffff	/* kernel DS */
691gdt64_end:
692
693farjmp64:
694	.long	longmode-KERNBASE
695	.word	GSEL(GCODE_SEL, SEL_KPL)
696
697	.align 8, 0xcc
698	.space 512
699tmpstk:
700
701