1 /*
2 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net
3 * All rights reserved.
4 *
5 * This code is part of the NVMM hypervisor.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include "smallkern.h"
30 #include "pdir.h"
31 #include "trap.h"
32
33 #include <sys/bitops.h>
34 #include <machine/reg.h>
35 #include <machine/specialreg.h>
36 #include <machine/frame.h>
37 #include <machine/tss.h>
38 #include <machine/segments.h>
39
/*
 * Byte offsets of the descriptors inside the GDT. Consecutive slots are
 * 8 bytes apart.
 */
#define SMALLKERN_GDT_NUL_OFF	0x00	/* slot 0 */
#define SMALLKERN_GDT_CS_OFF	0x08	/* slot 1 */
#define SMALLKERN_GDT_DS_OFF	0x10	/* slot 2 */
#define SMALLKERN_GDT_TSS_OFF	0x18	/* slot 3 */

/*
 * On DragonFly a handful of machine-dependent names are spelled
 * differently. Alias them so the code below can use one set of names.
 */
#if defined(__DragonFly__)
#define SDT_SYS386TSS		SDT_SYSTSS	/* 9: system 64-bit TSS available */
#define SDT_SYS386IGT		SDT_SYSIGT	/* 14: system 64-bit interrupt gate */
#define APICBASE_PHYSADDR	APICBASE_ADDRESS /* 0xfffff000: physical address */
#define sys_segment_descriptor	system_segment_descriptor
#define x86_64_tss		x86_64tss
#define __arraycount(x)		nitems(x)
#endif /* __DragonFly__ */
54
fatal(char * msg)55 void fatal(char *msg)
56 {
57 print("\n");
58 print_ext(RED_ON_BLACK, "********** FATAL ***********\n");
59 print_ext(RED_ON_BLACK, msg);
60 print("\n");
61 print_ext(RED_ON_BLACK, "****************************\n");
62
63 while (1);
64 }
65
66 /* -------------------------------------------------------------------------- */
67
/*
 * Frame handed to trap().
 *
 * NOTE(review): the layout is presumably produced by the assembly trap
 * stubs (not visible in this file), so field order and sizes must not be
 * changed without updating them - confirm against the stubs.
 */
struct smallframe {
	uint64_t sf_trapno;	/* trap number; trap() uses it to index trap_type[] */
	uint64_t sf_err;	/* presumably the error code of the fault */
	uint64_t sf_rip;	/* presumably the interrupted instruction pointer */
	uint64_t sf_cs;		/* presumably the interrupted code segment */
	uint64_t sf_rflags;	/* presumably the interrupted RFLAGS */
	uint64_t sf_rsp;	/* presumably the interrupted stack pointer */
	uint64_t sf_ss;		/* presumably the interrupted stack segment */
};
77
/* Forward declarations of the file-local descriptor/TSS/IDT helpers. */
static void setregion(struct region_descriptor *rd, void *base,
    uint16_t limit);
static void setgate(struct gate_descriptor *gd, void *func, int ist,
    int type, int dpl, int sel);
static void set_sys_segment(struct sys_segment_descriptor *sd, void *base,
    size_t limit, int type, int dpl, int gran);
static void set_sys_gdt(int slotoff, void *base, size_t limit, int type,
    int dpl, int gran);
static void init_tss(void);
static void init_idt(void);
85
86 static char *trap_type[] = {
87 "privileged instruction fault", /* 0 T_PRIVINFLT */
88 "breakpoint trap", /* 1 T_BPTFLT */
89 "arithmetic trap", /* 2 T_ARITHTRAP */
90 "asynchronous system trap", /* 3 T_ASTFLT */
91 "protection fault", /* 4 T_PROTFLT */
92 "trace trap", /* 5 T_TRCTRAP */
93 "page fault", /* 6 T_PAGEFLT */
94 "alignment fault", /* 7 T_ALIGNFLT */
95 "integer divide fault", /* 8 T_DIVIDE */
96 "non-maskable interrupt", /* 9 T_NMI */
97 "overflow trap", /* 10 T_OFLOW */
98 "bounds check fault", /* 11 T_BOUND */
99 "FPU not available fault", /* 12 T_DNA */
100 "double fault", /* 13 T_DOUBLEFLT */
101 "FPU operand fetch fault", /* 14 T_FPOPFLT */
102 "invalid TSS fault", /* 15 T_TSSFLT */
103 "segment not present fault", /* 16 T_SEGNPFLT */
104 "stack fault", /* 17 T_STKFLT */
105 "machine check fault", /* 18 T_MCA */
106 "SSE FP exception", /* 19 T_XMM */
107 "hardware interrupt", /* 20 T_RESERVED */
108 };
109 size_t trap_types = __arraycount(trap_type);
110
111 static uint8_t idtstore[PAGE_SIZE] __aligned(PAGE_SIZE);
112 static uint8_t faultstack[PAGE_SIZE] __aligned(PAGE_SIZE);
113 static struct x86_64_tss smallkern_tss;
114
115 static void
triple_fault(void)116 triple_fault(void)
117 {
118 char *p = NULL;
119 memset(&idtstore, 0, PAGE_SIZE);
120 *p = 0;
121 }
122
123 /*
124 * Trap handler.
125 */
126 void
trap(struct smallframe * sf)127 trap(struct smallframe *sf)
128 {
129 uint64_t trapno = sf->sf_trapno;
130 static int ntrap = 0;
131 static float f = 0.0;
132 char *buf;
133
134 f += 1.0f;
135 if (ntrap++ == 6) {
136 triple_fault();
137 }
138 if (ntrap != (int)f) {
139 print_ext(RED_ON_BLACK, "!!! FPU BUG !!!\n");
140 }
141
142 if (trapno < trap_types) {
143 buf = trap_type[trapno];
144 } else {
145 buf = "unknown trap";
146 }
147
148 if (trapno == T_RESERVED) {
149 /* Disable external interrupts. */
150 lcr8(15);
151 }
152
153 print("\n");
154 print_ext(RED_ON_BLACK, "****** FAULT OCCURRED ******\n");
155 print_ext(RED_ON_BLACK, buf);
156 print("\n");
157 print_ext(RED_ON_BLACK, "****************************\n");
158 print("\n");
159
160 sti();
161
162 while (1);
163 }
164
165 static void
setregion(struct region_descriptor * rd,void * base,uint16_t limit)166 setregion(struct region_descriptor *rd, void *base, uint16_t limit)
167 {
168 rd->rd_limit = limit;
169 rd->rd_base = (uint64_t)base;
170 }
171
172 static void
setgate(struct gate_descriptor * gd,void * func,int ist,int type,int dpl,int sel)173 setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl,
174 int sel)
175 {
176 gd->gd_looffset = (uint64_t)func & 0xffff;
177 gd->gd_selector = sel;
178 gd->gd_ist = ist;
179 gd->gd_type = type;
180 gd->gd_dpl = dpl;
181 gd->gd_p = 1;
182 gd->gd_hioffset = (uint64_t)func >> 16;
183 gd->gd_xx1 = 0;
184 #ifdef __NetBSD__
185 gd->gd_zero = 0;
186 gd->gd_xx2 = 0;
187 gd->gd_xx3 = 0;
188 #endif
189 }
190
191 static void
set_sys_segment(struct sys_segment_descriptor * sd,void * base,size_t limit,int type,int dpl,int gran)192 set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit,
193 int type, int dpl, int gran)
194 {
195 memset(sd, 0, sizeof(*sd));
196 sd->sd_lolimit = (unsigned)limit;
197 sd->sd_lobase = (uint64_t)base;
198 sd->sd_type = type;
199 sd->sd_dpl = dpl;
200 sd->sd_p = 1;
201 sd->sd_hilimit = (unsigned)limit >> 16;
202 sd->sd_gran = gran;
203 sd->sd_hibase = (uint64_t)base >> 24;
204 }
205
206 static void
set_sys_gdt(int slotoff,void * base,size_t limit,int type,int dpl,int gran)207 set_sys_gdt(int slotoff, void *base, size_t limit, int type, int dpl, int gran)
208 {
209 struct sys_segment_descriptor sd;
210
211 set_sys_segment(&sd, base, limit, type, dpl, gran);
212
213 memcpy(&gdt64_start + slotoff, &sd, sizeof(sd));
214 }
215
init_tss(void)216 static void init_tss(void)
217 {
218 memset(&smallkern_tss, 0, sizeof(smallkern_tss));
219 #ifdef __NetBSD__
220 smallkern_tss.tss_ist[0] = (uintptr_t)(&faultstack[PAGE_SIZE-1]) & ~0xf;
221 #else /* DragonFly */
222 smallkern_tss.tss_ist1 = (uintptr_t)(&faultstack[PAGE_SIZE-1]) & ~0xf;
223 #endif
224
225 set_sys_gdt(SMALLKERN_GDT_TSS_OFF, &smallkern_tss,
226 sizeof(struct x86_64_tss) - 1, SDT_SYS386TSS, SEL_KPL, 0);
227 }
228
init_idt(void)229 static void init_idt(void)
230 {
231 struct region_descriptor region;
232 struct gate_descriptor *idt;
233 size_t i;
234
235 idt = (struct gate_descriptor *)&idtstore;
236 for (i = 0; i < NCPUIDT; i++) {
237 setgate(&idt[i], x86_exceptions[i], 0, SDT_SYS386IGT,
238 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
239 }
240 for (i = NCPUIDT; i < 256; i++) {
241 setgate(&idt[i], &Xintr, 0, SDT_SYS386IGT,
242 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
243 }
244
245 setregion(®ion, &idtstore, PAGE_SIZE - 1);
246 lidt(®ion);
247 }
248
249 /* -------------------------------------------------------------------------- */
250
251 /*
252 * Main entry point of the kernel.
253 */
254 void
main(paddr_t pa_start __unused)255 main(paddr_t pa_start __unused)
256 {
257 u_int descs[4];
258 uint32_t *reg, val;
259
260 print_banner();
261
262 /*
263 * Init the TSS and IDT. We mostly don't care about this, they are just
264 * here to properly handle traps.
265 */
266 init_tss();
267 print_state(true, "TSS created");
268 init_idt();
269 print_state(true, "IDT created");
270
271 /* Reset CR8. */
272 lcr8(0);
273
274 /* Enable FPU. */
275 clts();
276
277 /* Enable interrupts. */
278 sti();
279
280 /* Ensure APICBASE is correct (default). */
281 if ((rdmsr(MSR_APICBASE) & APICBASE_PHYSADDR) == 0xfee00000) {
282 print_state(true, "APICBASE is correct");
283 } else {
284 print_state(false, "wrong APICBASE");
285 }
286
287 /* Ensure PG_NX is disabled. */
288 if (!nox_flag) {
289 print_state(true, "PG_NX is disabled");
290 } else {
291 print_state(false, "PG_NX is enabled!");
292 }
293
294 /* Ensure we are on cpu120. */
295 cpuid(1, 0, descs);
296 if (__SHIFTOUT(descs[1], CPUID_LOCAL_APIC_ID) == 120) {
297 print_state(true, "Running on cpu120");
298 } else {
299 print_state(false, "Not running on cpu120!");
300 }
301
302 /* Ensure the LAPIC information matches. */
303 #define LAPIC_ID 0x020
304 # define LAPIC_ID_MASK 0xff000000
305 # define LAPIC_ID_SHIFT 24
306 reg = (uint32_t *)lapicbase;
307 val = reg[LAPIC_ID/4];
308 if (__SHIFTOUT(val, LAPIC_ID_MASK) == 120) {
309 print_state(true, "LAPIC information matches");
310 } else {
311 print_state(false, "LAPIC information does not match!");
312 }
313
314 /*
315 * Will cause a #UD.
316 */
317 vmmcall();
318 }
319