1 /* 2 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net 3 * All rights reserved. 4 * 5 * This code is part of the NVMM hypervisor. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "smallkern.h" 30 #include "pdir.h" 31 #include "trap.h" 32 33 #include <sys/bitops.h> 34 #include <machine/reg.h> 35 #include <machine/specialreg.h> 36 #include <machine/frame.h> 37 #include <machine/tss.h> 38 #include <machine/segments.h> 39 40 /* GDT offsets */ 41 #define SMALLKERN_GDT_NUL_OFF (0 * 8) 42 #define SMALLKERN_GDT_CS_OFF (1 * 8) 43 #define SMALLKERN_GDT_DS_OFF (2 * 8) 44 #define SMALLKERN_GDT_TSS_OFF (3 * 8) 45 46 #ifdef __DragonFly__ 47 #define SDT_SYS386TSS SDT_SYSTSS /* 9: system 64-bit TSS available */ 48 #define SDT_SYS386IGT SDT_SYSIGT /* 14: system 64-bit interrupt gate */ 49 #define APICBASE_PHYSADDR APICBASE_ADDRESS /* 0xfffff000: physical address */ 50 #define sys_segment_descriptor system_segment_descriptor 51 #define x86_64_tss x86_64tss 52 #define __arraycount(x) nitems(x) 53 #endif /* __DragonFly__ */ 54 55 void fatal(char *msg) 56 { 57 print("\n"); 58 print_ext(RED_ON_BLACK, "********** FATAL ***********\n"); 59 print_ext(RED_ON_BLACK, msg); 60 print("\n"); 61 print_ext(RED_ON_BLACK, "****************************\n"); 62 63 while (1); 64 } 65 66 /* -------------------------------------------------------------------------- */ 67 68 struct smallframe { 69 uint64_t sf_trapno; 70 uint64_t sf_err; 71 uint64_t sf_rip; 72 uint64_t sf_cs; 73 uint64_t sf_rflags; 74 uint64_t sf_rsp; 75 uint64_t sf_ss; 76 }; 77 78 static void setregion(struct region_descriptor *, void *, uint16_t); 79 static void setgate(struct gate_descriptor *, void *, int, int, int, int); 80 static void set_sys_segment(struct sys_segment_descriptor *, void *, 81 size_t, int, int, int); 82 static void set_sys_gdt(int, void *, size_t, int, int, int); 83 static void init_tss(void); 84 static void init_idt(void); 85 86 static char *trap_type[] = { 87 "privileged instruction fault", /* 0 T_PRIVINFLT */ 88 "breakpoint trap", /* 1 T_BPTFLT */ 89 "arithmetic trap", /* 2 T_ARITHTRAP */ 90 "asynchronous system trap", /* 3 T_ASTFLT */ 91 "protection fault", /* 4 T_PROTFLT */ 92 "trace trap", /* 5 T_TRCTRAP */ 93 "page fault", /* 6 T_PAGEFLT */ 94 "alignment fault", /* 7 T_ALIGNFLT */ 95 "integer divide fault", /* 8 T_DIVIDE */ 96 "non-maskable interrupt", /* 9 T_NMI */ 97 "overflow trap", /* 10 T_OFLOW */ 98 "bounds check fault", /* 11 T_BOUND */ 99 "FPU not available fault", /* 12 T_DNA */ 100 "double fault", /* 13 T_DOUBLEFLT */ 101 "FPU operand fetch fault", /* 14 T_FPOPFLT */ 102 "invalid TSS fault", /* 15 T_TSSFLT */ 103 "segment not present fault", /* 16 T_SEGNPFLT */ 104 "stack fault", /* 17 T_STKFLT */ 105 "machine check fault", /* 18 T_MCA */ 106 "SSE FP exception", /* 19 T_XMM */ 107 "hardware interrupt", /* 20 T_RESERVED */ 108 }; 109 size_t trap_types = __arraycount(trap_type); 110 111 static uint8_t idtstore[PAGE_SIZE] __aligned(PAGE_SIZE); 112 static uint8_t faultstack[PAGE_SIZE] __aligned(PAGE_SIZE); 113 static struct x86_64_tss smallkern_tss; 114 115 static void 116 triple_fault(void) 117 { 118 char *p = NULL; 119 memset(&idtstore, 0, PAGE_SIZE); 120 *p = 0; 121 } 122 123 /* 124 * Trap handler. 125 */ 126 void 127 trap(struct smallframe *sf) 128 { 129 uint64_t trapno = sf->sf_trapno; 130 static int ntrap = 0; 131 static float f = 0.0; 132 char *buf; 133 134 f += 1.0f; 135 if (ntrap++ == 6) { 136 triple_fault(); 137 } 138 if (ntrap != (int)f) { 139 print_ext(RED_ON_BLACK, "!!! FPU BUG !!!\n"); 140 } 141 142 if (trapno < trap_types) { 143 buf = trap_type[trapno]; 144 } else { 145 buf = "unknown trap"; 146 } 147 148 if (trapno == T_RESERVED) { 149 /* Disable external interrupts. */ 150 lcr8(15); 151 } 152 153 print("\n"); 154 print_ext(RED_ON_BLACK, "****** FAULT OCCURRED ******\n"); 155 print_ext(RED_ON_BLACK, buf); 156 print("\n"); 157 print_ext(RED_ON_BLACK, "****************************\n"); 158 print("\n"); 159 160 sti(); 161 162 while (1); 163 } 164 165 static void 166 setregion(struct region_descriptor *rd, void *base, uint16_t limit) 167 { 168 rd->rd_limit = limit; 169 rd->rd_base = (uint64_t)base; 170 } 171 172 static void 173 setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, 174 int sel) 175 { 176 gd->gd_looffset = (uint64_t)func & 0xffff; 177 gd->gd_selector = sel; 178 gd->gd_ist = ist; 179 gd->gd_type = type; 180 gd->gd_dpl = dpl; 181 gd->gd_p = 1; 182 gd->gd_hioffset = (uint64_t)func >> 16; 183 gd->gd_xx1 = 0; 184 #ifdef __NetBSD__ 185 gd->gd_zero = 0; 186 gd->gd_xx2 = 0; 187 gd->gd_xx3 = 0; 188 #endif 189 } 190 191 static void 192 set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit, 193 int type, int dpl, int gran) 194 { 195 memset(sd, 0, sizeof(*sd)); 196 sd->sd_lolimit = (unsigned)limit; 197 sd->sd_lobase = (uint64_t)base; 198 sd->sd_type = type; 199 sd->sd_dpl = dpl; 200 sd->sd_p = 1; 201 sd->sd_hilimit = (unsigned)limit >> 16; 202 sd->sd_gran = gran; 203 sd->sd_hibase = (uint64_t)base >> 24; 204 } 205 206 static void 207 set_sys_gdt(int slotoff, void *base, size_t limit, int type, int dpl, int gran) 208 { 209 struct sys_segment_descriptor sd; 210 211 set_sys_segment(&sd, base, limit, type, dpl, gran); 212 213 memcpy(&gdt64_start + slotoff, &sd, sizeof(sd)); 214 } 215 216 static void init_tss(void) 217 { 218 memset(&smallkern_tss, 0, sizeof(smallkern_tss)); 219 #ifdef __NetBSD__ 220 smallkern_tss.tss_ist[0] = (uintptr_t)(&faultstack[PAGE_SIZE-1]) & ~0xf; 221 #else /* DragonFly */ 222 smallkern_tss.tss_ist1 = (uintptr_t)(&faultstack[PAGE_SIZE-1]) & ~0xf; 223 #endif 224 225 set_sys_gdt(SMALLKERN_GDT_TSS_OFF, &smallkern_tss, 226 sizeof(struct x86_64_tss) - 1, SDT_SYS386TSS, SEL_KPL, 0); 227 } 228 229 static void init_idt(void) 230 { 231 struct region_descriptor region; 232 struct gate_descriptor *idt; 233 size_t i; 234 235 idt = (struct gate_descriptor *)&idtstore; 236 for (i = 0; i < NCPUIDT; i++) { 237 setgate(&idt[i], x86_exceptions[i], 0, SDT_SYS386IGT, 238 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 239 } 240 for (i = NCPUIDT; i < 256; i++) { 241 setgate(&idt[i], &Xintr, 0, SDT_SYS386IGT, 242 SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 243 } 244 245 setregion(®ion, &idtstore, PAGE_SIZE - 1); 246 lidt(®ion); 247 } 248 249 /* -------------------------------------------------------------------------- */ 250 251 /* 252 * Main entry point of the kernel. 253 */ 254 void 255 main(paddr_t pa_start __unused) 256 { 257 u_int descs[4]; 258 uint32_t *reg, val; 259 260 print_banner(); 261 262 /* 263 * Init the TSS and IDT. We mostly don't care about this, they are just 264 * here to properly handle traps. 265 */ 266 init_tss(); 267 print_state(true, "TSS created"); 268 init_idt(); 269 print_state(true, "IDT created"); 270 271 /* Reset CR8. */ 272 lcr8(0); 273 274 /* Enable FPU. */ 275 clts(); 276 277 /* Enable interrupts. */ 278 sti(); 279 280 /* Ensure APICBASE is correct (default). */ 281 if ((rdmsr(MSR_APICBASE) & APICBASE_PHYSADDR) == 0xfee00000) { 282 print_state(true, "APICBASE is correct"); 283 } else { 284 print_state(false, "wrong APICBASE"); 285 } 286 287 /* Ensure PG_NX is disabled. */ 288 if (!nox_flag) { 289 print_state(true, "PG_NX is disabled"); 290 } else { 291 print_state(false, "PG_NX is enabled!"); 292 } 293 294 /* Ensure we are on cpu120. */ 295 cpuid(1, 0, descs); 296 if (__SHIFTOUT(descs[1], CPUID_LOCAL_APIC_ID) == 120) { 297 print_state(true, "Running on cpu120"); 298 } else { 299 print_state(false, "Not running on cpu120!"); 300 } 301 302 /* Ensure the LAPIC information matches. */ 303 #define LAPIC_ID 0x020 304 # define LAPIC_ID_MASK 0xff000000 305 # define LAPIC_ID_SHIFT 24 306 reg = (uint32_t *)lapicbase; 307 val = reg[LAPIC_ID/4]; 308 if (__SHIFTOUT(val, LAPIC_ID_MASK) == 120) { 309 print_state(true, "LAPIC information matches"); 310 } else { 311 print_state(false, "LAPIC information does not match!"); 312 } 313 314 /* 315 * Will cause a #UD. 316 */ 317 vmmcall(); 318 } 319