/*
 * This file contains definitions for the x86 memory management unit (MMU),
 * including paging- and segmentation-related data structures and constants,
 * the %cr0, %cr4, and %eflags registers, and traps.
 */

/*
 *
 *	Part 1.  Paging data structures and constants.
 *
 */

// A linear address 'la' has a three-part structure as follows:
//
// +--------10------+-------10-------+---------12----------+
// | Page Directory |   Page Table   | Offset within Page  |
// |      Index     |      Index     |                     |
// +----------------+----------------+---------------------+
//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
//  \----------- PPN(la) -----------/
//
// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
// use PGADDR(PDX(la), PTX(la), PGOFF(la)).

// page number field of address
#define PPN(la)         (((uintptr_t) (la)) >> PTXSHIFT)
#define VPN(la)         PPN(la)         // used to index into vpt[]

// page directory index
#define PDX(la)         ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
#define VPD(la)         PDX(la)         // used to index into vpd[]

// page table index
#define PTX(la)         ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)

// offset in page
#define PGOFF(la)       (((uintptr_t) (la)) & 0xFFF)

// construct linear address from indexes and offset
//   d: page directory index, t: page table index, o: offset within page.
// Each component is widened to uintptr_t before shifting so that a plain
// 'int' directory index >= 512 does not overflow a signed int when shifted
// left by PDXSHIFT.
#define PGADDR(d, t, o)                                         \
        ((void *) (((uintptr_t) (d) << PDXSHIFT) |              \
                   ((uintptr_t) (t) << PTXSHIFT) |              \
                   (uintptr_t) (o)))

// Page directory and page table constants.
#define NPDENTRIES      1024            // page directory entries per page directory
#define NPTENTRIES      1024            // page table entries per page table

#define PGSIZE          4096            // bytes mapped by a page
#define PGSHIFT         12              // log2(PGSIZE)

#define PTSIZE          (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
#define PTSHIFT         22              // log2(PTSIZE)

#define PTXSHIFT        12              // offset of PTX in a linear address
#define PDXSHIFT        22              // offset of PDX in a linear address

// Page table/directory entry flags.
#define PTE_P           0x001   // Present
#define PTE_W           0x002   // Writeable
#define PTE_U           0x004   // User
#define PTE_PWT         0x008   // Write-Through
#define PTE_PCD         0x010   // Cache-Disable
#define PTE_A           0x020   // Accessed
#define PTE_D           0x040   // Dirty
#define PTE_PS          0x080   // Page Size
// NOTE(review): PTE_MBZ (0x180) includes the PTE_PS bit (0x080) -- confirm
// this overlap is intended before relying on PTE_MBZ as a validity mask.
#define PTE_MBZ         0x180   // Bits must be zero

// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
// hardware, so user processes are allowed to set them arbitrarily.
#define PTE_AVAIL       0xE00   // Available for software use

// Only flags in PTE_USER may be used in system calls.
#define PTE_USER        (PTE_AVAIL | PTE_P | PTE_W | PTE_U)

// address in page table entry (clears the low 12 flag bits)
#define PTE_ADDR(pte)   ((physaddr_t) (pte) & ~0xFFF)

// Control Register flags
#define CR0_PE          0x00000001      // Protection Enable
#define CR0_MP          0x00000002      // Monitor coProcessor
#define CR0_EM          0x00000004      // Emulation
#define CR0_TS          0x00000008      // Task Switched
#define CR0_ET          0x00000010      // Extension Type
#define CR0_NE          0x00000020      // Numeric Error
#define CR0_WP          0x00010000      // Write Protect
#define CR0_AM          0x00040000      // Alignment Mask
#define CR0_NW          0x20000000      // Not Writethrough
#define CR0_CD          0x40000000      // Cache Disable
#define CR0_PG          0x80000000      // Paging

#define CR4_PCE         0x00000100      // Performance counter enable
#define CR4_MCE         0x00000040      // Machine Check Enable
#define CR4_PSE         0x00000010      // Page Size Extensions
#define CR4_DE          0x00000008      // Debugging Extensions
#define CR4_TSD         0x00000004      // Time Stamp Disable
#define CR4_PVI         0x00000002      // Protected-Mode Virtual Interrupts
#define CR4_VME         0x00000001      // V86 Mode Extensions

// Eflags register
#define FL_CF           0x00000001      // Carry Flag
#define FL_PF           0x00000004      // Parity Flag
#define FL_AF           0x00000010      // Auxiliary carry Flag
#define FL_ZF           0x00000040      // Zero Flag
#define FL_SF           0x00000080      // Sign Flag
#define FL_TF           0x00000100      // Trap Flag
#define FL_IF           0x00000200      // Interrupt Flag
#define FL_DF           0x00000400      // Direction Flag
#define FL_OF           0x00000800      // Overflow Flag
#define FL_IOPL_MASK    0x00003000      // I/O Privilege Level bitmask
#define FL_IOPL_0       0x00000000      //   IOPL == 0
#define FL_IOPL_1       0x00001000      //   IOPL == 1
#define FL_IOPL_2       0x00002000      //   IOPL == 2
#define FL_IOPL_3       0x00003000      //   IOPL == 3
#define FL_NT           0x00004000      // Nested Task
#define FL_RF           0x00010000      // Resume Flag
#define FL_VM           0x00020000      // Virtual 8086 mode
#define FL_AC           0x00040000      // Alignment Check
#define FL_VIF          0x00080000      // Virtual Interrupt Flag
#define FL_VIP          0x00100000      // Virtual Interrupt Pending
#define FL_ID           0x00200000      // ID flag

// Page fault error codes
#define FEC_PR          0x1     // Page fault caused by protection violation
#define FEC_WR          0x2     // Page fault caused by a write
#define FEC_U           0x4     // Page fault occurred while in user mode


/*
 *
 *	Part 2.  Segmentation data structures and constants.
 *
 */

#ifdef __ASSEMBLER__

/*
 * Macros to build GDT entries in assembly.
 *
 * SEG_NULL emits an all-zero 8-byte descriptor.
 * SEG(type, base, lim) emits an 8-byte descriptor whose access byte is
 * (0x90 | type) and whose flags nibble is 0xC; the limit is stored in
 * units of 4K (lim >> 12).
 */
#define SEG_NULL						\
	.word 0, 0;						\
	.byte 0, 0, 0, 0
#define SEG(type,base,lim)					\
	.word (((lim) >> 12) & 0xffff), ((base) & 0xffff);	\
	.byte (((base) >> 16) & 0xff), (0x90 | (type)),		\
		(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)

#else	// not __ASSEMBLER__

// Segment Descriptors
struct Segdesc {
	unsigned sd_lim_15_0 : 16;  // Low bits of segment limit
	unsigned sd_base_15_0 : 16; // Low bits of segment base address
	unsigned sd_base_23_16 : 8; // Middle bits of segment base address
	unsigned sd_type : 4;       // Segment type (see STS_ constants)
	unsigned sd_s : 1;          // 0 = system, 1 = application
	unsigned sd_dpl : 2;        // Descriptor Privilege Level
	unsigned sd_p : 1;          // Present
	unsigned sd_lim_19_16 : 4;  // High bits of segment limit
	unsigned sd_avl : 1;        // Unused (available for software use)
	unsigned sd_rsv1 : 1;       // Reserved
	unsigned sd_db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
	unsigned sd_g : 1;          // Granularity: limit scaled by 4K when set
	unsigned sd_base_31_24 : 8; // High bits of segment base address
};

// Null segment: every field is zero.
#define SEG_NULL	(struct Segdesc){ 0 }

// Segment that is loadable but faults when used
// (application segment, present, 32-bit, zero base and zero limit).
#define SEG_FAULT	(struct Segdesc){ .sd_s = 1, .sd_p = 1, .sd_db = 1 }

// Normal segment.
//   type: segment type bits (STA_*), base: 32-bit base address,
//   lim:  limit in bytes (stored in 4K units, sd_g = 1),
//   dpl:  descriptor privilege level.
// 'base' and 'lim' are each evaluated more than once.
#define SEG(type, base, lim, dpl) (struct Segdesc)			\
{									\
	.sd_lim_15_0 = ((unsigned) (lim) >> 12) & 0xffff,		\
	.sd_base_15_0 = (unsigned) (base) & 0xffff,			\
	.sd_base_23_16 = ((unsigned) (base) >> 16) & 0xff,		\
	.sd_type = (type),						\
	.sd_s = 1,							\
	.sd_dpl = (dpl),						\
	.sd_p = 1,							\
	.sd_lim_19_16 = ((unsigned) (lim) >> 28) & 0xf,			\
	.sd_avl = 0,							\
	.sd_rsv1 = 0,							\
	.sd_db = 1,							\
	.sd_g = 1,							\
	.sd_base_31_24 = ((unsigned) (base) >> 24) & 0xff		\
}

// Same as SEG, but the limit is stored in bytes (sd_g = 0), so only the
// low 20 bits of 'lim' are representable; higher bits are masked off.
#define SEG16(type, base, lim, dpl) (struct Segdesc)			\
{									\
	.sd_lim_15_0 = (unsigned) (lim) & 0xffff,			\
	.sd_base_15_0 = (unsigned) (base) & 0xffff,			\
	.sd_base_23_16 = ((unsigned) (base) >> 16) & 0xff,		\
	.sd_type = (type),						\
	.sd_s = 1,							\
	.sd_dpl = (dpl),						\
	.sd_p = 1,							\
	.sd_lim_19_16 = ((unsigned) (lim) >> 16) & 0xf,			\
	.sd_avl = 0,							\
	.sd_rsv1 = 0,							\
	.sd_db = 1,							\
	.sd_g = 0,							\
	.sd_base_31_24 = ((unsigned) (base) >> 24) & 0xff		\
}

#endif /* !__ASSEMBLER__ */

// Application segment type bits
#define STA_X		0x8	    // Executable segment
#define STA_E		0x4	    // Expand down (non-executable segments)
#define STA_C		0x4	    // Conforming code segment (executable only)
#define STA_W		0x2	    // Writeable (non-executable segments)
#define STA_R		0x2	    // Readable (executable segments)
#define STA_A		0x1	    // Accessed

// System segment type bits
#define STS_T16A	0x1	    // Available 16-bit TSS
#define STS_LDT		0x2	    // Local Descriptor Table
#define STS_T16B	0x3	    // Busy 16-bit TSS
#define STS_CG16	0x4	    // 16-bit Call Gate
#define STS_TG		0x5	    // Task Gate
#define STS_IG16	0x6	    // 16-bit Interrupt Gate
#define STS_TG16	0x7	    // 16-bit Trap Gate
#define STS_T32A	0x9	    // Available 32-bit TSS
#define STS_T32B	0xB	    // Busy 32-bit TSS
#define STS_CG32	0xC	    // 32-bit Call Gate
#define STS_IG32	0xE	    // 32-bit Interrupt Gate
#define STS_TG32	0xF	    // 32-bit Trap Gate


/*
 *
 *	Part 3.  Traps.
 *
 */
206*55e95b16Srtm * 207*55e95b16Srtm */ 208*55e95b16Srtm 209*55e95b16Srtm #ifndef __ASSEMBLER__ 210*55e95b16Srtm 211*55e95b16Srtm // Task state segment format (as described by the Pentium architecture book) 212*55e95b16Srtm struct Taskstate { 213*55e95b16Srtm uint32_t ts_link; // Old ts selector 214*55e95b16Srtm uintptr_t ts_esp0; // Stack pointers and segment selectors 215*55e95b16Srtm uint16_t ts_ss0; // after an increase in privilege level 216*55e95b16Srtm uint16_t ts_padding1; 217*55e95b16Srtm uintptr_t ts_esp1; 218*55e95b16Srtm uint16_t ts_ss1; 219*55e95b16Srtm uint16_t ts_padding2; 220*55e95b16Srtm uintptr_t ts_esp2; 221*55e95b16Srtm uint16_t ts_ss2; 222*55e95b16Srtm uint16_t ts_padding3; 223*55e95b16Srtm physaddr_t ts_cr3; // Page directory base 224*55e95b16Srtm uintptr_t ts_eip; // Saved state from last task switch 225*55e95b16Srtm uint32_t ts_eflags; 226*55e95b16Srtm uint32_t ts_eax; // More saved state (registers) 227*55e95b16Srtm uint32_t ts_ecx; 228*55e95b16Srtm uint32_t ts_edx; 229*55e95b16Srtm uint32_t ts_ebx; 230*55e95b16Srtm uintptr_t ts_esp; 231*55e95b16Srtm uintptr_t ts_ebp; 232*55e95b16Srtm uint32_t ts_esi; 233*55e95b16Srtm uint32_t ts_edi; 234*55e95b16Srtm uint16_t ts_es; // Even more saved state (segment selectors) 235*55e95b16Srtm uint16_t ts_padding4; 236*55e95b16Srtm uint16_t ts_cs; 237*55e95b16Srtm uint16_t ts_padding5; 238*55e95b16Srtm uint16_t ts_ss; 239*55e95b16Srtm uint16_t ts_padding6; 240*55e95b16Srtm uint16_t ts_ds; 241*55e95b16Srtm uint16_t ts_padding7; 242*55e95b16Srtm uint16_t ts_fs; 243*55e95b16Srtm uint16_t ts_padding8; 244*55e95b16Srtm uint16_t ts_gs; 245*55e95b16Srtm uint16_t ts_padding9; 246*55e95b16Srtm uint16_t ts_ldt; 247*55e95b16Srtm uint16_t ts_padding10; 248*55e95b16Srtm uint16_t ts_t; // Trap on task switch 249*55e95b16Srtm uint16_t ts_iomb; // I/O map base address 250*55e95b16Srtm }; 251*55e95b16Srtm 252*55e95b16Srtm // Gate descriptors for interrupts and traps 253*55e95b16Srtm struct Gatedesc { 254*55e95b16Srtm 
unsigned gd_off_15_0 : 16; // low 16 bits of offset in segment 255*55e95b16Srtm unsigned gd_ss : 16; // segment selector 256*55e95b16Srtm unsigned gd_args : 5; // # args, 0 for interrupt/trap gates 257*55e95b16Srtm unsigned gd_rsv1 : 3; // reserved(should be zero I guess) 258*55e95b16Srtm unsigned gd_type : 4; // type(STS_{TG,IG32,TG32}) 259*55e95b16Srtm unsigned gd_s : 1; // must be 0 (system) 260*55e95b16Srtm unsigned gd_dpl : 2; // descriptor(meaning new) privilege level 261*55e95b16Srtm unsigned gd_p : 1; // Present 262*55e95b16Srtm unsigned gd_off_31_16 : 16; // high bits of offset in segment 263*55e95b16Srtm }; 264*55e95b16Srtm 265*55e95b16Srtm // Set up a normal interrupt/trap gate descriptor. 266*55e95b16Srtm // - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate. 267*55e95b16Srtm // - sel: Code segment selector for interrupt/trap handler 268*55e95b16Srtm // - off: Offset in code segment for interrupt/trap handler 269*55e95b16Srtm // - dpl: Descriptor Privilege Level - 270*55e95b16Srtm // the privilege level required for software to invoke 271*55e95b16Srtm // this interrupt/trap gate explicitly using an int instruction. 272*55e95b16Srtm #define SETGATE(gate, istrap, sel, off, dpl) \ 273*55e95b16Srtm { \ 274*55e95b16Srtm (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ 275*55e95b16Srtm (gate).gd_ss = (sel); \ 276*55e95b16Srtm (gate).gd_args = 0; \ 277*55e95b16Srtm (gate).gd_rsv1 = 0; \ 278*55e95b16Srtm (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32; \ 279*55e95b16Srtm (gate).gd_s = 0; \ 280*55e95b16Srtm (gate).gd_dpl = (dpl); \ 281*55e95b16Srtm (gate).gd_p = 1; \ 282*55e95b16Srtm (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ 283*55e95b16Srtm } 284*55e95b16Srtm 285*55e95b16Srtm // Set up a call gate descriptor. 
286*55e95b16Srtm #define SETCALLGATE(gate, ss, off, dpl) \ 287*55e95b16Srtm { \ 288*55e95b16Srtm (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff; \ 289*55e95b16Srtm (gate).gd_ss = (ss); \ 290*55e95b16Srtm (gate).gd_args = 0; \ 291*55e95b16Srtm (gate).gd_rsv1 = 0; \ 292*55e95b16Srtm (gate).gd_type = STS_CG32; \ 293*55e95b16Srtm (gate).gd_s = 0; \ 294*55e95b16Srtm (gate).gd_dpl = (dpl); \ 295*55e95b16Srtm (gate).gd_p = 1; \ 296*55e95b16Srtm (gate).gd_off_31_16 = (uint32_t) (off) >> 16; \ 297*55e95b16Srtm } 298*55e95b16Srtm 299*55e95b16Srtm // Pseudo-descriptors used for LGDT, LLDT and LIDT instructions. 300*55e95b16Srtm struct Pseudodesc { 301*55e95b16Srtm uint16_t pd__garbage; // LGDT supposed to be from address 4N+2 302*55e95b16Srtm uint16_t pd_lim; // Limit 303*55e95b16Srtm uint32_t pd_base __attribute__ ((packed)); // Base address 304*55e95b16Srtm }; 305*55e95b16Srtm #define PD_ADDR(desc) (&(desc).pd_lim) 306*55e95b16Srtm 307*55e95b16Srtm #endif /* !__ASSEMBLER__ */ 308*55e95b16Srtm 309