/*
 * This file contains definitions for the x86 memory management unit (MMU),
 * including paging- and segmentation-related data structures and constants,
 * the %cr0, %cr4, and %eflags registers, and traps.
 */

/*
 *
 *	Part 1.  Paging data structures and constants.
 *
 */

// A linear address 'la' has a three-part structure as follows:
//
// +--------10------+-------10-------+---------12----------+
// | Page Directory |   Page Table   | Offset within Page  |
// |      Index     |      Index     |                     |
// +----------------+----------------+---------------------+
//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
//  \----------- PPN(la) -----------/
//
// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
// use PGADDR(PDX(la), PTX(la), PGOFF(la)).

// page number field of address
#define PPN(la)         (((uintptr_t) (la)) >> PTXSHIFT)
#define VPN(la)         PPN(la)         // used to index into vpt[]

// page directory index
#define PDX(la)         ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
#define VPD(la)         PDX(la)         // used to index into vpd[]

// page table index
#define PTX(la)         ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)

// offset in page
#define PGOFF(la)       (((uintptr_t) (la)) & 0xFFF)

// construct linear address from indexes and offset
//  - d: page directory index, t: page table index, o: offset within the page.
// Each argument is widened to uintptr_t before it is shifted: shifting a
// plain int directory index >= 512 left by PDXSHIFT would overflow a signed
// int, which is undefined behavior.
#define PGADDR(d, t, o) ((void*) ((uintptr_t) (d) << PDXSHIFT \
                                  | (uintptr_t) (t) << PTXSHIFT \
                                  | (uintptr_t) (o)))

// Page directory and page table constants.
#define NPDENTRIES      1024            // page directory entries per page directory
#define NPTENTRIES      1024            // page table entries per page table

#define PGSIZE          4096            // bytes mapped by a page
#define PGSHIFT         12              // log2(PGSIZE)

#define PTSIZE          (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
#define PTSHIFT         22              // log2(PTSIZE)

#define PTXSHIFT        12              // offset of PTX in a linear address
#define PDXSHIFT        22              // offset of PDX in a linear address

// Page table/directory entry flags.
#define PTE_P           0x001   // Present
#define PTE_W           0x002   // Writeable
#define PTE_U           0x004   // User
#define PTE_PWT         0x008   // Write-Through
#define PTE_PCD         0x010   // Cache-Disable
#define PTE_A           0x020   // Accessed
#define PTE_D           0x040   // Dirty
#define PTE_PS          0x080   // Page Size
#define PTE_MBZ         0x180   // Bits must be zero

// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
// hardware, so user processes are allowed to set them arbitrarily.
#define PTE_AVAIL       0xE00   // Available for software use

// Only flags in PTE_USER may be used in system calls.
#define PTE_USER        (PTE_AVAIL | PTE_P | PTE_W | PTE_U)

// address in page table entry (clears the low 12 flag bits)
#define PTE_ADDR(pte)   ((physaddr_t) (pte) & ~0xFFF)

// Control Register flags
#define CR0_PE          0x00000001      // Protection Enable
#define CR0_MP          0x00000002      // Monitor coProcessor
#define CR0_EM          0x00000004      // Emulation
#define CR0_TS          0x00000008      // Task Switched
#define CR0_ET          0x00000010      // Extension Type
#define CR0_NE          0x00000020      // Numeric Error
#define CR0_WP          0x00010000      // Write Protect
#define CR0_AM          0x00040000      // Alignment Mask
#define CR0_NW          0x20000000      // Not Writethrough
#define CR0_CD          0x40000000      // Cache Disable
#define CR0_PG          0x80000000      // Paging

#define CR4_PCE         0x00000100      // Performance counter enable
#define CR4_MCE         0x00000040      // Machine Check Enable
#define CR4_PSE         0x00000010      // Page Size Extensions
#define CR4_DE          0x00000008      // Debugging Extensions
#define CR4_TSD         0x00000004      // Time Stamp Disable
#define CR4_PVI         0x00000002      // Protected-Mode Virtual Interrupts
#define CR4_VME         0x00000001      // V86 Mode Extensions

// Eflags register
#define FL_CF           0x00000001      // Carry Flag
#define FL_PF           0x00000004      // Parity Flag
#define FL_AF           0x00000010      // Auxiliary carry Flag
#define FL_ZF           0x00000040      // Zero Flag
#define FL_SF           0x00000080      // Sign Flag
#define FL_TF           0x00000100      // Trap Flag
#define FL_IF           0x00000200      // Interrupt Flag
#define FL_DF           0x00000400      // Direction Flag
#define FL_OF           0x00000800      // Overflow Flag
#define FL_IOPL_MASK    0x00003000      // I/O Privilege Level bitmask
#define FL_IOPL_0       0x00000000      //   IOPL == 0
#define FL_IOPL_1       0x00001000      //   IOPL == 1
#define FL_IOPL_2       0x00002000      //   IOPL == 2
#define FL_IOPL_3       0x00003000      //   IOPL == 3
#define FL_NT           0x00004000      // Nested Task
#define FL_RF           0x00010000      // Resume Flag
#define FL_VM           0x00020000      // Virtual 8086 mode
#define FL_AC           0x00040000      // Alignment Check
#define FL_VIF          0x00080000      // Virtual Interrupt Flag
#define FL_VIP          0x00100000      // Virtual Interrupt Pending
#define FL_ID           0x00200000      // ID flag

// Page fault error codes
#define FEC_PR          0x1     // Page fault caused by protection violation
#define FEC_WR          0x2     // Page fault caused by a write
#define FEC_U           0x4     // Page fault occurred while in user mode


/*
 *
 *	Part 2.  Segmentation data structures and constants.
 *
 */

#ifdef __ASSEMBLER__

/*
 * Macros to build GDT entries in assembly.
 */
#define SEG_NULL                                                \
        .word 0, 0;                                             \
        .byte 0, 0, 0, 0
#define SEG(type,base,lim)                                      \
        .word (((lim) >> 12) & 0xffff), ((base) & 0xffff);      \
        .byte (((base) >> 16) & 0xff), (0x90 | (type)),         \
                (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)

#else   // not __ASSEMBLER__

// Segment Descriptors
struct Segdesc {
        unsigned lim_15_0 : 16;  // Low bits of segment limit
        unsigned base_15_0 : 16; // Low bits of segment base address
        unsigned base_23_16 : 8; // Middle bits of segment base address
        unsigned type : 4;       // Segment type (see STS_ constants)
        unsigned s : 1;          // 0 = system, 1 = application
        unsigned dpl : 2;        // Descriptor Privilege Level
        unsigned p : 1;          // Present
        unsigned lim_19_16 : 4;  // High bits of segment limit
        unsigned avl : 1;        // Unused (available for software use)
        unsigned rsv1 : 1;       // Reserved
        unsigned db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
        unsigned g : 1;          // Granularity: limit scaled by 4K when set
        unsigned base_31_24 : 8; // High bits of segment base address
};
// Null segment
#define SEG_NULL        (struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
// Segment that is loadable but faults when used
#define SEG_FAULT       (struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 }
// Normal segment: 4K granularity (g = 1), 32-bit (db = 1).
// The limit is given in bytes and stored as lim >> 12.
#define SEG(type, base, lim, dpl) (struct Segdesc)                      \
{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,       \
    (type), 1, (dpl), 1, (unsigned) (lim) >> 28, 0, 0, 1, 1,            \
    (unsigned) (base) >> 24 }
// Byte-granular segment (g = 0): the limit is stored unscaled.
#define SEG16(type, base, lim, dpl) (struct Segdesc)                    \
{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,               \
    (type), 1, (dpl), 1, (unsigned) (lim) >> 16, 0, 0, 1, 0,            \
    (unsigned) (base) >> 24 }

#endif /* !__ASSEMBLER__ */

// Application segment type bits
#define STA_X           0x8         // Executable segment
#define STA_E           0x4         // Expand down (non-executable segments)
#define STA_C           0x4         // Conforming code segment (executable only)
#define STA_W           0x2         // Writeable (non-executable segments)
#define STA_R           0x2         // Readable (executable segments)
#define STA_A           0x1         // Accessed

// System segment type bits
#define STS_T16A        0x1         // Available 16-bit TSS
#define STS_LDT         0x2         // Local Descriptor Table
#define STS_T16B        0x3         // Busy 16-bit TSS
#define STS_CG16        0x4         // 16-bit Call Gate
#define STS_TG          0x5         // Task Gate
#define STS_IG16        0x6         // 16-bit Interrupt Gate
#define STS_TG16        0x7         // 16-bit Trap Gate
#define STS_T32A        0x9         // Available 32-bit TSS
#define STS_T32B        0xB         // Busy 32-bit TSS
#define STS_CG32        0xC         // 32-bit Call Gate
#define STS_IG32        0xE         // 32-bit Interrupt Gate
#define STS_TG32        0xF         // 32-bit Trap Gate


/*
 *
 *	Part 3.  Traps.
 *
 */

#ifndef __ASSEMBLER__

// Task state segment format (as described by the Pentium architecture book)
// Every field is a fixed-width integer so that the structure size does not
// depend on how wide the pointer-sized or physical-address typedefs happen
// to be in a given build.
struct Taskstate {
        uint32_t link;          // Old ts selector
        uint32_t esp0;          // Stack pointers and segment selectors
        uint16_t ss0;           //   after an increase in privilege level
        uint16_t padding1;
        uint32_t esp1;
        uint16_t ss1;
        uint16_t padding2;
        uint32_t esp2;
        uint16_t ss2;
        uint16_t padding3;
        uint32_t cr3;           // Page directory base
        uint32_t eip;           // Saved state from last task switch
        uint32_t eflags;
        uint32_t eax;           // More saved state (registers)
        uint32_t ecx;
        uint32_t edx;
        uint32_t ebx;
        uint32_t esp;
        uint32_t ebp;
        uint32_t esi;
        uint32_t edi;
        uint16_t es;            // Even more saved state (segment selectors)
        uint16_t padding4;
        uint16_t cs;
        uint16_t padding5;
        uint16_t ss;
        uint16_t padding6;
        uint16_t ds;
        uint16_t padding7;
        uint16_t fs;
        uint16_t padding8;
        uint16_t gs;
        uint16_t padding9;
        uint16_t ldt;
        uint16_t padding10;
        uint16_t t;             // Trap on task switch
        uint16_t iomb;          // I/O map base address
};

// Gate descriptors for interrupts and traps
struct Gatedesc {
        unsigned off_15_0 : 16;   // low 16 bits of offset in segment
        unsigned ss : 16;         // segment selector
        unsigned args : 5;        // # args, 0 for interrupt/trap gates
        unsigned rsv1 : 3;        // reserved (should be zero I guess)
        unsigned type : 4;        // type (STS_{TG,IG32,TG32})
        unsigned s : 1;           // must be 0 (system)
        unsigned dpl : 2;         // descriptor (meaning new) privilege level
        unsigned p : 1;           // Present
        unsigned off_31_16 : 16;  // high bits of offset in segment
};

// Set up a normal interrupt/trap gate descriptor.
// - gate: the struct Gatedesc lvalue to fill in
// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
// - sel: Code segment selector for interrupt/trap handler
// - off: Offset in code segment for interrupt/trap handler
// - d: Descriptor Privilege Level -
//        the privilege level required for software to invoke
//        this interrupt/trap gate explicitly using an int instruction.
#define SETGATE(gate, istrap, sel, off, d)                      \
{                                                               \
        (gate).off_15_0 = (uint32_t) (off) & 0xffff;            \
        (gate).ss = (sel);                                      \
        (gate).args = 0;                                        \
        (gate).rsv1 = 0;                                        \
        (gate).type = (istrap) ? STS_TG32 : STS_IG32;           \
        (gate).s = 0;                                           \
        (gate).dpl = (d);                                       \
        (gate).p = 1;                                           \
        (gate).off_31_16 = (uint32_t) (off) >> 16;              \
}

// Set up a call gate descriptor.
// - gate: the struct Gatedesc lvalue to fill in
// - sel: Code segment selector of the call target
// - off: Offset of the call target in that code segment
// - d: Descriptor Privilege Level
// The selector parameter must not be spelled `ss`: the preprocessor would
// substitute it into the `.ss` member access as well, turning the
// assignment into `(gate).<argument> = (<argument>)`.
#define SETCALLGATE(gate, sel, off, d)                          \
{                                                               \
        (gate).off_15_0 = (uint32_t) (off) & 0xffff;            \
        (gate).ss = (sel);                                      \
        (gate).args = 0;                                        \
        (gate).rsv1 = 0;                                        \
        (gate).type = STS_CG32;                                 \
        (gate).s = 0;                                           \
        (gate).dpl = (d);                                       \
        (gate).p = 1;                                           \
        (gate).off_31_16 = (uint32_t) (off) >> 16;              \
}

// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
struct Pseudodesc {
        uint16_t _garbage;      // LGDT supposed to be from address 4N+2
        uint16_t lim;           // Limit
        uint32_t base __attribute__ ((packed)); // Base address
};
// Address of the limit member, i.e. the descriptor without the leading
// _garbage padding word.
#define PD_ADDR(desc)   (&(desc).lim)

#endif /* !__ASSEMBLER__ */