xref: /xv6-public/mmu.h (revision ef2bd07a)
/*
 * This file contains definitions for the x86 memory management unit (MMU),
 * including paging- and segmentation-related data structures and constants,
 * the %cr0, %cr4, and %eflags registers, and traps.
 */

/*
 *
 *	Part 1.  Paging data structures and constants.
 *
 */

// A linear address 'la' has a three-part structure as follows:
//
// +--------10------+-------10-------+---------12----------+
// | Page Directory |   Page Table   | Offset within Page  |
// |      Index     |      Index     |                     |
// +----------------+----------------+---------------------+
//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
//  \----------- PPN(la) -----------/
//
// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
// use PGADDR(PDX(la), PTX(la), PGOFF(la)).
2555e95b16Srtm 
// Page number field of an address: everything above the 12-bit page offset
// (the page directory index and page table index taken together).
#define PPN(la)		(((uintptr_t) (la)) >> PTXSHIFT)
#define VPN(la)		PPN(la)		// used to index into vpt[]

// Page directory index: the 10 bits starting at PDXSHIFT.
#define PDX(la)		((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
#define VPD(la)		PDX(la)		// used to index into vpd[]

// Page table index: the 10 bits starting at PTXSHIFT.
#define PTX(la)		((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)

// Offset in page: the low 12 bits.
#define PGOFF(la)	(((uintptr_t) (la)) & 0xFFF)

// Construct a linear address from a directory index d, a table index t and
// a page offset o.  Each component is widened to uintptr_t before it is
// shifted: with plain int arguments, (d) << PDXSHIFT would shift into the
// sign bit for any d >= 512, which is undefined behaviour for a signed type.
#define PGADDR(d, t, o)							\
	((void*) (((uintptr_t) (d) << PDXSHIFT) |			\
		  ((uintptr_t) (t) << PTXSHIFT) |			\
		  (uintptr_t) (o)))

// Page directory and page table constants.
#define NPDENTRIES	1024		// page directory entries per page directory
#define NPTENTRIES	1024		// page table entries per page table

#define PGSIZE		4096		// bytes mapped by a page
#define PGSHIFT		12		// log2(PGSIZE)

#define PTSIZE		(PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
#define PTSHIFT		22		// log2(PTSIZE)

#define PTXSHIFT	12		// offset of PTX in a linear address
#define PDXSHIFT	22		// offset of PDX in a linear address
5555e95b16Srtm 
// Page table/directory entry flags.
// These occupy the low 12 bits of an entry; PTE_ADDR() below masks them off.
#define PTE_P		0x001	// Present
#define PTE_W		0x002	// Writeable
#define PTE_U		0x004	// User
#define PTE_PWT		0x008	// Write-Through
#define PTE_PCD		0x010	// Cache-Disable
#define PTE_A		0x020	// Accessed
#define PTE_D		0x040	// Dirty
#define PTE_PS		0x080	// Page Size
#define PTE_MBZ		0x180	// Bits must be zero (note: includes the PTE_PS bit)

// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
// hardware, so user processes are allowed to set them arbitrarily.
#define PTE_AVAIL	0xE00	// Available for software use

// Only flags in PTE_USER may be used in system calls.
#define PTE_USER	(PTE_AVAIL | PTE_P | PTE_W | PTE_U)

// Address in page table entry: the entry with its low 12 flag bits cleared.
#define PTE_ADDR(pte)	((physaddr_t) (pte) & ~0xFFF)
7655e95b16Srtm 
// Control Register flags
// %cr0 bits.
#define CR0_PE		0x00000001	// Protection Enable
#define CR0_MP		0x00000002	// Monitor coProcessor
#define CR0_EM		0x00000004	// Emulation
#define CR0_TS		0x00000008	// Task Switched
#define CR0_ET		0x00000010	// Extension Type
#define CR0_NE		0x00000020	// Numeric Error
#define CR0_WP		0x00010000	// Write Protect
#define CR0_AM		0x00040000	// Alignment Mask
#define CR0_NW		0x20000000	// Not Writethrough
#define CR0_CD		0x40000000	// Cache Disable
#define CR0_PG		0x80000000	// Paging

// %cr4 bits (listed from the highest bit down).
#define CR4_PCE		0x00000100	// Performance counter enable
#define CR4_MCE		0x00000040	// Machine Check Enable
#define CR4_PSE		0x00000010	// Page Size Extensions
#define CR4_DE		0x00000008	// Debugging Extensions
#define CR4_TSD		0x00000004	// Time Stamp Disable
#define CR4_PVI		0x00000002	// Protected-Mode Virtual Interrupts
#define CR4_VME		0x00000001	// V86 Mode Extensions
9755e95b16Srtm 
// Eflags register
#define FL_CF		0x00000001	// Carry Flag
#define FL_PF		0x00000004	// Parity Flag
#define FL_AF		0x00000010	// Auxiliary carry Flag
#define FL_ZF		0x00000040	// Zero Flag
#define FL_SF		0x00000080	// Sign Flag
#define FL_TF		0x00000100	// Trap Flag
#define FL_IF		0x00000200	// Interrupt Flag
#define FL_DF		0x00000400	// Direction Flag
#define FL_OF		0x00000800	// Overflow Flag
// IOPL is a two-bit field; FL_IOPL_n is the value n placed in that field.
#define FL_IOPL_MASK	0x00003000	// I/O Privilege Level bitmask
#define FL_IOPL_0	0x00000000	//   IOPL == 0
#define FL_IOPL_1	0x00001000	//   IOPL == 1
#define FL_IOPL_2	0x00002000	//   IOPL == 2
#define FL_IOPL_3	0x00003000	//   IOPL == 3
#define FL_NT		0x00004000	// Nested Task
#define FL_RF		0x00010000	// Resume Flag
#define FL_VM		0x00020000	// Virtual 8086 mode
#define FL_AC		0x00040000	// Alignment Check
#define FL_VIF		0x00080000	// Virtual Interrupt Flag
#define FL_VIP		0x00100000	// Virtual Interrupt Pending
#define FL_ID		0x00200000	// ID flag
12055e95b16Srtm 
// Page fault error codes
#define FEC_PR		0x1	// Page fault caused by protection violation
#define FEC_WR		0x2	// Page fault caused by a write
#define FEC_U		0x4	// Page fault occurred while in user mode
12555e95b16Srtm 
12655e95b16Srtm 
/*
 *
 *	Part 2.  Segmentation data structures and constants.
 *
 */
13255e95b16Srtm 
#ifdef __ASSEMBLER__

/*
 * Macros to build GDT entries in assembly.
 *
 * SEG_NULL emits an all-zero 8-byte descriptor.
 * SEG(type,base,lim) emits an 8-byte descriptor with the limit expressed
 * in 4K units (lim >> 12).  The access byte is 0x90 | type (present,
 * DPL 0, application segment) and the flags nibble is 0xC (4K granularity,
 * 32-bit segment), matching the bit layout of struct Segdesc in the C
 * branch below.
 */
#define SEG_NULL						\
	.word 0, 0;						\
	.byte 0, 0, 0, 0
#define SEG(type,base,lim)					\
	.word (((lim) >> 12) & 0xffff), ((base) & 0xffff);	\
	.byte (((base) >> 16) & 0xff), (0x90 | (type)),		\
		(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)

#else	// not __ASSEMBLER__

// Segment Descriptors
// Eight bytes of bit-fields; the initializers in SEG_NULL, SEG_FAULT, SEG
// and SEG16 below are positional, so they depend on this field order.
struct Segdesc {
	unsigned lim_15_0 : 16;  // Low bits of segment limit
	unsigned base_15_0 : 16; // Low bits of segment base address
	unsigned base_23_16 : 8; // Middle bits of segment base address
	unsigned type : 4;       // Segment type (see STS_ constants)
	unsigned s : 1;          // 0 = system, 1 = application
	unsigned dpl : 2;        // Descriptor Privilege Level
	unsigned p : 1;          // Present
	unsigned lim_19_16 : 4;  // High bits of segment limit
	unsigned avl : 1;        // Unused (available for software use)
	unsigned rsv1 : 1;       // Reserved
	unsigned db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
	unsigned g : 1;          // Granularity: limit scaled by 4K when set
	unsigned base_31_24 : 8; // High bits of segment base address
};
// Null segment
#define SEG_NULL	(struct Segdesc){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
// Segment that is loadable but faults when used
// (present application segment with type 0, base 0 and limit 0)
#define SEG_FAULT	(struct Segdesc){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 }
// Normal segment: 32-bit, present, 4K granularity.  lim is given in bytes
// and stored in 4K units (lim >> 12).
#define SEG(type, base, lim, dpl) (struct Segdesc)			\
{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,	\
    (type), 1, (dpl), 1, ((unsigned) (lim) >> 28) & 0xf, 0, 0, 1, 1,	\
    ((unsigned) (base) >> 24) & 0xff }
// Byte-granular segment (g = 0): lim is stored as given, and only its low
// 20 bits fit in the descriptor.
#define SEG16(type, base, lim, dpl) (struct Segdesc)			\
{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,		\
    (type), 1, (dpl), 1, ((unsigned) (lim) >> 16) & 0xf, 0, 0, 1, 0,	\
    ((unsigned) (base) >> 24) & 0xff }

#endif /* !__ASSEMBLER__ */
17955e95b16Srtm 
// Application segment type bits
// Bits 0x4 and 0x2 have one meaning for executable segments and another
// for non-executable ones, so two names share each of those values.
#define STA_X		0x8	    // Executable segment
#define STA_E		0x4	    // Expand down (non-executable segments)
#define STA_C		0x4	    // Conforming code segment (executable only)
#define STA_W		0x2	    // Writeable (non-executable segments)
#define STA_R		0x2	    // Readable (executable segments)
#define STA_A		0x1	    // Accessed

// System segment type bits
#define STS_T16A	0x1	    // Available 16-bit TSS
#define STS_LDT		0x2	    // Local Descriptor Table
#define STS_T16B	0x3	    // Busy 16-bit TSS
#define STS_CG16	0x4	    // 16-bit Call Gate
#define STS_TG		0x5	    // Task Gate
#define STS_IG16	0x6	    // 16-bit Interrupt Gate
#define STS_TG16	0x7	    // 16-bit Trap Gate
#define STS_T32A	0x9	    // Available 32-bit TSS
#define STS_T32B	0xB	    // Busy 32-bit TSS
#define STS_CG32	0xC	    // 32-bit Call Gate
#define STS_IG32	0xE	    // 32-bit Interrupt Gate
#define STS_TG32	0xF	    // 32-bit Trap Gate
20155e95b16Srtm 
20255e95b16Srtm 
/*
 *
 *	Part 3.  Traps.
 *
 */
20855e95b16Srtm 
20955e95b16Srtm #ifndef __ASSEMBLER__
21055e95b16Srtm 
21155e95b16Srtm // Task state segment format (as described by the Pentium architecture book)
21255e95b16Srtm struct Taskstate {
213*ef2bd07aSrsc 	uint32_t link;	// Old ts selector
214*ef2bd07aSrsc 	uintptr_t esp0;	// Stack pointers and segment selectors
215*ef2bd07aSrsc 	uint16_t ss0;	//   after an increase in privilege level
216*ef2bd07aSrsc 	uint16_t padding1;
217*ef2bd07aSrsc 	uintptr_t esp1;
218*ef2bd07aSrsc 	uint16_t ss1;
219*ef2bd07aSrsc 	uint16_t padding2;
220*ef2bd07aSrsc 	uintptr_t esp2;
221*ef2bd07aSrsc 	uint16_t ss2;
222*ef2bd07aSrsc 	uint16_t padding3;
223*ef2bd07aSrsc 	physaddr_t cr3;	// Page directory base
224*ef2bd07aSrsc 	uintptr_t eip;	// Saved state from last task switch
225*ef2bd07aSrsc 	uint32_t eflags;
226*ef2bd07aSrsc 	uint32_t eax;	// More saved state (registers)
227*ef2bd07aSrsc 	uint32_t ecx;
228*ef2bd07aSrsc 	uint32_t edx;
229*ef2bd07aSrsc 	uint32_t ebx;
230*ef2bd07aSrsc 	uintptr_t esp;
231*ef2bd07aSrsc 	uintptr_t ebp;
232*ef2bd07aSrsc 	uint32_t esi;
233*ef2bd07aSrsc 	uint32_t edi;
234*ef2bd07aSrsc 	uint16_t es;		// Even more saved state (segment selectors)
235*ef2bd07aSrsc 	uint16_t padding4;
236*ef2bd07aSrsc 	uint16_t cs;
237*ef2bd07aSrsc 	uint16_t padding5;
238*ef2bd07aSrsc 	uint16_t ss;
239*ef2bd07aSrsc 	uint16_t padding6;
240*ef2bd07aSrsc 	uint16_t ds;
241*ef2bd07aSrsc 	uint16_t padding7;
242*ef2bd07aSrsc 	uint16_t fs;
243*ef2bd07aSrsc 	uint16_t padding8;
244*ef2bd07aSrsc 	uint16_t gs;
245*ef2bd07aSrsc 	uint16_t padding9;
246*ef2bd07aSrsc 	uint16_t ldt;
247*ef2bd07aSrsc 	uint16_t padding10;
248*ef2bd07aSrsc 	uint16_t t;		// Trap on task switch
249*ef2bd07aSrsc 	uint16_t iomb;	// I/O map base address
25055e95b16Srtm };
25155e95b16Srtm 
// Gate descriptors for interrupts and traps
// Eight bytes of bit-fields, filled in by SETGATE and SETCALLGATE.
struct Gatedesc {
	unsigned off_15_0 : 16;   // low 16 bits of offset in segment
	unsigned ss : 16;         // segment selector
	unsigned args : 5;        // # args, 0 for interrupt/trap gates
	unsigned rsv1 : 3;        // reserved (SETGATE/SETCALLGATE store 0)
	unsigned type : 4;        // type(STS_{TG,IG32,TG32})
	unsigned s : 1;           // must be 0 (system)
	unsigned dpl : 2;         // descriptor(meaning new) privilege level
	unsigned p : 1;           // Present
	unsigned off_31_16 : 16;  // high bits of offset in segment
};
26455e95b16Srtm 
// Set up a normal interrupt/trap gate descriptor.
// - gate: the descriptor to fill in (an lvalue with struct Gatedesc's fields)
// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
// - sel: Code segment selector for interrupt/trap handler
// - off: Offset in code segment for interrupt/trap handler
// - d: Descriptor Privilege Level -
//	  the privilege level required for software to invoke
//	  this interrupt/trap gate explicitly using an int instruction.
//	  (Named d, not dpl: a macro parameter called dpl would also
//	  replace the .dpl member name in the body.)
// The expansion is a bare brace block that names gate and off more than
// once, so pass arguments without side effects.
#define SETGATE(gate, istrap, sel, off, d)			\
{								\
	(gate).off_15_0 = (uint32_t) (off) & 0xffff;		\
	(gate).ss = (sel);					\
	(gate).args = 0;					\
	(gate).rsv1 = 0;					\
	(gate).type = (istrap) ? STS_TG32 : STS_IG32;		\
	(gate).s = 0;						\
	(gate).dpl = (d);					\
	(gate).p = 1;						\
	(gate).off_31_16 = (uint32_t) (off) >> 16;		\
}
28455e95b16Srtm 
// Set up a call gate descriptor.
// - gate: the descriptor to fill in (an lvalue with struct Gatedesc's fields)
// - sel: segment selector, stored in the gate's ss field
// - off: offset in that segment, split across off_15_0 and off_31_16
// - d: Descriptor Privilege Level, stored in the gate's dpl field
// The selector parameter must not be called ss: macro parameters are
// substituted everywhere in the body, so a parameter named ss would also
// replace the member name in (gate).ss and the expansion would not compile.
// The expansion is a bare brace block that names gate and off more than
// once, so pass arguments without side effects.
#define SETCALLGATE(gate, sel, off, d)				\
{								\
	(gate).off_15_0 = (uint32_t) (off) & 0xffff;		\
	(gate).ss = (sel);					\
	(gate).args = 0;					\
	(gate).rsv1 = 0;					\
	(gate).type = STS_CG32;					\
	(gate).s = 0;						\
	(gate).dpl = (d);					\
	(gate).p = 1;						\
	(gate).off_31_16 = (uint32_t) (off) >> 16;		\
}
29855e95b16Srtm 
// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
// The leading _garbage halfword places lim at byte offset 2 (an address of
// the form 4N+2), which in turn puts base at byte offset 4.
struct Pseudodesc {
	uint16_t _garbage;         // LGDT supposed to be from address 4N+2
	uint16_t lim;              // Limit
	uint32_t base __attribute__ ((packed));       // Base address
};
// Address of the lim field, i.e. the start of the limit/base pair that
// follows the _garbage padding.
#define PD_ADDR(desc)	(&(desc).lim)
30655e95b16Srtm 
30755e95b16Srtm #endif /* !__ASSEMBLER__ */
30855e95b16Srtm 
309