xref: /xv6-public/mmu.h (revision 55e95b16)
1*55e95b16Srtm /*
2*55e95b16Srtm  * This file contains definitions for the x86 memory management unit (MMU),
3*55e95b16Srtm  * including paging- and segmentation-related data structures and constants,
4*55e95b16Srtm  * the %cr0, %cr4, and %eflags registers, and traps.
5*55e95b16Srtm  */
6*55e95b16Srtm 
7*55e95b16Srtm /*
8*55e95b16Srtm  *
9*55e95b16Srtm  *	Part 1.  Paging data structures and constants.
10*55e95b16Srtm  *
11*55e95b16Srtm  */
12*55e95b16Srtm 
13*55e95b16Srtm // A linear address 'la' has a three-part structure as follows:
14*55e95b16Srtm //
15*55e95b16Srtm // +--------10------+-------10-------+---------12----------+
16*55e95b16Srtm // | Page Directory |   Page Table   | Offset within Page  |
17*55e95b16Srtm // |      Index     |      Index     |                     |
18*55e95b16Srtm // +----------------+----------------+---------------------+
19*55e95b16Srtm //  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
20*55e95b16Srtm //  \----------- PPN(la) -----------/
21*55e95b16Srtm //
22*55e95b16Srtm // The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
23*55e95b16Srtm // To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
24*55e95b16Srtm // use PGADDR(PDX(la), PTX(la), PGOFF(la)).
25*55e95b16Srtm 
// Page number of a linear address: every bit above the page offset.
#define PPN(la)		((uintptr_t)(la) >> PTXSHIFT)
#define VPN(la)		PPN(la)		// same value; used to index into vpt[]

// Page directory index: the 10-bit field that starts at bit PDXSHIFT,
// i.e. the page number with the page table index shifted away.
#define PDX(la)		((PPN(la) >> (PDXSHIFT - PTXSHIFT)) & 0x3FF)
#define VPD(la)		PDX(la)		// same value; used to index into vpd[]

// Page table index: the low 10 bits of the page number.
#define PTX(la)		(PPN(la) & 0x3FF)

// Byte offset within the page: the low 12 bits of the address.
#define PGOFF(la)	((uintptr_t)(la) & 0xFFF)
39*55e95b16Srtm 
// Construct a linear address from a page directory index (d), a page table
// index (t), and an offset within the page (o).
// Each argument is widened to uintptr_t before it is shifted, so a plain
// signed int index (e.g. 1023) is never shifted into the sign bit, and the
// final integer-to-pointer cast is made from a pointer-sized integer.
#define PGADDR(d, t, o)	((void*) (((uintptr_t) (d) << PDXSHIFT) |	\
				  ((uintptr_t) (t) << PTXSHIFT) |	\
				  (uintptr_t) (o)))
42*55e95b16Srtm 
43*55e95b16Srtm // Page directory and page table constants.
// 1024 == 2^10 entries per table, matching the 10-bit PDX and PTX index
// fields of a linear address.
44*55e95b16Srtm #define NPDENTRIES	1024		// page directory entries per page directory
45*55e95b16Srtm #define NPTENTRIES	1024		// page table entries per page table
46*55e95b16Srtm 
47*55e95b16Srtm #define PGSIZE		4096		// bytes mapped by a page (2^12)
48*55e95b16Srtm #define PGSHIFT		12		// log2(PGSIZE)
49*55e95b16Srtm 
50*55e95b16Srtm #define PTSIZE		(PGSIZE*NPTENTRIES) // bytes mapped by one page directory entry (4096 * 1024)
51*55e95b16Srtm #define PTSHIFT		22		// log2(PTSIZE)
52*55e95b16Srtm 
53*55e95b16Srtm #define PTXSHIFT	12		// bit offset of the PTX field in a linear address
54*55e95b16Srtm #define PDXSHIFT	22		// bit offset of the PDX field in a linear address
55*55e95b16Srtm 
56*55e95b16Srtm // Page table/directory entry flags.
// Each flag is a bit mask; all of the values below fit in the low 12 bits
// of an entry.
57*55e95b16Srtm #define PTE_P		0x001	// Present
58*55e95b16Srtm #define PTE_W		0x002	// Writeable
59*55e95b16Srtm #define PTE_U		0x004	// User
60*55e95b16Srtm #define PTE_PWT		0x008	// Write-Through
61*55e95b16Srtm #define PTE_PCD		0x010	// Cache-Disable
62*55e95b16Srtm #define PTE_A		0x020	// Accessed
63*55e95b16Srtm #define PTE_D		0x040	// Dirty
64*55e95b16Srtm #define PTE_PS		0x080	// Page Size
// NOTE(review): PTE_MBZ (0x180) includes bit 0x080, which is also defined
// as PTE_PS above -- confirm that the overlap is intended.
65*55e95b16Srtm #define PTE_MBZ		0x180	// Bits must be zero
66*55e95b16Srtm 
67*55e95b16Srtm // The PTE_AVAIL bits aren't used by the kernel or interpreted by the
68*55e95b16Srtm // hardware, so user processes are allowed to set them arbitrarily.
69*55e95b16Srtm #define PTE_AVAIL	0xE00	// Available for software use (bits 9-11)
70*55e95b16Srtm 
71*55e95b16Srtm // Only flags in PTE_USER may be used in system calls.
72*55e95b16Srtm #define PTE_USER	(PTE_AVAIL | PTE_P | PTE_W | PTE_U)
73*55e95b16Srtm 
74*55e95b16Srtm // Address stored in a page table entry: the entry converted to physaddr_t
// with its low 12 bits (mask 0xFFF) cleared.
75*55e95b16Srtm #define PTE_ADDR(pte)	((physaddr_t) (pte) & ~0xFFF)
76*55e95b16Srtm 
77*55e95b16Srtm // Control Register flags
// %cr0 bit masks, listed from the lowest bit to the highest.
78*55e95b16Srtm #define CR0_PE		0x00000001	// Protection Enable
79*55e95b16Srtm #define CR0_MP		0x00000002	// Monitor coProcessor
80*55e95b16Srtm #define CR0_EM		0x00000004	// Emulation
81*55e95b16Srtm #define CR0_TS		0x00000008	// Task Switched
82*55e95b16Srtm #define CR0_ET		0x00000010	// Extension Type
83*55e95b16Srtm #define CR0_NE		0x00000020	// Numeric Error
84*55e95b16Srtm #define CR0_WP		0x00010000	// Write Protect
85*55e95b16Srtm #define CR0_AM		0x00040000	// Alignment Mask
86*55e95b16Srtm #define CR0_NW		0x20000000	// Not Writethrough
87*55e95b16Srtm #define CR0_CD		0x40000000	// Cache Disable
88*55e95b16Srtm #define CR0_PG		0x80000000	// Paging
89*55e95b16Srtm 
// %cr4 bit masks, listed from the highest bit to the lowest.
90*55e95b16Srtm #define CR4_PCE		0x00000100	// Performance counter enable
91*55e95b16Srtm #define CR4_MCE		0x00000040	// Machine Check Enable
92*55e95b16Srtm #define CR4_PSE		0x00000010	// Page Size Extensions
93*55e95b16Srtm #define CR4_DE		0x00000008	// Debugging Extensions
94*55e95b16Srtm #define CR4_TSD		0x00000004	// Time Stamp Disable
95*55e95b16Srtm #define CR4_PVI		0x00000002	// Protected-Mode Virtual Interrupts
96*55e95b16Srtm #define CR4_VME		0x00000001	// V86 Mode Extensions
97*55e95b16Srtm 
98*55e95b16Srtm // Eflags register bit masks
99*55e95b16Srtm #define FL_CF		0x00000001	// Carry Flag
100*55e95b16Srtm #define FL_PF		0x00000004	// Parity Flag
101*55e95b16Srtm #define FL_AF		0x00000010	// Auxiliary carry Flag
102*55e95b16Srtm #define FL_ZF		0x00000040	// Zero Flag
103*55e95b16Srtm #define FL_SF		0x00000080	// Sign Flag
104*55e95b16Srtm #define FL_TF		0x00000100	// Trap Flag
105*55e95b16Srtm #define FL_IF		0x00000200	// Interrupt Flag
106*55e95b16Srtm #define FL_DF		0x00000400	// Direction Flag
107*55e95b16Srtm #define FL_OF		0x00000800	// Overflow Flag
// IOPL is a two-bit field (bits 12-13); the FL_IOPL_n constants are the
// field value n already shifted into position under FL_IOPL_MASK.
108*55e95b16Srtm #define FL_IOPL_MASK	0x00003000	// I/O Privilege Level bitmask
109*55e95b16Srtm #define FL_IOPL_0	0x00000000	//   IOPL == 0
110*55e95b16Srtm #define FL_IOPL_1	0x00001000	//   IOPL == 1
111*55e95b16Srtm #define FL_IOPL_2	0x00002000	//   IOPL == 2
112*55e95b16Srtm #define FL_IOPL_3	0x00003000	//   IOPL == 3
113*55e95b16Srtm #define FL_NT		0x00004000	// Nested Task
114*55e95b16Srtm #define FL_RF		0x00010000	// Resume Flag
115*55e95b16Srtm #define FL_VM		0x00020000	// Virtual 8086 mode
116*55e95b16Srtm #define FL_AC		0x00040000	// Alignment Check
117*55e95b16Srtm #define FL_VIF		0x00080000	// Virtual Interrupt Flag
118*55e95b16Srtm #define FL_VIP		0x00100000	// Virtual Interrupt Pending
119*55e95b16Srtm #define FL_ID		0x00200000	// ID flag
120*55e95b16Srtm 
121*55e95b16Srtm // Page fault error codes
// The three values are distinct single-bit masks.
122*55e95b16Srtm #define FEC_PR		0x1	// Page fault caused by protection violation
123*55e95b16Srtm #define FEC_WR		0x2	// Page fault caused by a write
124*55e95b16Srtm #define FEC_U		0x4	// Page fault occurred while in user mode
125*55e95b16Srtm 
126*55e95b16Srtm 
127*55e95b16Srtm /*
128*55e95b16Srtm  *
129*55e95b16Srtm  *	Part 2.  Segmentation data structures and constants.
130*55e95b16Srtm  *
131*55e95b16Srtm  */
132*55e95b16Srtm 
133*55e95b16Srtm #ifdef __ASSEMBLER__
134*55e95b16Srtm 
135*55e95b16Srtm /*
136*55e95b16Srtm  * Macros to build GDT entries in assembly.
 *
 * Each macro expands to two .word and four .byte directives, i.e. one
 * 8-byte entry.  SEG_NULL emits an all-zero entry.  SEG(type,base,lim)
 * emits, in order: bits 12-27 of lim, bits 0-15 of base, bits 16-23 of
 * base, (0x90 | type), (0xC0 | bits 28-31 of lim), and bits 24-31 of base.
 * The limit is therefore stored shifted right by 12.
 * NOTE(review): 0x90 and 0xC0 presumably correspond to the present +
 * application bits and the granularity + 32-bit bits of struct Segdesc --
 * confirm against the descriptor layout.
137*55e95b16Srtm  */
138*55e95b16Srtm #define SEG_NULL						\
139*55e95b16Srtm 	.word 0, 0;						\
140*55e95b16Srtm 	.byte 0, 0, 0, 0
141*55e95b16Srtm #define SEG(type,base,lim)					\
142*55e95b16Srtm 	.word (((lim) >> 12) & 0xffff), ((base) & 0xffff);	\
143*55e95b16Srtm 	.byte (((base) >> 16) & 0xff), (0x90 | (type)),		\
144*55e95b16Srtm 		(0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
145*55e95b16Srtm 
146*55e95b16Srtm #else	// not __ASSEMBLER__
147*55e95b16Srtm 
148*55e95b16Srtm // Segment Descriptors
// One descriptor expressed as bit-fields; the field widths add up to
// 64 bits.  The field order is significant: the SEG_NULL, SEG_FAULT, SEG
// and SEG16 initializers rely on it.
149*55e95b16Srtm struct Segdesc {
150*55e95b16Srtm 	unsigned sd_lim_15_0 : 16;  // Low bits of segment limit
151*55e95b16Srtm 	unsigned sd_base_15_0 : 16; // Low bits of segment base address
152*55e95b16Srtm 	unsigned sd_base_23_16 : 8; // Middle bits of segment base address
153*55e95b16Srtm 	unsigned sd_type : 4;       // Segment type (see STS_ constants)
154*55e95b16Srtm 	unsigned sd_s : 1;          // 0 = system, 1 = application
155*55e95b16Srtm 	unsigned sd_dpl : 2;        // Descriptor Privilege Level
156*55e95b16Srtm 	unsigned sd_p : 1;          // Present
157*55e95b16Srtm 	unsigned sd_lim_19_16 : 4;  // High bits of segment limit
158*55e95b16Srtm 	unsigned sd_avl : 1;        // Unused (available for software use)
159*55e95b16Srtm 	unsigned sd_rsv1 : 1;       // Reserved
160*55e95b16Srtm 	unsigned sd_db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
161*55e95b16Srtm 	unsigned sd_g : 1;          // Granularity: limit scaled by 4K when set
162*55e95b16Srtm 	unsigned sd_base_31_24 : 8; // High bits of segment base address
163*55e95b16Srtm };
// Null segment: every field is zero.
#define SEG_NULL	(struct Segdesc){ 0 }

// Segment that is loadable but faults when used: only the application
// (sd_s), present (sd_p) and 32-bit (sd_db) bits are set; base and limit
// are zero.
#define SEG_FAULT	(struct Segdesc){ .sd_s = 1, .sd_p = 1, .sd_db = 1 }

// Normal segment: present, application, 32-bit, with the granularity bit
// set.  The limit is stored shifted right by 12 to match sd_g = 1.
#define SEG(type, base, lim, dpl) (struct Segdesc) {			\
	.sd_lim_15_0	= ((lim) >> 12) & 0xffff,			\
	.sd_base_15_0	= (base) & 0xffff,				\
	.sd_base_23_16	= ((base) >> 16) & 0xff,			\
	.sd_type	= (type),					\
	.sd_s		= 1,						\
	.sd_dpl		= (dpl),					\
	.sd_p		= 1,						\
	.sd_lim_19_16	= (unsigned) (lim) >> 28,			\
	.sd_avl		= 0,						\
	.sd_rsv1	= 0,						\
	.sd_db		= 1,						\
	.sd_g		= 1,						\
	.sd_base_31_24	= (unsigned) (base) >> 24,			\
}

// Same as SEG, but with the granularity bit clear, so the limit is stored
// unshifted (low 16 bits plus the bits above bit 15).
#define SEG16(type, base, lim, dpl) (struct Segdesc) {			\
	.sd_lim_15_0	= (lim) & 0xffff,				\
	.sd_base_15_0	= (base) & 0xffff,				\
	.sd_base_23_16	= ((base) >> 16) & 0xff,			\
	.sd_type	= (type),					\
	.sd_s		= 1,						\
	.sd_dpl		= (dpl),					\
	.sd_p		= 1,						\
	.sd_lim_19_16	= (unsigned) (lim) >> 16,			\
	.sd_avl		= 0,						\
	.sd_rsv1	= 0,						\
	.sd_db		= 1,						\
	.sd_g		= 0,						\
	.sd_base_31_24	= (unsigned) (base) >> 24,			\
}
177*55e95b16Srtm 
178*55e95b16Srtm #endif /* !__ASSEMBLER__ */
179*55e95b16Srtm 
180*55e95b16Srtm // Application segment type bits
// Bits 0x4 and 0x2 each have two names: which one applies depends on
// whether the segment is executable (STA_X set) or not.
181*55e95b16Srtm #define STA_X		0x8	    // Executable segment
182*55e95b16Srtm #define STA_E		0x4	    // Expand down (non-executable segments)
183*55e95b16Srtm #define STA_C		0x4	    // Conforming code segment (executable only)
184*55e95b16Srtm #define STA_W		0x2	    // Writeable (non-executable segments)
185*55e95b16Srtm #define STA_R		0x2	    // Readable (executable segments)
186*55e95b16Srtm #define STA_A		0x1	    // Accessed
187*55e95b16Srtm 
188*55e95b16Srtm // System segment type bits
// These are whole 4-bit type values rather than independent flag bits.
189*55e95b16Srtm #define STS_T16A	0x1	    // Available 16-bit TSS
190*55e95b16Srtm #define STS_LDT		0x2	    // Local Descriptor Table
191*55e95b16Srtm #define STS_T16B	0x3	    // Busy 16-bit TSS
192*55e95b16Srtm #define STS_CG16	0x4	    // 16-bit Call Gate
193*55e95b16Srtm #define STS_TG		0x5	    // Task Gate
194*55e95b16Srtm #define STS_IG16	0x6	    // 16-bit Interrupt Gate
195*55e95b16Srtm #define STS_TG16	0x7	    // 16-bit Trap Gate
196*55e95b16Srtm #define STS_T32A	0x9	    // Available 32-bit TSS
197*55e95b16Srtm #define STS_T32B	0xB	    // Busy 32-bit TSS
198*55e95b16Srtm #define STS_CG32	0xC	    // 32-bit Call Gate
199*55e95b16Srtm #define STS_IG32	0xE	    // 32-bit Interrupt Gate
200*55e95b16Srtm #define STS_TG32	0xF	    // 32-bit Trap Gate
201*55e95b16Srtm 
202*55e95b16Srtm 
203*55e95b16Srtm /*
204*55e95b16Srtm  *
205*55e95b16Srtm  *	Part 3.  Traps.
206*55e95b16Srtm  *
207*55e95b16Srtm  */
208*55e95b16Srtm 
209*55e95b16Srtm #ifndef __ASSEMBLER__
210*55e95b16Srtm 
211*55e95b16Srtm // Task state segment format (as described by the Pentium architecture book)
// Each uint16_t selector field (ts_ss0 through ts_ldt) is followed by a
// uint16_t ts_paddingN filler field.
212*55e95b16Srtm struct Taskstate {
213*55e95b16Srtm 	uint32_t ts_link;	// Old ts selector
214*55e95b16Srtm 	uintptr_t ts_esp0;	// Stack pointers and segment selectors
215*55e95b16Srtm 	uint16_t ts_ss0;	//   after an increase in privilege level
216*55e95b16Srtm 	uint16_t ts_padding1;
217*55e95b16Srtm 	uintptr_t ts_esp1;
218*55e95b16Srtm 	uint16_t ts_ss1;
219*55e95b16Srtm 	uint16_t ts_padding2;
220*55e95b16Srtm 	uintptr_t ts_esp2;
221*55e95b16Srtm 	uint16_t ts_ss2;
222*55e95b16Srtm 	uint16_t ts_padding3;
223*55e95b16Srtm 	physaddr_t ts_cr3;	// Page directory base
224*55e95b16Srtm 	uintptr_t ts_eip;	// Saved state from last task switch
225*55e95b16Srtm 	uint32_t ts_eflags;
226*55e95b16Srtm 	uint32_t ts_eax;	// More saved state (registers)
227*55e95b16Srtm 	uint32_t ts_ecx;
228*55e95b16Srtm 	uint32_t ts_edx;
229*55e95b16Srtm 	uint32_t ts_ebx;
230*55e95b16Srtm 	uintptr_t ts_esp;
231*55e95b16Srtm 	uintptr_t ts_ebp;
232*55e95b16Srtm 	uint32_t ts_esi;
233*55e95b16Srtm 	uint32_t ts_edi;
234*55e95b16Srtm 	uint16_t ts_es;		// Even more saved state (segment selectors)
235*55e95b16Srtm 	uint16_t ts_padding4;
236*55e95b16Srtm 	uint16_t ts_cs;
237*55e95b16Srtm 	uint16_t ts_padding5;
238*55e95b16Srtm 	uint16_t ts_ss;
239*55e95b16Srtm 	uint16_t ts_padding6;
240*55e95b16Srtm 	uint16_t ts_ds;
241*55e95b16Srtm 	uint16_t ts_padding7;
242*55e95b16Srtm 	uint16_t ts_fs;
243*55e95b16Srtm 	uint16_t ts_padding8;
244*55e95b16Srtm 	uint16_t ts_gs;
245*55e95b16Srtm 	uint16_t ts_padding9;
246*55e95b16Srtm 	uint16_t ts_ldt;
247*55e95b16Srtm 	uint16_t ts_padding10;
248*55e95b16Srtm 	uint16_t ts_t;		// Trap on task switch
249*55e95b16Srtm 	uint16_t ts_iomb;	// I/O map base address
250*55e95b16Srtm };
251*55e95b16Srtm 
252*55e95b16Srtm // Gate descriptors for interrupts and traps
// One gate expressed as bit-fields; the field widths add up to 64 bits.
// SETGATE and SETCALLGATE fill in every field of this structure.
253*55e95b16Srtm struct Gatedesc {
254*55e95b16Srtm 	unsigned gd_off_15_0 : 16;   // low 16 bits of offset in segment
255*55e95b16Srtm 	unsigned gd_ss : 16;         // segment selector
256*55e95b16Srtm 	unsigned gd_args : 5;        // # args, 0 for interrupt/trap gates
257*55e95b16Srtm 	unsigned gd_rsv1 : 3;        // reserved; SETGATE/SETCALLGATE store 0 here
258*55e95b16Srtm 	unsigned gd_type : 4;        // type(STS_{TG,IG32,TG32,CG32})
259*55e95b16Srtm 	unsigned gd_s : 1;           // must be 0 (system)
260*55e95b16Srtm 	unsigned gd_dpl : 2;         // descriptor(meaning new) privilege level
261*55e95b16Srtm 	unsigned gd_p : 1;           // Present
262*55e95b16Srtm 	unsigned gd_off_31_16 : 16;  // high bits of offset in segment
263*55e95b16Srtm };
264*55e95b16Srtm 
// Set up a normal interrupt/trap gate descriptor.
// - gate: the struct Gatedesc lvalue to fill in (every field is written).
// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
// - sel: Code segment selector for interrupt/trap handler
// - off: Offset in code segment for interrupt/trap handler
// - dpl: Descriptor Privilege Level -
//	  the privilege level required for software to invoke
//	  this interrupt/trap gate explicitly using an int instruction.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// a single statement: "SETGATE(...);" is then safe as the unbraced body of
// an if/else or a loop.  Invoke it with a trailing semicolon.
// Note that 'gate' and 'off' are evaluated more than once, so they must
// not have side effects.
#define SETGATE(gate, istrap, sel, off, dpl)			\
do {								\
	(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;		\
	(gate).gd_ss = (sel);					\
	(gate).gd_args = 0;					\
	(gate).gd_rsv1 = 0;					\
	(gate).gd_type = (istrap) ? STS_TG32 : STS_IG32;	\
	(gate).gd_s = 0;					\
	(gate).gd_dpl = (dpl);					\
	(gate).gd_p = 1;					\
	(gate).gd_off_31_16 = (uint32_t) (off) >> 16;		\
} while (0)
284*55e95b16Srtm 
// Set up a call gate descriptor.
// - gate: the struct Gatedesc lvalue to fill in (every field is written).
// - ss: segment selector stored in the gate
// - off: offset stored in the gate, split into low and high 16-bit halves
// - dpl: Descriptor Privilege Level stored in the gate
// The gate type is always STS_CG32 and the argument count is set to 0.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// a single statement: "SETCALLGATE(...);" is then safe as the unbraced
// body of an if/else or a loop.  Invoke it with a trailing semicolon.
// Note that 'gate' and 'off' are evaluated more than once, so they must
// not have side effects.
#define SETCALLGATE(gate, ss, off, dpl)				\
do {								\
	(gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;		\
	(gate).gd_ss = (ss);					\
	(gate).gd_args = 0;					\
	(gate).gd_rsv1 = 0;					\
	(gate).gd_type = STS_CG32;				\
	(gate).gd_s = 0;					\
	(gate).gd_dpl = (dpl);					\
	(gate).gd_p = 1;					\
	(gate).gd_off_31_16 = (uint32_t) (off) >> 16;		\
} while (0)
298*55e95b16Srtm 
299*55e95b16Srtm // Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
// The operand handed to those instructions starts at pd_lim, not at the
// start of the struct: use PD_ADDR() to obtain its address.
300*55e95b16Srtm struct Pseudodesc {
301*55e95b16Srtm 	uint16_t pd__garbage;         // Filler before pd_lim: LGDT supposed to be from address 4N+2
302*55e95b16Srtm 	uint16_t pd_lim;              // Limit
303*55e95b16Srtm 	uint32_t pd_base __attribute__ ((packed));       // Base address
304*55e95b16Srtm };
// Address of the pd_lim member of 'desc', i.e. the pseudo-descriptor with
// the leading pd__garbage filler skipped.
305*55e95b16Srtm #define PD_ADDR(desc)	(&(desc).pd_lim)
306*55e95b16Srtm 
307*55e95b16Srtm #endif /* !__ASSEMBLER__ */
308*55e95b16Srtm 
309