xref: /xv6-public/vm.c (revision 6f232758)
1 #include "param.h"
2 #include "types.h"
3 #include "defs.h"
4 #include "x86.h"
5 #include "memlayout.h"
6 #include "mmu.h"
7 #include "proc.h"
8 #include "elf.h"
9 
10 extern char data[];  // defined in data.S; kmap[] below uses it as the boundary between read-only kernel text/rodata and writable kernel data
11 pde_t *kpgdir;  // kernel-only page directory: built by kvmalloc(), loaded by switchkvm(); for use in scheduler()
12 struct segdesc gdt[NSEGS];  // NOTE(review): not referenced in this file (seginit() fills the per-CPU c->gdt) -- confirm whether other files use it
13 
14 // Set up CPU's kernel segment descriptors.
15 // Run once on entry on each CPU.
16 void
17 seginit(void)
18 {
19   struct cpu *c;
20 
21   // Map "logical" addresses to virtual addresses using identity map.
22   // Cannot share a CODE descriptor for both kernel and user
23   // because it would have to have DPL_USR, but the CPU forbids
24   // an interrupt from CPL=0 to DPL=3.
25   c = &cpus[cpunum()];
26   c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0);
27   c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0);
28   c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER);
29   c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER);
30 
31   // Map cpu, and curproc
32   c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0);
33 
34   lgdt(c->gdt, sizeof(c->gdt));
35   loadgs(SEG_KCPU << 3);
36 
37   // Initialize cpu-local storage.
38   cpu = c;
39   proc = 0;
40 }
41 
42 // Return the address of the PTE in page table pgdir
43 // that corresponds to virtual address va.  If alloc!=0,
44 // create any required page table pages.
45 static pte_t *
46 walkpgdir(pde_t *pgdir, const void *va, char* (*alloc)(void))
47 {
48   pde_t *pde;
49   pte_t *pgtab;
50 
51   pde = &pgdir[PDX(va)];
52   if(*pde & PTE_P){
53     pgtab = (pte_t*)p2v(PTE_ADDR(*pde));
54   } else {
55     if(!alloc || (pgtab = (pte_t*)alloc()) == 0)
56       return 0;
57     // Make sure all those PTE_P bits are zero.
58     memset(pgtab, 0, PGSIZE);
59     // The permissions here are overly generous, but they can
60     // be further restricted by the permissions in the page table
61     // entries, if necessary.
62     *pde = v2p(pgtab) | PTE_P | PTE_W | PTE_U;
63   }
64   return &pgtab[PTX(va)];
65 }
66 
67 // Create PTEs for virtual addresses starting at va that refer to
68 // physical addresses starting at pa. va and size might not
69 // be page-aligned.
70 static int
71 mappages(pde_t *pgdir, void *va, uint size, uint pa, int perm, char* (*alloc)(void))
72 {
73   char *a, *last;
74   pte_t *pte;
75 
76   a = (char *) PGROUNDDOWN((uint) va);
77   last = (char *) PGROUNDDOWN(((uint) va) + size - 1);
78   for(;;){
79     pte = walkpgdir(pgdir, a, alloc);
80     if(pte == 0)
81       return -1;
82     if(*pte & PTE_P)
83       panic("remap");
84     *pte = pa | perm | PTE_P;
85     if(a == last)
86       break;
87     a += PGSIZE;
88     pa += PGSIZE;
89   }
90   return 0;
91 }
92 
93 // The mappings from logical to virtual are one to one (i.e.,
94 // segmentation doesn't do anything).
95 // There is one page table per process, plus one that's used
96 // when a CPU is not running any process (kpgdir).
97 // A user process uses the same page table as the kernel; the
98 // page protection bits prevent it from using anything other
99 // than its memory.
100 //
101 // setupkvm() and exec() set up every page table like this:
102 //   0..KERNBASE      : user memory (text, data, stack, heap), mapped to some unused phys mem
103 //   KERNBASE..KERNBASE+EXTMEM: mapped to 0..EXTMEM  (below extended memory)
104 //   KERNBASE+EXTMEM..KERNBASE+end : mapped to EXTMEM..end (mapped without write permission)
105 //   KERNBASE+end..KERNBASE+PHYSTOP     : mapped to end..PHYSTOP (rw data + free memory)
106 //   0xfe000000..0    : mapped direct (devices such as ioapic)
107 //
108 // The kernel allocates memory for its heap and for user memory
109 // between kernend and the end of physical memory (PHYSTOP).
110 // The virtual address space of each user program includes the kernel
111 // (which is inaccessible in user mode).  The user program sits in
112 // the bottom of the address space, and the kernel at the top at KERNBASE.
113 static struct kmap {
114   void *virt;
115   uint phys_start;
116   uint phys_end;
117   int perm;
118 } kmap[] = {
119   { P2V(0), 0, 1024*1024, PTE_W},  // First 1Mbyte contains BIOS and some IO devices
120   { (void *)KERNLINK, V2P(KERNLINK), V2P(data),  0},  // kernel text, rodata
121   { data, V2P(data), PHYSTOP,  PTE_W},  // kernel data, memory
122   { (void*)DEVSPACE, DEVSPACE, 0, PTE_W},  // more devices
123 };
124 
125 // Set up kernel part of a page table.
126 pde_t*
127 setupkvm(char* (*alloc)(void))
128 {
129   pde_t *pgdir;
130   struct kmap *k;
131 
132   if((pgdir = (pde_t*)alloc()) == 0)
133     return 0;
134   memset(pgdir, 0, PGSIZE);
135   k = kmap;
136   if (p2v(PHYSTOP) > (void *) DEVSPACE)
137     panic("PHYSTOP too high");
138   for(k = kmap; k < &kmap[NELEM(kmap)]; k++)
139     if(mappages(pgdir, k->virt, k->phys_end - k->phys_start, (uint)k->phys_start,
140 		k->perm, alloc) < 0)
141       return 0;
142 
143   return pgdir;
144 }
145 
146 // Allocate one page table for the machine for the kernel address
147 // space for scheduler processes.
148 void
149 kvmalloc(void)
150 {
151   kpgdir = setupkvm(enter_alloc);
152   switchkvm();
153 }
154 
155 // Switch h/w page table register to the kernel-only page table,
156 // for when no process is running.
157 void
158 switchkvm(void)
159 {
160   lcr3(v2p(kpgdir));   // switch to the kernel page table
161 }
162 
163 // Switch TSS and h/w page table to correspond to process p.
164 void
165 switchuvm(struct proc *p)
166 {
167   pushcli();
168   cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
169   cpu->gdt[SEG_TSS].s = 0;
170   cpu->ts.ss0 = SEG_KDATA << 3;
171   cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE;
172   ltr(SEG_TSS << 3);
173   if(p->pgdir == 0)
174     panic("switchuvm: no pgdir");
175   lcr3(v2p(p->pgdir));  // switch to new address space
176   popcli();
177 }
178 
179 // Load the initcode into address 0 of pgdir.
180 // sz must be less than a page.
181 void
182 inituvm(pde_t *pgdir, char *init, uint sz)
183 {
184   char *mem;
185 
186   if(sz >= PGSIZE)
187     panic("inituvm: more than a page");
188   mem = kalloc();
189   memset(mem, 0, PGSIZE);
190   mappages(pgdir, 0, PGSIZE, v2p(mem), PTE_W|PTE_U, kalloc);
191   memmove(mem, init, sz);
192 }
193 
194 // Load a program segment into pgdir.  addr must be page-aligned
195 // and the pages from addr to addr+sz must already be mapped.
196 int
197 loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz)
198 {
199   uint i, pa, n;
200   pte_t *pte;
201 
202   if((uint)addr % PGSIZE != 0)
203     panic("loaduvm: addr must be page aligned");
204   for(i = 0; i < sz; i += PGSIZE){
205     if((pte = walkpgdir(pgdir, addr+i, 0)) == 0)
206       panic("loaduvm: address should exist");
207     pa = PTE_ADDR(*pte);
208     if(sz - i < PGSIZE)
209       n = sz - i;
210     else
211       n = PGSIZE;
212     if(readi(ip, p2v(pa), offset+i, n) != n)
213       return -1;
214   }
215   return 0;
216 }
217 
218 // Allocate page tables and physical memory to grow process from oldsz to
219 // newsz, which need not be page aligned.  Returns new size or 0 on error.
220 int
221 allocuvm(pde_t *pgdir, uint oldsz, uint newsz)
222 {
223   char *mem;
224   uint a;
225 
226   if(newsz >= KERNBASE)
227     return 0;
228   if(newsz < oldsz)
229     return oldsz;
230 
231   a = PGROUNDUP(oldsz);
232   for(; a < newsz; a += PGSIZE){
233     mem = kalloc();
234     if(mem == 0){
235       cprintf("allocuvm out of memory\n");
236       deallocuvm(pgdir, newsz, oldsz);
237       return 0;
238     }
239     memset(mem, 0, PGSIZE);
240     mappages(pgdir, (char*)a, PGSIZE, v2p(mem), PTE_W|PTE_U, kalloc);
241   }
242   return newsz;
243 }
244 
245 // Deallocate user pages to bring the process size from oldsz to
246 // newsz.  oldsz and newsz need not be page-aligned, nor does newsz
247 // need to be less than oldsz.  oldsz can be larger than the actual
248 // process size.  Returns the new process size.
249 int
250 deallocuvm(pde_t *pgdir, uint oldsz, uint newsz)
251 {
252   pte_t *pte;
253   uint a, pa;
254 
255   if(newsz >= oldsz)
256     return oldsz;
257 
258   a = PGROUNDUP(newsz);
259   for(; a  < oldsz; a += PGSIZE){
260     pte = walkpgdir(pgdir, (char*)a, 0);
261     if(pte && (*pte & PTE_P) != 0){
262       pa = PTE_ADDR(*pte);
263       if(pa == 0)
264         panic("kfree");
265       char *v = p2v(pa);
266       kfree(v);
267       *pte = 0;
268     }
269   }
270   return newsz;
271 }
272 
273 // Free a page table and all the physical memory pages
274 // in the user part.
275 void
276 freevm(pde_t *pgdir)
277 {
278   uint i;
279 
280   if(pgdir == 0)
281     panic("freevm: no pgdir");
282   deallocuvm(pgdir, KERNBASE, 0);
283   for(i = 0; i < NPDENTRIES; i++){
284     if(pgdir[i] & PTE_P) {
285       char * v = p2v(PTE_ADDR(pgdir[i]));
286       kfree(v);
287     }
288   }
289   kfree((char*)pgdir);
290 }
291 
292 // Given a parent process's page table, create a copy
293 // of it for a child.
294 pde_t*
295 copyuvm(pde_t *pgdir, uint sz)
296 {
297   pde_t *d;
298   pte_t *pte;
299   uint pa, i;
300   char *mem;
301 
302   if((d = setupkvm(kalloc)) == 0)
303     return 0;
304   for(i = 0; i < sz; i += PGSIZE){
305     if((pte = walkpgdir(pgdir, (void*)i, 0)) == 0)
306       panic("copyuvm: pte should exist");
307     if(!(*pte & PTE_P))
308       panic("copyuvm: page not present");
309     pa = PTE_ADDR(*pte);
310     if((mem = kalloc()) == 0)
311       goto bad;
312     memmove(mem, (char*)p2v(pa), PGSIZE);
313     if(mappages(d, (void*)i, PGSIZE, v2p(mem), PTE_W|PTE_U, kalloc) < 0)
314       goto bad;
315   }
316   return d;
317 
318 bad:
319   freevm(d);
320   return 0;
321 }
322 
323 //PAGEBREAK!
324 // Map user virtual address to kernel address.
325 char*
326 uva2ka(pde_t *pgdir, char *uva)
327 {
328   pte_t *pte;
329 
330   pte = walkpgdir(pgdir, uva, 0);
331   if((*pte & PTE_P) == 0)
332     return 0;
333   if((*pte & PTE_U) == 0)
334     return 0;
335   return (char*)p2v(PTE_ADDR(*pte));
336 }
337 
338 // Copy len bytes from p to user address va in page table pgdir.
339 // Most useful when pgdir is not the current page table.
340 // uva2ka ensures this only works for PTE_U pages.
341 int
342 copyout(pde_t *pgdir, uint va, void *p, uint len)
343 {
344   char *buf, *pa0;
345   uint n, va0;
346 
347   buf = (char*)p;
348   while(len > 0){
349     va0 = (uint)PGROUNDDOWN(va);
350     pa0 = uva2ka(pgdir, (char*)va0);
351     if(pa0 == 0)
352       return -1;
353     n = PGSIZE - (va - va0);
354     if(n > len)
355       n = len;
356     memmove(pa0 + (va - va0), buf, n);
357     len -= n;
358     buf += n;
359     va = va0 + PGSIZE;
360   }
361   return 0;
362 }
363