xref: /minix/minix/servers/vm/pagefaults.c (revision 90b80121)

#define _SYSTEM 1

#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/safecopies.h>
#include <minix/bitmap.h>
#include <minix/vfsif.h>

#include <machine/vmparam.h>

#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <fcntl.h>
#include <signal.h>
#include <assert.h>

#include "glo.h"
#include "proto.h"
#include "util.h"
#include "region.h"

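/* Saved page fault parameters, used to retry the fault once VFS I/O has
 * completed.
 */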
struct pf_state {
	endpoint_t ep;
	vir_bytes vaddr;
	u32_t err;
};

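/* State of a handle_memory request, carried across VFS callbacks until the
 * whole range has been handled and the caller has been replied to.
 */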
struct hm_state {
	endpoint_t caller;	/* KERNEL or process? if NONE, no callback */
	endpoint_t requestor;	/* on behalf of whom? */
	int transid;		/* VFS transaction id if valid */
	struct vmproc *vmp;	/* target address space */
	vir_bytes mem, len;	/* memory range */
	int wrflag;		/* must the range be writable? */
	int valid;		/* sanity check */
	int vfs_avail;		/* may vfs be called to satisfy this range? */
#define VALID	0xc0ff1
};

static void handle_memory_continue(struct vmproc *vmp, message *m,
	void *arg, void *statearg);
static int handle_memory_step(struct hm_state *hmstate, int retry);
static void handle_memory_final(struct hm_state *state, int result);

/*===========================================================================*
 *				pf_errstr				     *
 *===========================================================================*/
char *pf_errstr(u32_t err)
{
	static char buf[100];

	snprintf(buf, sizeof(buf), "err 0x%lx ", (long)err);
	if(PFERR_NOPAGE(err)) strcat(buf, "nopage ");
	if(PFERR_PROT(err)) strcat(buf, "protection ");
	if(PFERR_WRITE(err)) strcat(buf, "write");
	if(PFERR_READ(err)) strcat(buf, "read");

	return buf;
}

static void pf_cont(struct vmproc *vmp, message *m, void *arg, void *statearg);

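/* Handle a page fault for process 'ep' at address 'addr'.  On the first
 * attempt (retry == 0), map_pf may suspend and have pf_cont() called once
 * VFS has brought in the page; on a retry the fault must be resolved
 * without suspending again.
 */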
static void handle_pagefault(endpoint_t ep, vir_bytes addr, u32_t err, int retry)
{
	struct vmproc *vmp;
	int s, result;
	struct vir_region *region;
	vir_bytes offset;
	int p, wr = PFERR_WRITE(err);
	int io = 0;

	if(vm_isokendpt(ep, &p) != OK)
		panic("handle_pagefault: endpoint wrong: %d", ep);

	vmp = &vmproc[p];
	assert(vmp->vm_flags & VMF_INUSE);

	/* See if address is valid at all. */
	if(!(region = map_lookup(vmp, addr, NULL))) {
		if(PFERR_PROT(err)) {
			printf("VM: pagefault: SIGSEGV %d protected addr 0x%lx; %s\n",
				ep, addr, pf_errstr(err));
		} else {
			assert(PFERR_NOPAGE(err));
			printf("VM: pagefault: SIGSEGV %d bad addr 0x%lx; %s\n",
				ep, addr, pf_errstr(err));
			sys_diagctl_stacktrace(ep);
		}
		if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
			panic("sys_kill failed: %d", s);
		if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
			panic("do_pagefaults: sys_vmctl failed: %d", ep);
		return;
	}

	/* If the process was writing, see if the region is writable. */
	if(!(region->flags & VR_WRITABLE) && wr) {
		printf("VM: pagefault: SIGSEGV %d ro map 0x%lx %s\n",
			ep, addr, pf_errstr(err));
		if((s=sys_kill(vmp->vm_endpoint, SIGSEGV)) != OK)
			panic("sys_kill failed: %d", s);
		if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
			panic("do_pagefaults: sys_vmctl failed: %d", ep);
		return;
	}

	assert(addr >= region->vaddr);
	offset = addr - region->vaddr;

	/* Access is allowed; handle it. */
	if(retry) {
		result = map_pf(vmp, region, offset, wr, NULL, NULL, 0, &io);
		assert(result != SUSPEND);
	} else {
		struct pf_state state;
		state.ep = ep;
		state.vaddr = addr;
		state.err = err;
		result = map_pf(vmp, region, offset, wr, pf_cont,
			&state, sizeof(state), &io);
	}
	if (io)
		vmp->vm_major_page_fault++;
	else
		vmp->vm_minor_page_fault++;

	if(result == SUSPEND) {
		return;
	}

	if(result != OK) {
		printf("VM: pagefault: SIGSEGV %d pagefault not handled\n", ep);
		if((s=sys_kill(ep, SIGSEGV)) != OK)
			panic("sys_kill failed: %d", s);
		if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
			panic("do_pagefaults: sys_vmctl failed: %d", ep);
		return;
	}

	pt_clearmapcache();

	/* The page fault is handled, so now reactivate the process. */
	if((s=sys_vmctl(ep, VMCTL_CLEAR_PAGEFAULT, 0 /*unused*/)) != OK)
		panic("do_pagefaults: sys_vmctl failed: %d", ep);
}

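/* Continuation callback: VFS has completed the I/O for a suspended page
 * fault, so retry handling it.  If the endpoint is no longer valid, the
 * fault is simply dropped.
 */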
static void pf_cont(struct vmproc *vmp, message *m,
	void *arg, void *statearg)
{
	struct pf_state *state = statearg;
	int p;
	if(vm_isokendpt(state->ep, &p) != OK) return;	/* signal */
	handle_pagefault(state->ep, state->vaddr, state->err, 1);
}

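/* Continuation callback: VFS has replied to a request issued on behalf of a
 * handle_memory range; resume stepping through the remainder of the range.
 */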
static void handle_memory_continue(struct vmproc *vmp, message *m,
	void *arg, void *statearg)
{
	int r;
	struct hm_state *state = statearg;
	assert(state);
	assert(state->caller != NONE);
	assert(state->valid == VALID);

	if(m->VMV_RESULT != OK) {
		printf("VM: handle_memory_continue: vfs request failed\n");
		handle_memory_final(state, m->VMV_RESULT);
		return;
	}

	r = handle_memory_step(state, TRUE /*retry*/);

	assert(state->valid == VALID);

	if(r == SUSPEND) {
		return;
	}

	assert(state->valid == VALID);

	handle_memory_final(state, r);
}

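/* Report the final result of a handle_memory request back to its caller.
 * The kernel is notified through VMCTL_MEMREQ_REPLY; a process caller gets
 * an asynchronous reply message, after which the state is wiped so that any
 * stale use of it is caught.
 */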
static void handle_memory_final(struct hm_state *state, int result)
{
	int r, flag;

	assert(state);
	assert(state->valid == VALID);

	if(state->caller == KERNEL) {
		if((r=sys_vmctl(state->requestor, VMCTL_MEMREQ_REPLY, result)) != OK)
			panic("handle_memory_final: sys_vmctl failed: %d", r);
	} else if(state->caller != NONE) {
		/* Send a reply message. */
		message msg;
		memset(&msg, 0, sizeof(msg));
		msg.m_type = result;

		if(IS_VFS_FS_TRANSID(state->transid)) {
			assert(state->caller == VFS_PROC_NR);
			/* If a transaction ID was set, embed it in the reply. */
			msg.m_type = TRNS_ADD_ID(msg.m_type, state->transid);
			flag = AMF_NOREPLY;
		} else
			flag = 0;

		/*
		 * Use AMF_NOREPLY only if there was a transaction ID, which
		 * signifies that VFS issued the request asynchronously.
		 */
		if(asynsend3(state->caller, &msg, flag) != OK) {
			panic("handle_memory_final: asynsend3 failed");
		}

		assert(state->valid == VALID);

		/* Fail fast if anyone tries to access this state again. */
		memset(state, 0, sizeof(*state));
	}
}

/*===========================================================================*
 *				do_pagefaults				     *
 *===========================================================================*/
void do_pagefaults(message *m)
{
	handle_pagefault(m->m_source, m->VPF_ADDR, m->VPF_FLAGS, 0);
}

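/* Make a memory range present right away, without any callback: the range
 * must be resolvable without calling into VFS, so this may not suspend.
 */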
int handle_memory_once(struct vmproc *vmp, vir_bytes mem, vir_bytes len,
	int wrflag)
{
	int r;
	r = handle_memory_start(vmp, mem, len, wrflag, NONE, NONE, 0, 0);
	assert(r != SUSPEND);
	return r;
}

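/* Set up the state for handling a memory range and start working through it.
 * Returns OK or an error if the range could be handled immediately, or
 * SUSPEND if a VFS request had to be issued; in that case the caller is
 * replied to later, from handle_memory_continue().
 */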
int handle_memory_start(struct vmproc *vmp, vir_bytes mem, vir_bytes len,
	int wrflag, endpoint_t caller, endpoint_t requestor, int transid,
	int vfs_avail)
{
	int r;
	struct hm_state state;
	vir_bytes o;

	/* Expand the range to full page boundaries. */
	if((o = mem % PAGE_SIZE)) {
		mem -= o;
		len += o;
	}

	len = roundup(len, PAGE_SIZE);

	state.vmp = vmp;
	state.mem = mem;
	state.len = len;
	state.wrflag = wrflag;
	state.requestor = requestor;
	state.caller = caller;
	state.transid = transid;
	state.valid = VALID;
	state.vfs_avail = vfs_avail;

	r = handle_memory_step(&state, FALSE /*retry*/);

	if(r == SUSPEND) {
		assert(caller != NONE);
		assert(vfs_avail);
	} else {
		handle_memory_final(&state, r);
	}

	return r;
}

/*===========================================================================*
 *				   do_memory				     *
 *===========================================================================*/
void do_memory(void)
{
	endpoint_t who, who_s, requestor;
	vir_bytes mem, mem_s;
	vir_bytes len;
	int wrflag;

	while(1) {
		int p, r = OK;
		struct vmproc *vmp;

		r = sys_vmctl_get_memreq(&who, &mem, &len, &wrflag, &who_s,
			&mem_s, &requestor);

		switch(r) {
		case VMPTYPE_CHECK:
		{
			int transid = 0;
			int vfs_avail;

			if(vm_isokendpt(who, &p) != OK)
				panic("do_memory: bad endpoint: %d", who);
			vmp = &vmproc[p];

			assert(!IS_VFS_FS_TRANSID(transid));

			/* If VFS itself is the requestor, it is blocked and
			 * may not be called to satisfy this range.
			 */
			if(requestor == VFS_PROC_NR) vfs_avail = 0;
			else vfs_avail = 1;

			handle_memory_start(vmp, mem, len, wrflag,
				KERNEL, requestor, transid, vfs_avail);

			break;
		}

		default:
			return;
		}
	}
}

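/* Walk the remaining [mem, mem+len) range one page at a time and make each
 * page present through map_handle_memory().  Returns OK once the whole range
 * has been handled, SUSPEND if a VFS request was issued for a page, or an
 * error such as EFAULT if the range is not valid for this access.
 */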
static int handle_memory_step(struct hm_state *hmstate, int retry)
{
	struct vir_region *region;
	vir_bytes offset, length, sublen;
	int r;

	/* Memory and length must already be page-aligned. */
	assert(hmstate);
	assert(hmstate->valid == VALID);
	assert(!(hmstate->mem % VM_PAGE_SIZE));
	assert(!(hmstate->len % VM_PAGE_SIZE));

	while(hmstate->len > 0) {
		if(!(region = map_lookup(hmstate->vmp, hmstate->mem, NULL))) {
#if VERBOSE
			map_printmap(hmstate->vmp);
			printf("VM: do_memory: memory doesn't exist\n");
#endif
			return EFAULT;
		} else if(!(region->flags & VR_WRITABLE) && hmstate->wrflag) {
#if VERBOSE
			printf("VM: do_memory: write to unwritable map\n");
#endif
			return EFAULT;
		}

		assert(region->vaddr <= hmstate->mem);
		assert(!(region->vaddr % VM_PAGE_SIZE));
		offset = hmstate->mem - region->vaddr;
		length = hmstate->len;
		if (offset + length > region->length)
			length = region->length - offset;

		/*
		 * Handle one page at a time.  While it seems beneficial to
		 * handle multiple pages in one go, the opposite is true:
		 * map_handle_memory will handle one page at a time anyway, and
		 * if we give it the whole range multiple times, it will have
		 * to recheck pages it already handled.  In addition, in order
		 * to handle one-shot pages, we need to know whether we are
		 * retrying a single page, and that is not possible if this is
		 * hidden in map_handle_memory.
		 */
		while (length > 0) {
			sublen = VM_PAGE_SIZE;

			assert(sublen <= length);
			assert(offset + sublen <= region->length);

			/*
			 * Upon the second try for this range, do not allow
			 * calling into VFS again.  This prevents eternal loops
			 * in case the FS messes up, and allows one-shot pages
			 * to be mapped in on the second call.
			 */
			if((region->def_memtype == &mem_type_mappedfile &&
			    (!hmstate->vfs_avail || retry)) ||
			    hmstate->caller == NONE) {
				r = map_handle_memory(hmstate->vmp, region,
				    offset, sublen, hmstate->wrflag, NULL,
				    NULL, 0);
				assert(r != SUSPEND);
			} else {
				r = map_handle_memory(hmstate->vmp, region,
				    offset, sublen, hmstate->wrflag,
				    handle_memory_continue, hmstate,
				    sizeof(*hmstate));
			}

			if(r != OK) return r;

			hmstate->len -= sublen;
			hmstate->mem += sublen;

			offset += sublen;
			length -= sublen;
			retry = FALSE;
		}
	}

	return OK;
}
419