xref: /minix/minix/kernel/arch/i386/memory.c (revision 7f5f010b)
1 
2 #include "kernel/kernel.h"
3 #include "kernel/vm.h"
4 
5 #include <machine/vm.h>
6 
7 #include <minix/type.h>
8 #include <minix/syslib.h>
9 #include <minix/cpufeature.h>
10 #include <string.h>
11 #include <assert.h>
12 #include <signal.h>
13 #include <stdlib.h>
14 
15 #include <machine/vm.h>
16 
17 #include "oxpcie.h"
18 #include "arch_proto.h"
19 
20 #ifdef USE_APIC
21 #include "apic.h"
22 #ifdef USE_WATCHDOG
23 #include "kernel/watchdog.h"
24 #endif
25 #endif
26 
27 phys_bytes video_mem_vaddr = 0;
28 
29 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
30 static int nfreepdes = 0;
31 #define MAXFREEPDES	2
32 static int freepdes[MAXFREEPDES];
33 
34 static u32_t phys_get32(phys_bytes v);
35 
36 void mem_clear_mapcache(void)
37 {
38 	int i;
39 	for(i = 0; i < nfreepdes; i++) {
40 		struct proc *ptproc = get_cpulocal_var(ptproc);
41 		int pde = freepdes[i];
42 		u32_t *ptv;
43 		assert(ptproc);
44 		ptv = ptproc->p_seg.p_cr3_v;
45 		assert(ptv);
46 		ptv[pde] = 0;
47 	}
48 }
49 
50 /* This function sets up a mapping from within the kernel's address
51  * space to any other area of memory, either straight physical
52  * memory (pr == NULL) or a process view of memory, in 4MB windows.
53  * I.e., it maps in 4MB chunks of virtual (or physical) address space
54  * to 4MB chunks of kernel virtual address space.
55  *
56  * It recognizes pr already being in memory as a special case (no
57  * mapping required).
58  *
59  * The target (i.e. in-kernel) mapping area is one of the freepdes[]
60  * VM has earlier already told the kernel about that is available. It is
61  * identified as the 'pde' parameter. This value can be chosen freely
62  * by the caller, as long as it is in range (i.e. 0 or higher and corresponds
63  * to a known freepde slot). It is up to the caller to keep track of which
64  * freepde's are in use, and to determine which ones are free to use.
65  *
66  * The logical number supplied by the caller is translated into an actual
67  * pde number to be used, and a pointer to it (linear address) is returned
68  * for actual use by phys_copy or memset.
69  */
70 static phys_bytes createpde(
71 	const struct proc *pr,	/* Requested process, NULL for physical. */
72 	const phys_bytes linaddr,/* Address after segment translation. */
73 	phys_bytes *bytes,	/* Size of chunk, function may truncate it. */
74 	int free_pde_idx,	/* index of the free slot to use */
75 	int *changed		/* If mapping is made, this is set to 1. */
76 	)
77 {
78 	u32_t pdeval;
79 	phys_bytes offset;
80 	int pde;
81 
82 	assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
83 	pde = freepdes[free_pde_idx];
84 	assert(pde >= 0 && pde < 1024);
85 
86 	if(pr && ((pr == get_cpulocal_var(ptproc)) || iskernelp(pr))) {
87 		/* Process memory is requested, and
88 		 * it's a process that is already in current page table, or
89 		 * the kernel, which is always there.
90 		 * Therefore linaddr is valid directly, with the requested
91 		 * size.
92 		 */
93 		return linaddr;
94 	}
95 
96 	if(pr) {
97 		/* Requested address is in a process that is not currently
98 		 * accessible directly. Grab the PDE entry of that process'
99 		 * page table that corresponds to the requested address.
100 		 */
101 		assert(pr->p_seg.p_cr3_v);
102 		pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
103 	} else {
104 		/* Requested address is physical. Make up the PDE entry. */
105 		pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
106 			I386_VM_BIGPAGE | I386_VM_PRESENT |
107 			I386_VM_WRITE | I386_VM_USER;
108 	}
109 
110 	/* Write the pde value that we need into a pde that the kernel
111 	 * can access, into the currently loaded page table so it becomes
112 	 * visible.
113 	 */
114 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
115 	if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
116 		get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
117 		*changed = 1;
118 	}
119 
120 	/* Memory is now available, but only the 4MB window of virtual
121 	 * address space that we have mapped; calculate how much of
122 	 * the requested range is visible and return that in *bytes,
123 	 * if that is less than the requested range.
124 	 */
125 	offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
126 	*bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
127 
128 	/* Return the linear address of the start of the new mapping. */
129 	return I386_BIG_PAGE_SIZE*pde + offset;
130 }
131 
132 
133 /*===========================================================================*
134  *                           check_resumed_caller                            *
135  *===========================================================================*/
136 static int check_resumed_caller(struct proc *caller)
137 {
138 	/* Returns the result from VM if caller was resumed, otherwise OK. */
139 	if (caller && (caller->p_misc_flags & MF_KCALL_RESUME)) {
140 		assert(caller->p_vmrequest.vmresult != VMSUSPEND);
141 		return caller->p_vmrequest.vmresult;
142 	}
143 
144 	return OK;
145 }
146 
147 /*===========================================================================*
148  *				lin_lin_copy				     *
149  *===========================================================================*/
150 static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
151 	struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
152 {
153 	u32_t addr;
154 	proc_nr_t procslot;
155 
156 	assert(get_cpulocal_var(ptproc));
157 	assert(get_cpulocal_var(proc_ptr));
158 	assert(read_cr3() == get_cpulocal_var(ptproc)->p_seg.p_cr3);
159 
160 	procslot = get_cpulocal_var(ptproc)->p_nr;
161 
162 	assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
163 
164 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
165 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
166 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
167 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
168 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT));
169 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT));
170 
171 	while(bytes > 0) {
172 		phys_bytes srcptr, dstptr;
173 		vir_bytes chunk = bytes;
174 		int changed = 0;
175 
176 #ifdef CONFIG_SMP
177 		unsigned cpu = cpuid;
178 
179 		if (srcproc && GET_BIT(srcproc->p_stale_tlb, cpu)) {
180 			changed = 1;
181 			UNSET_BIT(srcproc->p_stale_tlb, cpu);
182 		}
183 		if (dstproc && GET_BIT(dstproc->p_stale_tlb, cpu)) {
184 			changed = 1;
185 			UNSET_BIT(dstproc->p_stale_tlb, cpu);
186 		}
187 #endif
188 
189 		/* Set up 4MB ranges. */
190 		srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
191 		dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
192 		if(changed)
193 			reload_cr3();
194 
195 		/* Copy pages. */
196 		PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
197 
198 		if(addr) {
199 			/* If addr is nonzero, a page fault was caught. */
200 
201 			if(addr >= srcptr && addr < (srcptr + chunk)) {
202 				return EFAULT_SRC;
203 			}
204 			if(addr >= dstptr && addr < (dstptr + chunk)) {
205 				return EFAULT_DST;
206 			}
207 
208 			panic("lin_lin_copy fault out of range");
209 
210 			/* Not reached. */
211 			return EFAULT;
212 		}
213 
214 		/* Update counter and addresses for next iteration, if any. */
215 		bytes -= chunk;
216 		srclinaddr += chunk;
217 		dstlinaddr += chunk;
218 	}
219 
220 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
221 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
222 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
223 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
224 
225 	return OK;
226 }
227 
228 
229 static u32_t phys_get32(phys_bytes addr)
230 {
231 	u32_t v;
232 	int r;
233 
234 	if((r=lin_lin_copy(NULL, addr,
235 		proc_addr(SYSTEM), (phys_bytes) &v, sizeof(v))) != OK) {
236 		panic("lin_lin_copy for phys_get32 failed: %d",  r);
237 	}
238 
239 	return v;
240 }
241 
#if 0
/* Debugging helpers, currently compiled out: render the flag bits of a
 * cr0/cr4 value as a string of flag names. Each returns a pointer to its
 * own static buffer, so the result is overwritten by the next call.
 */
static char *cr0_str(u32_t e)
{
	/* With all seven flags set the names alone take 84 characters, plus
	 * the " (++)" marker and the terminator; 80 bytes was too small.
	 */
	static char str[128];
	strcpy(str, "");
	/* Append the name of flag 'v' if it is set in 'e', and clear it so
	 * that leftover unknown bits can be detected afterwards.
	 */
#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~(v); } } while(0)
	FLAG(I386_CR0_PE);
	FLAG(I386_CR0_MP);
	FLAG(I386_CR0_EM);
	FLAG(I386_CR0_TS);
	FLAG(I386_CR0_ET);
	FLAG(I386_CR0_PG);
	FLAG(I386_CR0_WP);
	/* Any bit still set is one we have no name for. */
	if(e) { strcat(str, " (++)"); }
	return str;
}

static char *cr4_str(u32_t e)
{
	/* Worst case here is 103 characters of names plus " (++)" and the
	 * terminator, so 80 bytes was too small as well.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_CR4_VME);
	FLAG(I386_CR4_PVI);
	FLAG(I386_CR4_TSD);
	FLAG(I386_CR4_DE);
	FLAG(I386_CR4_PSE);
	FLAG(I386_CR4_PAE);
	FLAG(I386_CR4_MCE);
	FLAG(I386_CR4_PGE);
	if(e) { strcat(str, " (++)"); }
	return str;
}
#endif
275 
276 /*===========================================================================*
277  *                              umap_virtual                                 *
278  *===========================================================================*/
279 phys_bytes umap_virtual(rp, seg, vir_addr, bytes)
280 register struct proc *rp;       /* pointer to proc table entry for process */
281 int seg;                        /* T, D, or S segment */
282 vir_bytes vir_addr;             /* virtual address in bytes within the seg */
283 vir_bytes bytes;                /* # of bytes to be copied */
284 {
285 	phys_bytes phys = 0;
286 
287 	if(vm_lookup(rp, vir_addr, &phys, NULL) != OK) {
288 		printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%x: 0x%lx failed\n", rp->p_name, seg, vir_addr);
289 		phys = 0;
290 	} else {
291 		if(phys == 0)
292 			panic("vm_lookup returned phys: 0x%lx",  phys);
293 	}
294 
295 	if(phys == 0) {
296 		printf("SYSTEM:umap_virtual: lookup failed\n");
297 		return 0;
298 	}
299 
300 	/* Now make sure addresses are contiguous in physical memory
301 	 * so that the umap makes sense.
302 	 */
303 	if(bytes > 0 && vm_lookup_range(rp, vir_addr, NULL, bytes) != bytes) {
304 		printf("umap_virtual: %s: %lu at 0x%lx (vir 0x%lx) not contiguous\n",
305 			rp->p_name, bytes, vir_addr, vir_addr);
306 		return 0;
307 	}
308 
309 	/* phys must be larger than 0 (or the caller will think the call
310 	 * failed), and address must not cross a page boundary.
311 	 */
312 	assert(phys);
313 
314 	return phys;
315 }
316 
317 
318 /*===========================================================================*
319  *                              vm_lookup                                    *
320  *===========================================================================*/
321 int vm_lookup(const struct proc *proc, const vir_bytes virtual,
322  phys_bytes *physical, u32_t *ptent)
323 {
324 	u32_t *root, *pt;
325 	int pde, pte;
326 	u32_t pde_v, pte_v;
327 
328 	assert(proc);
329 	assert(physical);
330 	assert(!isemptyp(proc));
331 	assert(HASPT(proc));
332 
333 	/* Retrieve page directory entry. */
334 	root = (u32_t *) proc->p_seg.p_cr3;
335 	assert(!((u32_t) root % I386_PAGE_SIZE));
336 	pde = I386_VM_PDE(virtual);
337 	assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
338 	pde_v = phys_get32((u32_t) (root + pde));
339 
340 	if(!(pde_v & I386_VM_PRESENT)) {
341 		return EFAULT;
342 	}
343 
344 	/* We don't expect to ever see this. */
345 	if(pde_v & I386_VM_BIGPAGE) {
346 		*physical = pde_v & I386_VM_ADDR_MASK_4MB;
347 		if(ptent) *ptent = pde_v;
348 		*physical += virtual & I386_VM_OFFSET_MASK_4MB;
349 	} else {
350 		/* Retrieve page table entry. */
351 		pt = (u32_t *) I386_VM_PFA(pde_v);
352 		assert(!((u32_t) pt % I386_PAGE_SIZE));
353 		pte = I386_VM_PTE(virtual);
354 		assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
355 		pte_v = phys_get32((u32_t) (pt + pte));
356 		if(!(pte_v & I386_VM_PRESENT)) {
357 			return EFAULT;
358 		}
359 
360 		if(ptent) *ptent = pte_v;
361 
362 		/* Actual address now known; retrieve it and add page offset. */
363 		*physical = I386_VM_PFA(pte_v);
364 		*physical += virtual % I386_PAGE_SIZE;
365 	}
366 
367 	return OK;
368 }
369 
370 /*===========================================================================*
371  *				vm_lookup_range				     *
372  *===========================================================================*/
373 size_t vm_lookup_range(const struct proc *proc, vir_bytes vir_addr,
374 	phys_bytes *phys_addr, size_t bytes)
375 {
376 	/* Look up the physical address corresponding to linear virtual address
377 	 * 'vir_addr' for process 'proc'. Return the size of the range covered
378 	 * by contiguous physical memory starting from that address; this may
379 	 * be anywhere between 0 and 'bytes' inclusive. If the return value is
380 	 * nonzero, and 'phys_addr' is non-NULL, 'phys_addr' will be set to the
381 	 * base physical address of the range. 'vir_addr' and 'bytes' need not
382 	 * be page-aligned, but the caller must have verified that the given
383 	 * linear range is valid for the given process at all.
384 	 */
385 	phys_bytes phys, next_phys;
386 	size_t len;
387 
388 	assert(proc);
389 	assert(bytes > 0);
390 	assert(HASPT(proc));
391 
392 	/* Look up the first page. */
393 	if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
394 		return 0;
395 
396 	if (phys_addr != NULL)
397 		*phys_addr = phys;
398 
399 	len = I386_PAGE_SIZE - (vir_addr % I386_PAGE_SIZE);
400 	vir_addr += len;
401 	next_phys = phys + len;
402 
403 	/* Look up any next pages and test physical contiguity. */
404 	while (len < bytes) {
405 		if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
406 			break;
407 
408 		if (next_phys != phys)
409 			break;
410 
411 		len += I386_PAGE_SIZE;
412 		vir_addr += I386_PAGE_SIZE;
413 		next_phys += I386_PAGE_SIZE;
414 	}
415 
416 	/* We might now have overshot the requested length somewhat. */
417 	return MIN(bytes, len);
418 }
419 
420 /*===========================================================================*
421  *                              vm_suspend                                *
422  *===========================================================================*/
423 static void vm_suspend(struct proc *caller, const struct proc *target,
424 	const vir_bytes linaddr, const vir_bytes len, const int type,
425 	const int writeflag)
426 {
427 	/* This range is not OK for this process. Set parameters
428 	 * of the request and notify VM about the pending request.
429 	 */
430 	assert(!RTS_ISSET(caller, RTS_VMREQUEST));
431 	assert(!RTS_ISSET(target, RTS_VMREQUEST));
432 
433 	RTS_SET(caller, RTS_VMREQUEST);
434 
435 	assert(caller->p_endpoint != VM_PROC_NR);
436 
437 	caller->p_vmrequest.req_type = VMPTYPE_CHECK;
438 	caller->p_vmrequest.target = target->p_endpoint;
439 	caller->p_vmrequest.params.check.start = linaddr;
440 	caller->p_vmrequest.params.check.length = len;
441 	caller->p_vmrequest.params.check.writeflag = writeflag;
442 	caller->p_vmrequest.type = type;
443 
444 	/* Connect caller on vmrequest wait queue. */
445 	if(!(caller->p_vmrequest.nextrequestor = vmrequest))
446 		if(OK != send_sig(VM_PROC_NR, SIGKMEM))
447 			panic("send_sig failed");
448 	vmrequest = caller;
449 }
450 
451 /*===========================================================================*
452  *				vm_check_range				     *
453  *===========================================================================*/
454 int vm_check_range(struct proc *caller, struct proc *target,
455 	vir_bytes vir_addr, size_t bytes, int writeflag)
456 {
457 	/* Public interface to vm_suspend(), for use by kernel calls. On behalf
458 	 * of 'caller', call into VM to check linear virtual address range of
459 	 * process 'target', starting at 'vir_addr', for 'bytes' bytes. This
460 	 * function assumes that it will called twice if VM returned an error
461 	 * the first time (since nothing has changed in that case), and will
462 	 * then return the error code resulting from the first call. Upon the
463 	 * first call, a non-success error code is returned as well.
464 	 */
465 	int r;
466 
467 	if ((caller->p_misc_flags & MF_KCALL_RESUME) &&
468 			(r = caller->p_vmrequest.vmresult) != OK)
469 		return r;
470 
471 	vm_suspend(caller, target, vir_addr, bytes, VMSTYPE_KERNELCALL,
472 		writeflag);
473 
474 	return VMSUSPEND;
475 }
476 
477 /*===========================================================================*
478  *                              delivermsg                                *
479  *===========================================================================*/
480 void delivermsg(struct proc *rp)
481 {
482 	int r = OK;
483 
484 	assert(rp->p_misc_flags & MF_DELIVERMSG);
485 	assert(rp->p_delivermsg.m_source != NONE);
486 
487 	if (copy_msg_to_user(&rp->p_delivermsg,
488 				(message *) rp->p_delivermsg_vir)) {
489 		printf("WARNING wrong user pointer 0x%08lx from "
490 				"process %s / %d\n",
491 				rp->p_delivermsg_vir,
492 				rp->p_name,
493 				rp->p_endpoint);
494 		cause_sig(rp->p_nr, SIGSEGV);
495 		r = EFAULT;
496 	}
497 
498 	/* Indicate message has been delivered; address is 'used'. */
499 	rp->p_delivermsg.m_source = NONE;
500 	rp->p_misc_flags &= ~MF_DELIVERMSG;
501 
502 	if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
503 		rp->p_reg.retreg = r;
504 	}
505 }
506 
#if 0
/* Debugging helpers, currently compiled out: dump a page directory and its
 * page tables in readable form. They rely on the FLAG() macro defined in
 * the other disabled debugging block of this file.
 */

/* Render the flag bits of a page directory entry (dir != 0) or page table
 * entry (dir == 0) as a string of flag names. Returns a pointer to a static
 * buffer that is overwritten by the next call.
 */
static char *flagstr(u32_t e, const int dir)
{
	/* All names together take up to 98 characters plus the terminator,
	 * so 80 bytes was too small.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_VM_PRESENT);
	FLAG(I386_VM_WRITE);
	FLAG(I386_VM_USER);
	FLAG(I386_VM_PWT);
	FLAG(I386_VM_PCD);
	FLAG(I386_VM_GLOBAL);
	if(dir)
		FLAG(I386_VM_BIGPAGE);	/* Page directory entry only */
	else
		FLAG(I386_VM_DIRTY);	/* Page table entry only */
	return str;
}

/* Print the present entries of the page table at physical address
 * 'pagetable', three per line; 'v' is the virtual address that the first
 * entry of the table maps.
 */
static void vm_pt_print(u32_t *pagetable, const u32_t v)
{
	int pte;
	int col = 0;

	assert(!((u32_t) pagetable % I386_PAGE_SIZE));

	for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
		u32_t pte_v, pfa;
		pte_v = phys_get32((u32_t) (pagetable + pte));
		if(!(pte_v & I386_VM_PRESENT))
			continue;
		pfa = I386_VM_PFA(pte_v);
		/* %lx takes an unsigned long, so convert explicitly. */
		printf("%4d:%08lx:%08lx %2s ",
			pte, (unsigned long) (v + I386_PAGE_SIZE*pte),
			(unsigned long) pfa,
			(pte_v & I386_VM_WRITE) ? "rw":"RO");
		col++;
		if(col == 3) { printf("\n"); col = 0; }
	}
	if(col > 0) printf("\n");

	return;
}

/* Print the present entries of the page directory at physical address
 * 'root', descending into every page table that is not a 4MB page.
 */
static void vm_print(u32_t *root)
{
	int pde;

	assert(!((u32_t) root % I386_PAGE_SIZE));

	/* Pointers are not valid %lx arguments; convert them first. */
	printf("page table 0x%lx:\n", (unsigned long) root);

	for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
		u32_t pde_v;
		u32_t *pte_a;
		pde_v = phys_get32((u32_t) (root + pde));
		if(!(pde_v & I386_VM_PRESENT))
			continue;
		if(pde_v & I386_VM_BIGPAGE) {
			printf("%4d: 0x%lx, flags %s\n",
				pde, (unsigned long) I386_VM_PFA(pde_v),
				flagstr(pde_v, 1));
		} else {
			pte_a = (u32_t *) I386_VM_PFA(pde_v);
			printf("%4d: pt %08lx %s\n",
				pde, (unsigned long) pte_a, flagstr(pde_v, 1));
			vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
			printf("\n");
		}
	}


	return;
}
#endif
579 
580 /*===========================================================================*
581  *                                 vmmemset                                  *
582  *===========================================================================*/
583 int vm_memset(struct proc* caller, endpoint_t who, phys_bytes ph, int c,
584 	phys_bytes count)
585 {
586 	u32_t pattern;
587 	struct proc *whoptr = NULL;
588 	phys_bytes cur_ph = ph;
589 	phys_bytes left = count;
590 	phys_bytes ptr, chunk, pfa = 0;
591 	int new_cr3, r = OK;
592 
593 	if ((r = check_resumed_caller(caller)) != OK)
594 		return r;
595 
596 	/* NONE for physical, otherwise virtual */
597 	if (who != NONE && !(whoptr = endpoint_lookup(who)))
598 		return ESRCH;
599 
600 	c &= 0xFF;
601 	pattern = c | (c << 8) | (c << 16) | (c << 24);
602 
603 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
604 	assert(!catch_pagefaults);
605 	catch_pagefaults = 1;
606 
607 	/* We can memset as many bytes as we have remaining,
608 	 * or as many as remain in the 4MB chunk we mapped in.
609 	 */
610 	while (left > 0) {
611 		new_cr3 = 0;
612 		chunk = left;
613 		ptr = createpde(whoptr, cur_ph, &chunk, 0, &new_cr3);
614 
615 		if (new_cr3)
616 			reload_cr3();
617 
618 		/* If a page fault happens, pfa is non-null */
619 		if ((pfa = phys_memset(ptr, pattern, chunk))) {
620 
621 			/* If a process pagefaults, VM may help out */
622 			if (whoptr) {
623 				vm_suspend(caller, whoptr, ph, count,
624 						   VMSTYPE_KERNELCALL, 1);
625 				assert(catch_pagefaults);
626 				catch_pagefaults = 0;
627 				return VMSUSPEND;
628 			}
629 
630 			/* Pagefault when phys copying ?! */
631 			panic("vm_memset: pf %lx addr=%lx len=%lu\n",
632 						pfa , ptr, chunk);
633 		}
634 
635 		cur_ph += chunk;
636 		left -= chunk;
637 	}
638 
639 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
640 	assert(catch_pagefaults);
641 	catch_pagefaults = 0;
642 
643 	return OK;
644 }
645 
646 /*===========================================================================*
647  *				virtual_copy_f				     *
648  *===========================================================================*/
649 int virtual_copy_f(caller, src_addr, dst_addr, bytes, vmcheck)
650 struct proc * caller;
651 struct vir_addr *src_addr;	/* source virtual address */
652 struct vir_addr *dst_addr;	/* destination virtual address */
653 vir_bytes bytes;		/* # of bytes to copy  */
654 int vmcheck;			/* if nonzero, can return VMSUSPEND */
655 {
656 /* Copy bytes from virtual address src_addr to virtual address dst_addr. */
657   struct vir_addr *vir_addr[2];	/* virtual source and destination address */
658   int i, r;
659   struct proc *procs[2];
660 
661   assert((vmcheck && caller) || (!vmcheck && !caller));
662 
663   /* Check copy count. */
664   if (bytes <= 0) return(EDOM);
665 
666   /* Do some more checks and map virtual addresses to physical addresses. */
667   vir_addr[_SRC_] = src_addr;
668   vir_addr[_DST_] = dst_addr;
669 
670   for (i=_SRC_; i<=_DST_; i++) {
671   	endpoint_t proc_e = vir_addr[i]->proc_nr_e;
672 	int proc_nr;
673 	struct proc *p;
674 
675 	if(proc_e == NONE) {
676 		p = NULL;
677 	} else {
678 		if(!isokendpt(proc_e, &proc_nr)) {
679 			printf("virtual_copy: no reasonable endpoint\n");
680 			return ESRCH;
681 		}
682 		p = proc_addr(proc_nr);
683 	}
684 
685 	procs[i] = p;
686   }
687 
688   if ((r = check_resumed_caller(caller)) != OK)
689 	return r;
690 
691   if((r=lin_lin_copy(procs[_SRC_], vir_addr[_SRC_]->offset,
692   	procs[_DST_], vir_addr[_DST_]->offset, bytes)) != OK) {
693 	int writeflag;
694   	struct proc *target = NULL;
695   	phys_bytes lin;
696   	if(r != EFAULT_SRC && r != EFAULT_DST)
697   		panic("lin_lin_copy failed: %d",  r);
698   	if(!vmcheck || !caller) {
699     		return r;
700   	}
701 
702   	if(r == EFAULT_SRC) {
703   		lin = vir_addr[_SRC_]->offset;
704   		target = procs[_SRC_];
705 		writeflag = 0;
706   	} else if(r == EFAULT_DST) {
707   		lin = vir_addr[_DST_]->offset;
708   		target = procs[_DST_];
709 		writeflag = 1;
710   	} else {
711   		panic("r strange: %d",  r);
712   	}
713 
714 	assert(caller);
715 	assert(target);
716 
717 	vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL, writeflag);
718 	return VMSUSPEND;
719   }
720 
721   return OK;
722 }
723 
724 /*===========================================================================*
725  *				data_copy				     *
726  *===========================================================================*/
727 int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
728 	const endpoint_t to_proc, const vir_bytes to_addr,
729 	size_t bytes)
730 {
731   struct vir_addr src, dst;
732 
733   src.offset = from_addr;
734   dst.offset = to_addr;
735   src.proc_nr_e = from_proc;
736   dst.proc_nr_e = to_proc;
737   assert(src.proc_nr_e != NONE);
738   assert(dst.proc_nr_e != NONE);
739 
740   return virtual_copy(&src, &dst, bytes);
741 }
742 
743 /*===========================================================================*
744  *				data_copy_vmcheck			     *
745  *===========================================================================*/
746 int data_copy_vmcheck(struct proc * caller,
747 	const endpoint_t from_proc, const vir_bytes from_addr,
748 	const endpoint_t to_proc, const vir_bytes to_addr,
749 	size_t bytes)
750 {
751   struct vir_addr src, dst;
752 
753   src.offset = from_addr;
754   dst.offset = to_addr;
755   src.proc_nr_e = from_proc;
756   dst.proc_nr_e = to_proc;
757   assert(src.proc_nr_e != NONE);
758   assert(dst.proc_nr_e != NONE);
759 
760   return virtual_copy_vmcheck(caller, &src, &dst, bytes);
761 }
762 
763 void memory_init(void)
764 {
765 	assert(nfreepdes == 0);
766 
767 	freepdes[nfreepdes++] = kinfo.freepde_start++;
768 	freepdes[nfreepdes++] = kinfo.freepde_start++;
769 
770 	assert(kinfo.freepde_start < I386_VM_DIR_ENTRIES);
771 	assert(nfreepdes == 2);
772 	assert(nfreepdes <= MAXFREEPDES);
773 }
774 
775 /*===========================================================================*
776  *				arch_proc_init				     *
777  *===========================================================================*/
778 void arch_proc_init(struct proc *pr, const u32_t ip, const u32_t sp,
779 	const u32_t ps_str, char *name)
780 {
781 	arch_proc_reset(pr);
782 	strlcpy(pr->p_name, name, sizeof(pr->p_name));
783 
784 	/* set custom state we know */
785 	pr->p_reg.pc = ip;
786 	pr->p_reg.sp = sp;
787 	pr->p_reg.bx = ps_str;
788 }
789 
/* Index numbers of the physical mappings handed out by arch_phys_map().
 * They are assigned on the first call of that function; -1 means that the
 * mapping in question is not in use.
 */
static int oxpcie_mapping_index = -1;
static int lapic_mapping_index = -1;
static int ioapic_first_index = -1;
static int ioapic_last_index = -1;
static int video_mem_mapping_index = -1;
static int usermapped_glo_index = -1;
static int usermapped_index = -1;
static int first_um_idx = -1;

extern char *video_mem;

/* Symbols delimiting the usermapped region; only their addresses are
 * used.
 */
extern char usermapped_start;
extern char usermapped_end;
extern char usermapped_nonglo_start;
801 
802 int arch_phys_map(const int index,
803 			phys_bytes *addr,
804 			phys_bytes *len,
805 			int *flags)
806 {
807 	static int first = 1;
808 	int freeidx = 0;
809 	static char *ser_var = NULL;
810 	u32_t glo_len = (u32_t) &usermapped_nonglo_start -
811 			(u32_t) &usermapped_start;
812 
813 	if(first) {
814 		memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
815 		video_mem_mapping_index = freeidx++;
816 		if(glo_len > 0) {
817 			usermapped_glo_index = freeidx++;
818 		}
819 
820 		usermapped_index = freeidx++;
821 		first_um_idx = usermapped_index;
822 		if(usermapped_glo_index != -1)
823 			first_um_idx = usermapped_glo_index;
824 
825 #ifdef USE_APIC
826 		if(lapic_addr)
827 			lapic_mapping_index = freeidx++;
828 		if (ioapic_enabled) {
829 			ioapic_first_index = freeidx;
830 			assert(nioapics > 0);
831 			freeidx += nioapics;
832 			ioapic_last_index = freeidx-1;
833 		}
834 #endif
835 
836 #ifdef CONFIG_OXPCIE
837 		if((ser_var = env_get("oxpcie"))) {
838 			if(ser_var[0] != '0' || ser_var[1] != 'x') {
839 				printf("oxpcie address in hex please\n");
840 			} else {
841 				printf("oxpcie address is %s\n", ser_var);
842 				oxpcie_mapping_index = freeidx++;
843 			}
844 		}
845 #endif
846 
847 		first = 0;
848 	}
849 
850 	if(index == usermapped_glo_index) {
851 		*addr = vir2phys(&usermapped_start);
852 		*len = glo_len;
853 		*flags = VMMF_USER | VMMF_GLO;
854 		return OK;
855 	}
856 	else if(index == usermapped_index) {
857 		*addr = vir2phys(&usermapped_nonglo_start);
858 		*len = (u32_t) &usermapped_end -
859 			(u32_t) &usermapped_nonglo_start;
860 		*flags = VMMF_USER;
861 		return OK;
862 	}
863 	else if (index == video_mem_mapping_index) {
864 		/* map video memory in so we can print panic messages */
865 		*addr = MULTIBOOT_VIDEO_BUFFER;
866 		*len = I386_PAGE_SIZE;
867 		*flags = VMMF_WRITE;
868 		return OK;
869 	}
870 #ifdef USE_APIC
871 	else if (index == lapic_mapping_index) {
872 		/* map the local APIC if enabled */
873 		if (!lapic_addr)
874 			return EINVAL;
875 		*addr = lapic_addr;
876 		*len = 4 << 10 /* 4kB */;
877 		*flags = VMMF_UNCACHED | VMMF_WRITE;
878 		return OK;
879 	}
880 	else if (ioapic_enabled && index >= ioapic_first_index && index <= ioapic_last_index) {
881 		int ioapic_idx = index - ioapic_first_index;
882 		*addr = io_apic[ioapic_idx].paddr;
883 		assert(*addr);
884 		*len = 4 << 10 /* 4kB */;
885 		*flags = VMMF_UNCACHED | VMMF_WRITE;
886 		printf("ioapic map: addr 0x%lx\n", *addr);
887 		return OK;
888 	}
889 #endif
890 
891 #if CONFIG_OXPCIE
892 	if(index == oxpcie_mapping_index) {
893 		*addr = strtoul(ser_var+2, NULL, 16);
894 		*len = 0x4000;
895 		*flags = VMMF_UNCACHED | VMMF_WRITE;
896 		return OK;
897 	}
898 #endif
899 
900 	return EINVAL;
901 }
902 
903 int arch_phys_map_reply(const int index, const vir_bytes addr)
904 {
905 #ifdef USE_APIC
906 	/* if local APIC is enabled */
907 	if (index == lapic_mapping_index && lapic_addr) {
908 		lapic_addr_vaddr = addr;
909 		return OK;
910 	}
911 	else if (ioapic_enabled && index >= ioapic_first_index &&
912 		index <= ioapic_last_index) {
913 		int i = index - ioapic_first_index;
914 		io_apic[i].vaddr = addr;
915 		return OK;
916 	}
917 #endif
918 
919 #if CONFIG_OXPCIE
920 	if (index == oxpcie_mapping_index) {
921 		oxpcie_set_vaddr((unsigned char *) addr);
922 		return OK;
923 	}
924 #endif
925 	if(index == first_um_idx) {
926 		extern struct minix_ipcvecs minix_ipcvecs_sysenter,
927 			minix_ipcvecs_syscall,
928 			minix_ipcvecs_softint;
929 		extern u32_t usermapped_offset;
930 		assert(addr > (u32_t) &usermapped_start);
931 		usermapped_offset = addr - (u32_t) &usermapped_start;
932 #define FIXEDPTR(ptr) (void *) ((u32_t)ptr + usermapped_offset)
933 #define FIXPTR(ptr) ptr = FIXEDPTR(ptr)
934 #define ASSIGN(minixstruct) minix_kerninfo.minixstruct = FIXEDPTR(&minixstruct)
935 		ASSIGN(kinfo);
936 		ASSIGN(machine);
937 		ASSIGN(kmessages);
938 		ASSIGN(loadinfo);
939 
940 		/* select the right set of IPC routines to map into processes */
941 		if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
942 			printf("kernel: selecting intel sysenter ipc style\n");
943 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
944 		} else  if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
945 			printf("kernel: selecting amd syscall ipc style\n");
946 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
947 		} else	{
948 			printf("kernel: selecting fallback (int) ipc style\n");
949 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
950 		}
951 
952 		/* adjust the pointers of the functions and the struct
953 		 * itself to the user-accessible mapping
954 		 */
955 		FIXPTR(minix_kerninfo.minix_ipcvecs->send);
956 		FIXPTR(minix_kerninfo.minix_ipcvecs->receive);
957 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec);
958 		FIXPTR(minix_kerninfo.minix_ipcvecs->senda);
959 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb);
960 		FIXPTR(minix_kerninfo.minix_ipcvecs->notify);
961 		FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call);
962 		FIXPTR(minix_kerninfo.minix_ipcvecs);
963 
964 		minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
965 		minix_kerninfo.minix_feature_flags = minix_feature_flags;
966 		minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
967 
968 		/* if libc_ipc is set, disable usermapped ipc functions
969 		 * and force binaries to use in-libc fallbacks.
970 		 */
971 		if(env_get("libc_ipc")) {
972 			printf("kernel: forcing in-libc fallback ipc style\n");
973 			minix_kerninfo.minix_ipcvecs = NULL;
974 		} else {
975 			minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
976 		}
977 
978 		return OK;
979 	}
980 
981 	if(index == usermapped_index) return OK;
982 
983 	if (index == video_mem_mapping_index) {
984 		video_mem_vaddr =  addr;
985 		return OK;
986 	}
987 
988 	return EINVAL;
989 }
990 
991 int arch_enable_paging(struct proc * caller)
992 {
993 	assert(caller->p_seg.p_cr3);
994 
995 	/* load caller's page table */
996 	switch_address_space(caller);
997 
998 	video_mem = (char *) video_mem_vaddr;
999 
1000 #ifdef USE_APIC
1001 	/* start using the virtual addresses */
1002 
1003 	/* if local APIC is enabled */
1004 	if (lapic_addr) {
1005 		lapic_addr = lapic_addr_vaddr;
1006 		lapic_eoi_addr = LAPIC_EOI;
1007 	}
1008 	/* if IO apics are enabled */
1009 	if (ioapic_enabled) {
1010 		int i;
1011 
1012 		for (i = 0; i < nioapics; i++) {
1013 			io_apic[i].addr = io_apic[i].vaddr;
1014 		}
1015 	}
1016 #if CONFIG_SMP
1017 	barrier();
1018 
1019 	wait_for_APs_to_finish_booting();
1020 #endif
1021 #endif
1022 
1023 #ifdef USE_WATCHDOG
1024 	/*
1025 	 * We make sure that we don't enable the watchdog until paging is turned
1026 	 * on as we might get an NMI while switching and we might still use wrong
1027 	 * lapic address. Bad things would happen. It is unfortunate but such is
1028 	 * life
1029 	 */
1030 	if (watchdog_enabled)
1031 		i386_watchdog_start();
1032 #endif
1033 
1034 	return OK;
1035 }
1036 
1037 void release_address_space(struct proc *pr)
1038 {
1039 	pr->p_seg.p_cr3_v = NULL;
1040 }
1041 
/* Check the checksum of the 'length' bytes at 'ptr': the buffer is valid
 * when the sum of all its bytes is zero modulo 256. Returns 1 if so and 0
 * otherwise; an empty buffer counts as valid.
 */
int platform_tbl_checksum_ok(void *ptr, unsigned int length)
{
	const unsigned char *cur = ptr;
	const unsigned char *stop = cur + length;
	unsigned int sum = 0;

	while (cur != stop)
		sum += *cur++;

	/* Only the low eight bits of the sum take part in the check. */
	return (sum & 0xFF) == 0;
}
1051 
1052 int platform_tbl_ptr(phys_bytes start,
1053 					phys_bytes end,
1054 					unsigned increment,
1055 					void * buff,
1056 					unsigned size,
1057 					phys_bytes * phys_addr,
1058 					int ((* cmp_f)(void *)))
1059 {
1060 	phys_bytes addr;
1061 
1062 	for (addr = start; addr < end; addr += increment) {
1063 		phys_copy (addr, (phys_bytes) buff, size);
1064 		if (cmp_f(buff)) {
1065 			if (phys_addr)
1066 				*phys_addr = addr;
1067 			return 1;
1068 		}
1069 	}
1070 	return 0;
1071 }
1072