xref: /minix/minix/kernel/arch/i386/memory.c (revision fb9c64b2)
1 
2 #include "kernel/kernel.h"
3 #include "kernel/vm.h"
4 
5 #include <machine/vm.h>
6 
7 #include <minix/syslib.h>
8 #include <minix/cpufeature.h>
9 #include <string.h>
10 #include <assert.h>
11 #include <signal.h>
12 #include <stdlib.h>
13 
14 #include <machine/vm.h>
15 
16 #include "oxpcie.h"
17 #include "arch_proto.h"
18 
19 #ifdef USE_APIC
20 #include "apic.h"
21 #ifdef USE_WATCHDOG
22 #include "kernel/watchdog.h"
23 #endif
24 #endif
25 
26 phys_bytes video_mem_vaddr = 0;
27 
28 #define HASPT(procptr) ((procptr)->p_seg.p_cr3 != 0)
29 static int nfreepdes = 0;
30 #define MAXFREEPDES	2
31 static int freepdes[MAXFREEPDES];
32 
33 static u32_t phys_get32(phys_bytes v);
34 
35 void mem_clear_mapcache(void)
36 {
37 	int i;
38 	for(i = 0; i < nfreepdes; i++) {
39 		struct proc *ptproc = get_cpulocal_var(ptproc);
40 		int pde = freepdes[i];
41 		u32_t *ptv;
42 		assert(ptproc);
43 		ptv = ptproc->p_seg.p_cr3_v;
44 		assert(ptv);
45 		ptv[pde] = 0;
46 	}
47 }
48 
49 /* This function sets up a mapping from within the kernel's address
50  * space to any other area of memory, either straight physical
51  * memory (pr == NULL) or a process view of memory, in 4MB windows.
52  * I.e., it maps in 4MB chunks of virtual (or physical) address space
53  * to 4MB chunks of kernel virtual address space.
54  *
55  * It recognizes pr already being in memory as a special case (no
56  * mapping required).
57  *
58  * The target (i.e. in-kernel) mapping area is one of the freepdes[]
59  * VM has earlier already told the kernel about that is available. It is
60  * identified as the 'pde' parameter. This value can be chosen freely
61  * by the caller, as long as it is in range (i.e. 0 or higher and corresponds
62  * to a known freepde slot). It is up to the caller to keep track of which
63  * freepde's are in use, and to determine which ones are free to use.
64  *
65  * The logical number supplied by the caller is translated into an actual
66  * pde number to be used, and a pointer to it (linear address) is returned
67  * for actual use by phys_copy or memset.
68  */
69 static phys_bytes createpde(
70 	const struct proc *pr,	/* Requested process, NULL for physical. */
71 	const phys_bytes linaddr,/* Address after segment translation. */
72 	phys_bytes *bytes,	/* Size of chunk, function may truncate it. */
73 	int free_pde_idx,	/* index of the free slot to use */
74 	int *changed		/* If mapping is made, this is set to 1. */
75 	)
76 {
77 	u32_t pdeval;
78 	phys_bytes offset;
79 	int pde;
80 
81 	assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
82 	pde = freepdes[free_pde_idx];
83 	assert(pde >= 0 && pde < 1024);
84 
85 	if(pr && ((pr == get_cpulocal_var(ptproc)) || iskernelp(pr))) {
86 		/* Process memory is requested, and
87 		 * it's a process that is already in current page table, or
88 		 * the kernel, which is always there.
89 		 * Therefore linaddr is valid directly, with the requested
90 		 * size.
91 		 */
92 		return linaddr;
93 	}
94 
95 	if(pr) {
96 		/* Requested address is in a process that is not currently
97 		 * accessible directly. Grab the PDE entry of that process'
98 		 * page table that corresponds to the requested address.
99 		 */
100 		assert(pr->p_seg.p_cr3_v);
101 		pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
102 	} else {
103 		/* Requested address is physical. Make up the PDE entry. */
104 		pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
105 			I386_VM_BIGPAGE | I386_VM_PRESENT |
106 			I386_VM_WRITE | I386_VM_USER;
107 	}
108 
109 	/* Write the pde value that we need into a pde that the kernel
110 	 * can access, into the currently loaded page table so it becomes
111 	 * visible.
112 	 */
113 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
114 	if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
115 		get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
116 		*changed = 1;
117 	}
118 
119 	/* Memory is now available, but only the 4MB window of virtual
120 	 * address space that we have mapped; calculate how much of
121 	 * the requested range is visible and return that in *bytes,
122 	 * if that is less than the requested range.
123 	 */
124 	offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
125 	*bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);
126 
127 	/* Return the linear address of the start of the new mapping. */
128 	return I386_BIG_PAGE_SIZE*pde + offset;
129 }
130 
131 
132 /*===========================================================================*
133  *                           check_resumed_caller                            *
134  *===========================================================================*/
135 static int check_resumed_caller(struct proc *caller)
136 {
137 	/* Returns the result from VM if caller was resumed, otherwise OK. */
138 	if (caller && (caller->p_misc_flags & MF_KCALL_RESUME)) {
139 		assert(caller->p_vmrequest.vmresult != VMSUSPEND);
140 		return caller->p_vmrequest.vmresult;
141 	}
142 
143 	return OK;
144 }
145 
146 /*===========================================================================*
147  *				lin_lin_copy				     *
148  *===========================================================================*/
149 static int lin_lin_copy(struct proc *srcproc, vir_bytes srclinaddr,
150 	struct proc *dstproc, vir_bytes dstlinaddr, vir_bytes bytes)
151 {
152 	u32_t addr;
153 	proc_nr_t procslot;
154 
155 	assert(get_cpulocal_var(ptproc));
156 	assert(get_cpulocal_var(proc_ptr));
157 	assert(read_cr3() == get_cpulocal_var(ptproc)->p_seg.p_cr3);
158 
159 	procslot = get_cpulocal_var(ptproc)->p_nr;
160 
161 	assert(procslot >= 0 && procslot < I386_VM_DIR_ENTRIES);
162 
163 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
164 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
165 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
166 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
167 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_VMINHIBIT));
168 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_VMINHIBIT));
169 
170 	while(bytes > 0) {
171 		phys_bytes srcptr, dstptr;
172 		vir_bytes chunk = bytes;
173 		int changed = 0;
174 
175 #ifdef CONFIG_SMP
176 		unsigned cpu = cpuid;
177 
178 		if (srcproc && GET_BIT(srcproc->p_stale_tlb, cpu)) {
179 			changed = 1;
180 			UNSET_BIT(srcproc->p_stale_tlb, cpu);
181 		}
182 		if (dstproc && GET_BIT(dstproc->p_stale_tlb, cpu)) {
183 			changed = 1;
184 			UNSET_BIT(dstproc->p_stale_tlb, cpu);
185 		}
186 #endif
187 
188 		/* Set up 4MB ranges. */
189 		srcptr = createpde(srcproc, srclinaddr, &chunk, 0, &changed);
190 		dstptr = createpde(dstproc, dstlinaddr, &chunk, 1, &changed);
191 		if(changed)
192 			reload_cr3();
193 
194 		/* Check for overflow. */
195 		if (srcptr + chunk < srcptr) return EFAULT_SRC;
196 		if (dstptr + chunk < dstptr) return EFAULT_DST;
197 
198 		/* Copy pages. */
199 		PHYS_COPY_CATCH(srcptr, dstptr, chunk, addr);
200 
201 		if(addr) {
202 			/* If addr is nonzero, a page fault was caught. */
203 
204 			if(addr >= srcptr && addr < (srcptr + chunk)) {
205 				return EFAULT_SRC;
206 			}
207 			if(addr >= dstptr && addr < (dstptr + chunk)) {
208 				return EFAULT_DST;
209 			}
210 
211 			panic("lin_lin_copy fault out of range");
212 
213 			/* Not reached. */
214 			return EFAULT;
215 		}
216 
217 		/* Update counter and addresses for next iteration, if any. */
218 		bytes -= chunk;
219 		srclinaddr += chunk;
220 		dstlinaddr += chunk;
221 	}
222 
223 	if(srcproc) assert(!RTS_ISSET(srcproc, RTS_SLOT_FREE));
224 	if(dstproc) assert(!RTS_ISSET(dstproc, RTS_SLOT_FREE));
225 	assert(!RTS_ISSET(get_cpulocal_var(ptproc), RTS_SLOT_FREE));
226 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
227 
228 	return OK;
229 }
230 
231 
232 static u32_t phys_get32(phys_bytes addr)
233 {
234 	u32_t v;
235 	int r;
236 
237 	if((r=lin_lin_copy(NULL, addr,
238 		proc_addr(SYSTEM), (phys_bytes) &v, sizeof(v))) != OK) {
239 		panic("lin_lin_copy for phys_get32 failed: %d",  r);
240 	}
241 
242 	return v;
243 }
244 
#if 0
/* Debugging helpers, currently compiled out. Each returns a pointer to a
 * static buffer holding the names of the flags set in 'e', followed by
 * " (++)" if any bits remain that were not recognized. The buffer is
 * overwritten by the next call to the same function.
 */
static char *cr0_str(u32_t e)
{
	/* With all seven flags set the names alone take 84 characters, and
	 * the " (++)" marker plus terminator bring that to 90 bytes; an
	 * 80-byte buffer would be overrun.
	 */
	static char str[128];
	strcpy(str, "");
/* Append the name of flag 'v' to str if it is set in e, then clear it in e.
 * Expects 'e' and 'str' to be in scope at the point of use.
 */
#define FLAG(v) do { if(e & (v)) { strcat(str, #v " "); e &= ~(v); } } while(0)
	FLAG(I386_CR0_PE);
	FLAG(I386_CR0_MP);
	FLAG(I386_CR0_EM);
	FLAG(I386_CR0_TS);
	FLAG(I386_CR0_ET);
	FLAG(I386_CR0_PG);
	FLAG(I386_CR0_WP);
	if(e) { strcat(str, " (++)"); }
	return str;
}

static char *cr4_str(u32_t e)
{
	/* All eight names plus " (++)" and the terminator need 109 bytes. */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_CR4_VME);
	FLAG(I386_CR4_PVI);
	FLAG(I386_CR4_TSD);
	FLAG(I386_CR4_DE);
	FLAG(I386_CR4_PSE);
	FLAG(I386_CR4_PAE);
	FLAG(I386_CR4_MCE);
	FLAG(I386_CR4_PGE);
	if(e) { strcat(str, " (++)"); }
	return str;
}
#endif
278 
279 /*===========================================================================*
280  *                              umap_virtual                                 *
281  *===========================================================================*/
282 phys_bytes umap_virtual(
283   register struct proc *rp,		/* pointer to proc table entry for process */
284   int seg,				/* T, D, or S segment */
285   vir_bytes vir_addr,			/* virtual address in bytes within the seg */
286   vir_bytes bytes			/* # of bytes to be copied */
287 )
288 {
289 	phys_bytes phys = 0;
290 
291 	if(vm_lookup(rp, vir_addr, &phys, NULL) != OK) {
292 		printf("SYSTEM:umap_virtual: vm_lookup of %s: seg 0x%x: 0x%lx failed\n", rp->p_name, seg, vir_addr);
293 		phys = 0;
294 	} else {
295 		if(phys == 0)
296 			panic("vm_lookup returned phys: 0x%lx",  phys);
297 	}
298 
299 	if(phys == 0) {
300 		printf("SYSTEM:umap_virtual: lookup failed\n");
301 		return 0;
302 	}
303 
304 	/* Now make sure addresses are contiguous in physical memory
305 	 * so that the umap makes sense.
306 	 */
307 	if(bytes > 0 && vm_lookup_range(rp, vir_addr, NULL, bytes) != bytes) {
308 		printf("umap_virtual: %s: %lu at 0x%lx (vir 0x%lx) not contiguous\n",
309 			rp->p_name, bytes, vir_addr, vir_addr);
310 		return 0;
311 	}
312 
313 	/* phys must be larger than 0 (or the caller will think the call
314 	 * failed), and address must not cross a page boundary.
315 	 */
316 	assert(phys);
317 
318 	return phys;
319 }
320 
321 
322 /*===========================================================================*
323  *                              vm_lookup                                    *
324  *===========================================================================*/
325 int vm_lookup(const struct proc *proc, const vir_bytes virtual,
326  phys_bytes *physical, u32_t *ptent)
327 {
328 	u32_t *root, *pt;
329 	int pde, pte;
330 	u32_t pde_v, pte_v;
331 
332 	assert(proc);
333 	assert(physical);
334 	assert(!isemptyp(proc));
335 	assert(HASPT(proc));
336 
337 	/* Retrieve page directory entry. */
338 	root = (u32_t *) proc->p_seg.p_cr3;
339 	assert(!((u32_t) root % I386_PAGE_SIZE));
340 	pde = I386_VM_PDE(virtual);
341 	assert(pde >= 0 && pde < I386_VM_DIR_ENTRIES);
342 	pde_v = phys_get32((u32_t) (root + pde));
343 
344 	if(!(pde_v & I386_VM_PRESENT)) {
345 		return EFAULT;
346 	}
347 
348 	/* We don't expect to ever see this. */
349 	if(pde_v & I386_VM_BIGPAGE) {
350 		*physical = pde_v & I386_VM_ADDR_MASK_4MB;
351 		if(ptent) *ptent = pde_v;
352 		*physical += virtual & I386_VM_OFFSET_MASK_4MB;
353 	} else {
354 		/* Retrieve page table entry. */
355 		pt = (u32_t *) I386_VM_PFA(pde_v);
356 		assert(!((u32_t) pt % I386_PAGE_SIZE));
357 		pte = I386_VM_PTE(virtual);
358 		assert(pte >= 0 && pte < I386_VM_PT_ENTRIES);
359 		pte_v = phys_get32((u32_t) (pt + pte));
360 		if(!(pte_v & I386_VM_PRESENT)) {
361 			return EFAULT;
362 		}
363 
364 		if(ptent) *ptent = pte_v;
365 
366 		/* Actual address now known; retrieve it and add page offset. */
367 		*physical = I386_VM_PFA(pte_v);
368 		*physical += virtual % I386_PAGE_SIZE;
369 	}
370 
371 	return OK;
372 }
373 
374 /*===========================================================================*
375  *				vm_lookup_range				     *
376  *===========================================================================*/
377 size_t vm_lookup_range(const struct proc *proc, vir_bytes vir_addr,
378 	phys_bytes *phys_addr, size_t bytes)
379 {
380 	/* Look up the physical address corresponding to linear virtual address
381 	 * 'vir_addr' for process 'proc'. Return the size of the range covered
382 	 * by contiguous physical memory starting from that address; this may
383 	 * be anywhere between 0 and 'bytes' inclusive. If the return value is
384 	 * nonzero, and 'phys_addr' is non-NULL, 'phys_addr' will be set to the
385 	 * base physical address of the range. 'vir_addr' and 'bytes' need not
386 	 * be page-aligned, but the caller must have verified that the given
387 	 * linear range is valid for the given process at all.
388 	 */
389 	phys_bytes phys, next_phys;
390 	size_t len;
391 
392 	assert(proc);
393 	assert(bytes > 0);
394 	assert(HASPT(proc));
395 
396 	/* Look up the first page. */
397 	if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
398 		return 0;
399 
400 	if (phys_addr != NULL)
401 		*phys_addr = phys;
402 
403 	len = I386_PAGE_SIZE - (vir_addr % I386_PAGE_SIZE);
404 	vir_addr += len;
405 	next_phys = phys + len;
406 
407 	/* Look up any next pages and test physical contiguity. */
408 	while (len < bytes) {
409 		if (vm_lookup(proc, vir_addr, &phys, NULL) != OK)
410 			break;
411 
412 		if (next_phys != phys)
413 			break;
414 
415 		len += I386_PAGE_SIZE;
416 		vir_addr += I386_PAGE_SIZE;
417 		next_phys += I386_PAGE_SIZE;
418 	}
419 
420 	/* We might now have overshot the requested length somewhat. */
421 	return MIN(bytes, len);
422 }
423 
424 /*===========================================================================*
425  *				vm_check_range				     *
426  *===========================================================================*/
427 int vm_check_range(struct proc *caller, struct proc *target,
428 	vir_bytes vir_addr, size_t bytes, int writeflag)
429 {
430 	/* Public interface to vm_suspend(), for use by kernel calls. On behalf
431 	 * of 'caller', call into VM to check linear virtual address range of
432 	 * process 'target', starting at 'vir_addr', for 'bytes' bytes. This
433 	 * function assumes that it will called twice if VM returned an error
434 	 * the first time (since nothing has changed in that case), and will
435 	 * then return the error code resulting from the first call. Upon the
436 	 * first call, a non-success error code is returned as well.
437 	 */
438 	int r;
439 
440 	if ((caller->p_misc_flags & MF_KCALL_RESUME) &&
441 			(r = caller->p_vmrequest.vmresult) != OK)
442 		return r;
443 
444 	vm_suspend(caller, target, vir_addr, bytes, VMSTYPE_KERNELCALL,
445 		writeflag);
446 
447 	return VMSUSPEND;
448 }
449 
#if 0
/* Debugging helpers, currently compiled out. They rely on the FLAG() macro
 * defined earlier in this file.
 */

/* Return a static string naming the page table flags set in 'e'. 'dir'
 * selects between page directory (nonzero) and page table (zero) entries.
 */
static char *flagstr(u32_t e, const int dir)
{
	/* With every flag set the names need 99 bytes including the
	 * terminator, which would overrun an 80-byte buffer.
	 */
	static char str[128];
	strcpy(str, "");
	FLAG(I386_VM_PRESENT);
	FLAG(I386_VM_WRITE);
	FLAG(I386_VM_USER);
	FLAG(I386_VM_PWT);
	FLAG(I386_VM_PCD);
	FLAG(I386_VM_GLOBAL);
	if(dir)
		FLAG(I386_VM_BIGPAGE);	/* Page directory entry only */
	else
		FLAG(I386_VM_DIRTY);	/* Page table entry only */
	return str;
}

/* Print the present entries of the page table at physical address
 * 'pagetable', three per line; 'v' is the virtual address the table starts
 * mapping at.
 */
static void vm_pt_print(u32_t *pagetable, const u32_t v)
{
	int pte;
	int col = 0;

	assert(!((u32_t) pagetable % I386_PAGE_SIZE));

	for(pte = 0; pte < I386_VM_PT_ENTRIES; pte++) {
		u32_t pte_v, pfa;
		pte_v = phys_get32((u32_t) (pagetable + pte));
		if(!(pte_v & I386_VM_PRESENT))
			continue;
		pfa = I386_VM_PFA(pte_v);
		/* Cast so the arguments match the %lx conversions. */
		printf("%4d:%08lx:%08lx %2s ",
			pte, (unsigned long) (v + I386_PAGE_SIZE*pte),
			(unsigned long) pfa,
			(pte_v & I386_VM_WRITE) ? "rw":"RO");
		col++;
		if(col == 3) { printf("\n"); col = 0; }
	}
	if(col > 0) printf("\n");

	return;
}

/* Print the present entries of the page directory at physical address
 * 'root', descending into every page table it refers to.
 */
static void vm_print(u32_t *root)
{
	int pde;

	assert(!((u32_t) root % I386_PAGE_SIZE));

	printf("page table 0x%lx:\n", (unsigned long) root);

	for(pde = 0; pde < I386_VM_DIR_ENTRIES; pde++) {
		u32_t pde_v;
		u32_t *pte_a;
		pde_v = phys_get32((u32_t) (root + pde));
		if(!(pde_v & I386_VM_PRESENT))
			continue;
		if(pde_v & I386_VM_BIGPAGE) {
			printf("%4d: 0x%lx, flags %s\n",
				pde, (unsigned long) I386_VM_PFA(pde_v),
				flagstr(pde_v, 1));
		} else {
			pte_a = (u32_t *) I386_VM_PFA(pde_v);
			printf("%4d: pt %08lx %s\n",
				pde, (unsigned long) pte_a,
				flagstr(pde_v, 1));
			vm_pt_print(pte_a, pde * I386_VM_PT_ENTRIES * I386_PAGE_SIZE);
			printf("\n");
		}
	}


	return;
}
#endif
522 
523 /*===========================================================================*
524  *                                 vmmemset                                  *
525  *===========================================================================*/
526 int vm_memset(struct proc* caller, endpoint_t who, phys_bytes ph, int c,
527 	phys_bytes count)
528 {
529 	u32_t pattern;
530 	struct proc *whoptr = NULL;
531 	phys_bytes cur_ph = ph;
532 	phys_bytes left = count;
533 	phys_bytes ptr, chunk, pfa = 0;
534 	int new_cr3, r = OK;
535 
536 	if ((r = check_resumed_caller(caller)) != OK)
537 		return r;
538 
539 	/* NONE for physical, otherwise virtual */
540 	if (who != NONE && !(whoptr = endpoint_lookup(who)))
541 		return ESRCH;
542 
543 	c &= 0xFF;
544 	pattern = c | (c << 8) | (c << 16) | (c << 24);
545 
546 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
547 	assert(!catch_pagefaults);
548 	catch_pagefaults = 1;
549 
550 	/* We can memset as many bytes as we have remaining,
551 	 * or as many as remain in the 4MB chunk we mapped in.
552 	 */
553 	while (left > 0) {
554 		new_cr3 = 0;
555 		chunk = left;
556 		ptr = createpde(whoptr, cur_ph, &chunk, 0, &new_cr3);
557 
558 		if (new_cr3)
559 			reload_cr3();
560 
561 		/* If a page fault happens, pfa is non-null */
562 		if ((pfa = phys_memset(ptr, pattern, chunk))) {
563 
564 			/* If a process pagefaults, VM may help out */
565 			if (whoptr) {
566 				vm_suspend(caller, whoptr, ph, count,
567 						   VMSTYPE_KERNELCALL, 1);
568 				assert(catch_pagefaults);
569 				catch_pagefaults = 0;
570 				return VMSUSPEND;
571 			}
572 
573 			/* Pagefault when phys copying ?! */
574 			panic("vm_memset: pf %lx addr=%lx len=%lu\n",
575 						pfa , ptr, chunk);
576 		}
577 
578 		cur_ph += chunk;
579 		left -= chunk;
580 	}
581 
582 	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
583 	assert(catch_pagefaults);
584 	catch_pagefaults = 0;
585 
586 	return OK;
587 }
588 
589 /*===========================================================================*
590  *				virtual_copy_f				     *
591  *===========================================================================*/
592 int virtual_copy_f(
593   struct proc * caller,
594   struct vir_addr *src_addr,	/* source virtual address */
595   struct vir_addr *dst_addr,	/* destination virtual address */
596   vir_bytes bytes,		/* # of bytes to copy  */
597   int vmcheck			/* if nonzero, can return VMSUSPEND */
598 )
599 {
600 /* Copy bytes from virtual address src_addr to virtual address dst_addr. */
601   struct vir_addr *vir_addr[2];	/* virtual source and destination address */
602   int i, r;
603   struct proc *procs[2];
604 
605   assert((vmcheck && caller) || (!vmcheck && !caller));
606 
607   /* Check copy count. */
608   if (bytes <= 0) return(EDOM);
609 
610   /* Do some more checks and map virtual addresses to physical addresses. */
611   vir_addr[_SRC_] = src_addr;
612   vir_addr[_DST_] = dst_addr;
613 
614   for (i=_SRC_; i<=_DST_; i++) {
615   	endpoint_t proc_e = vir_addr[i]->proc_nr_e;
616 	int proc_nr;
617 	struct proc *p;
618 
619 	if(proc_e == NONE) {
620 		p = NULL;
621 	} else {
622 		if(!isokendpt(proc_e, &proc_nr)) {
623 			printf("virtual_copy: no reasonable endpoint\n");
624 			return ESRCH;
625 		}
626 		p = proc_addr(proc_nr);
627 	}
628 
629 	procs[i] = p;
630   }
631 
632   if ((r = check_resumed_caller(caller)) != OK)
633 	return r;
634 
635   if((r=lin_lin_copy(procs[_SRC_], vir_addr[_SRC_]->offset,
636   	procs[_DST_], vir_addr[_DST_]->offset, bytes)) != OK) {
637 	int writeflag;
638   	struct proc *target = NULL;
639   	phys_bytes lin;
640   	if(r != EFAULT_SRC && r != EFAULT_DST)
641   		panic("lin_lin_copy failed: %d",  r);
642   	if(!vmcheck || !caller) {
643     		return r;
644   	}
645 
646   	if(r == EFAULT_SRC) {
647   		lin = vir_addr[_SRC_]->offset;
648   		target = procs[_SRC_];
649 		writeflag = 0;
650   	} else if(r == EFAULT_DST) {
651   		lin = vir_addr[_DST_]->offset;
652   		target = procs[_DST_];
653 		writeflag = 1;
654   	} else {
655   		panic("r strange: %d",  r);
656   	}
657 
658 	assert(caller);
659 	assert(target);
660 
661 	vm_suspend(caller, target, lin, bytes, VMSTYPE_KERNELCALL, writeflag);
662 	return VMSUSPEND;
663   }
664 
665   return OK;
666 }
667 
668 /*===========================================================================*
669  *				data_copy				     *
670  *===========================================================================*/
671 int data_copy(const endpoint_t from_proc, const vir_bytes from_addr,
672 	const endpoint_t to_proc, const vir_bytes to_addr,
673 	size_t bytes)
674 {
675   struct vir_addr src, dst;
676 
677   src.offset = from_addr;
678   dst.offset = to_addr;
679   src.proc_nr_e = from_proc;
680   dst.proc_nr_e = to_proc;
681   assert(src.proc_nr_e != NONE);
682   assert(dst.proc_nr_e != NONE);
683 
684   return virtual_copy(&src, &dst, bytes);
685 }
686 
687 /*===========================================================================*
688  *				data_copy_vmcheck			     *
689  *===========================================================================*/
690 int data_copy_vmcheck(struct proc * caller,
691 	const endpoint_t from_proc, const vir_bytes from_addr,
692 	const endpoint_t to_proc, const vir_bytes to_addr,
693 	size_t bytes)
694 {
695   struct vir_addr src, dst;
696 
697   src.offset = from_addr;
698   dst.offset = to_addr;
699   src.proc_nr_e = from_proc;
700   dst.proc_nr_e = to_proc;
701   assert(src.proc_nr_e != NONE);
702   assert(dst.proc_nr_e != NONE);
703 
704   return virtual_copy_vmcheck(caller, &src, &dst, bytes);
705 }
706 
707 void memory_init(void)
708 {
709 	assert(nfreepdes == 0);
710 
711 	freepdes[nfreepdes++] = kinfo.freepde_start++;
712 	freepdes[nfreepdes++] = kinfo.freepde_start++;
713 
714 	assert(kinfo.freepde_start < I386_VM_DIR_ENTRIES);
715 	assert(nfreepdes == 2);
716 	assert(nfreepdes <= MAXFREEPDES);
717 }
718 
719 /*===========================================================================*
720  *				arch_proc_init				     *
721  *===========================================================================*/
722 void arch_proc_init(struct proc *pr, const u32_t ip, const u32_t sp,
723 	const u32_t ps_str, char *name)
724 {
725 	arch_proc_reset(pr);
726 	strlcpy(pr->p_name, name, sizeof(pr->p_name));
727 
728 	/* set custom state we know */
729 	pr->p_reg.pc = ip;
730 	pr->p_reg.sp = sp;
731 	pr->p_reg.bx = ps_str;
732 }
733 
/* Mapping indices assigned on the first call to arch_phys_map(); a value
 * of -1 means that no index has been assigned.
 */
static int oxpcie_mapping_index = -1;
static int lapic_mapping_index = -1;
static int ioapic_first_index = -1;
static int ioapic_last_index = -1;
static int video_mem_mapping_index = -1;
static int usermapped_glo_index = -1;
static int usermapped_index = -1;
static int first_um_idx = -1;

extern char *video_mem;

/* Only the addresses of these symbols are used in this file. */
extern char usermapped_start, usermapped_end, usermapped_nonglo_start;
745 
746 int arch_phys_map(const int index,
747 			phys_bytes *addr,
748 			phys_bytes *len,
749 			int *flags)
750 {
751 	static int first = 1;
752 	int freeidx = 0;
753 	static char *ser_var = NULL;
754 	u32_t glo_len = (u32_t) &usermapped_nonglo_start -
755 			(u32_t) &usermapped_start;
756 
757 	if(first) {
758 		memset(&minix_kerninfo, 0, sizeof(minix_kerninfo));
759 		video_mem_mapping_index = freeidx++;
760 		if(glo_len > 0) {
761 			usermapped_glo_index = freeidx++;
762 		}
763 
764 		usermapped_index = freeidx++;
765 		first_um_idx = usermapped_index;
766 		if(usermapped_glo_index != -1)
767 			first_um_idx = usermapped_glo_index;
768 
769 #ifdef USE_APIC
770 		if(lapic_addr)
771 			lapic_mapping_index = freeidx++;
772 		if (ioapic_enabled) {
773 			ioapic_first_index = freeidx;
774 			assert(nioapics > 0);
775 			freeidx += nioapics;
776 			ioapic_last_index = freeidx-1;
777 		}
778 #endif
779 
780 #ifdef CONFIG_OXPCIE
781 		if((ser_var = env_get("oxpcie"))) {
782 			if(ser_var[0] != '0' || ser_var[1] != 'x') {
783 				printf("oxpcie address in hex please\n");
784 			} else {
785 				printf("oxpcie address is %s\n", ser_var);
786 				oxpcie_mapping_index = freeidx++;
787 			}
788 		}
789 #endif
790 
791 		first = 0;
792 	}
793 
794 	if(index == usermapped_glo_index) {
795 		*addr = vir2phys(&usermapped_start);
796 		*len = glo_len;
797 		*flags = VMMF_USER | VMMF_GLO;
798 		return OK;
799 	}
800 	else if(index == usermapped_index) {
801 		*addr = vir2phys(&usermapped_nonglo_start);
802 		*len = (u32_t) &usermapped_end -
803 			(u32_t) &usermapped_nonglo_start;
804 		*flags = VMMF_USER;
805 		return OK;
806 	}
807 	else if (index == video_mem_mapping_index) {
808 		/* map video memory in so we can print panic messages */
809 		*addr = MULTIBOOT_VIDEO_BUFFER;
810 		*len = I386_PAGE_SIZE;
811 		*flags = VMMF_WRITE;
812 		return OK;
813 	}
814 #ifdef USE_APIC
815 	else if (index == lapic_mapping_index) {
816 		/* map the local APIC if enabled */
817 		if (!lapic_addr)
818 			return EINVAL;
819 		*addr = lapic_addr;
820 		*len = 4 << 10 /* 4kB */;
821 		*flags = VMMF_UNCACHED | VMMF_WRITE;
822 		return OK;
823 	}
824 	else if (ioapic_enabled && index >= ioapic_first_index && index <= ioapic_last_index) {
825 		int ioapic_idx = index - ioapic_first_index;
826 		*addr = io_apic[ioapic_idx].paddr;
827 		assert(*addr);
828 		*len = 4 << 10 /* 4kB */;
829 		*flags = VMMF_UNCACHED | VMMF_WRITE;
830 		printf("ioapic map: addr 0x%lx\n", *addr);
831 		return OK;
832 	}
833 #endif
834 
835 #if CONFIG_OXPCIE
836 	if(index == oxpcie_mapping_index) {
837 		*addr = strtoul(ser_var+2, NULL, 16);
838 		*len = 0x4000;
839 		*flags = VMMF_UNCACHED | VMMF_WRITE;
840 		return OK;
841 	}
842 #endif
843 
844 	return EINVAL;
845 }
846 
847 int arch_phys_map_reply(const int index, const vir_bytes addr)
848 {
849 #ifdef USE_APIC
850 	/* if local APIC is enabled */
851 	if (index == lapic_mapping_index && lapic_addr) {
852 		lapic_addr_vaddr = addr;
853 		return OK;
854 	}
855 	else if (ioapic_enabled && index >= ioapic_first_index &&
856 		index <= ioapic_last_index) {
857 		int i = index - ioapic_first_index;
858 		io_apic[i].vaddr = addr;
859 		return OK;
860 	}
861 #endif
862 
863 #if CONFIG_OXPCIE
864 	if (index == oxpcie_mapping_index) {
865 		oxpcie_set_vaddr((unsigned char *) addr);
866 		return OK;
867 	}
868 #endif
869 	if(index == first_um_idx) {
870 		extern struct minix_ipcvecs minix_ipcvecs_sysenter,
871 			minix_ipcvecs_syscall,
872 			minix_ipcvecs_softint;
873 		extern u32_t usermapped_offset;
874 		assert(addr > (u32_t) &usermapped_start);
875 		usermapped_offset = addr - (u32_t) &usermapped_start;
876 #define FIXEDPTR(ptr) (void *) ((u32_t)ptr + usermapped_offset)
877 #define FIXPTR(ptr) ptr = FIXEDPTR(ptr)
878 #define ASSIGN(minixstruct) minix_kerninfo.minixstruct = FIXEDPTR(&minixstruct)
879 		ASSIGN(kinfo);
880 		ASSIGN(machine);
881 		ASSIGN(kmessages);
882 		ASSIGN(loadinfo);
883 		ASSIGN(kuserinfo);
884 		ASSIGN(arm_frclock); /* eh, why not. */
885 		ASSIGN(kclockinfo);
886 
887 		/* select the right set of IPC routines to map into processes */
888 		if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
889 			DEBUGBASIC(("kernel: selecting intel sysenter ipc style\n"));
890 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_sysenter;
891 		} else  if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
892 			DEBUGBASIC(("kernel: selecting amd syscall ipc style\n"));
893 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_syscall;
894 		} else	{
895 			DEBUGBASIC(("kernel: selecting fallback (int) ipc style\n"));
896 			minix_kerninfo.minix_ipcvecs = &minix_ipcvecs_softint;
897 		}
898 
899 		/* adjust the pointers of the functions and the struct
900 		 * itself to the user-accessible mapping
901 		 */
902 		FIXPTR(minix_kerninfo.minix_ipcvecs->send);
903 		FIXPTR(minix_kerninfo.minix_ipcvecs->receive);
904 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendrec);
905 		FIXPTR(minix_kerninfo.minix_ipcvecs->senda);
906 		FIXPTR(minix_kerninfo.minix_ipcvecs->sendnb);
907 		FIXPTR(minix_kerninfo.minix_ipcvecs->notify);
908 		FIXPTR(minix_kerninfo.minix_ipcvecs->do_kernel_call);
909 		FIXPTR(minix_kerninfo.minix_ipcvecs);
910 
911 		minix_kerninfo.kerninfo_magic = KERNINFO_MAGIC;
912 		minix_kerninfo.minix_feature_flags = minix_feature_flags;
913 		minix_kerninfo_user = (vir_bytes) FIXEDPTR(&minix_kerninfo);
914 
915 		/* if libc_ipc is set, disable usermapped ipc functions
916 		 * and force binaries to use in-libc fallbacks.
917 		 */
918 		if(env_get("libc_ipc")) {
919 			printf("kernel: forcing in-libc fallback ipc style\n");
920 			minix_kerninfo.minix_ipcvecs = NULL;
921 		} else {
922 			minix_kerninfo.ki_flags |= MINIX_KIF_IPCVECS;
923 		}
924 
925 		minix_kerninfo.ki_flags |= MINIX_KIF_USERINFO;
926 
927 		return OK;
928 	}
929 
930 	if(index == usermapped_index) return OK;
931 
932 	if (index == video_mem_mapping_index) {
933 		video_mem_vaddr =  addr;
934 		return OK;
935 	}
936 
937 	return EINVAL;
938 }
939 
940 int arch_enable_paging(struct proc * caller)
941 {
942 	assert(caller->p_seg.p_cr3);
943 
944 	/* load caller's page table */
945 	switch_address_space(caller);
946 
947 	video_mem = (char *) video_mem_vaddr;
948 
949 #ifdef USE_APIC
950 	/* start using the virtual addresses */
951 
952 	/* if local APIC is enabled */
953 	if (lapic_addr) {
954 		lapic_addr = lapic_addr_vaddr;
955 		lapic_eoi_addr = LAPIC_EOI;
956 	}
957 	/* if IO apics are enabled */
958 	if (ioapic_enabled) {
959 		int i;
960 
961 		for (i = 0; i < nioapics; i++) {
962 			io_apic[i].addr = io_apic[i].vaddr;
963 		}
964 	}
965 #if CONFIG_SMP
966 	barrier();
967 
968 	wait_for_APs_to_finish_booting();
969 #endif
970 #endif
971 
972 #ifdef USE_WATCHDOG
973 	/*
974 	 * We make sure that we don't enable the watchdog until paging is turned
975 	 * on as we might get an NMI while switching and we might still use wrong
976 	 * lapic address. Bad things would happen. It is unfortunate but such is
977 	 * life
978 	 */
979 	if (watchdog_enabled)
980 		i386_watchdog_start();
981 #endif
982 
983 	return OK;
984 }
985 
986 void release_address_space(struct proc *pr)
987 {
988 	pr->p_seg.p_cr3_v = NULL;
989 }
990 
/* Verify the checksum of the 'length' bytes at 'ptr': the checksum is good
 * if all bytes add up to zero modulo 256. Returns 1 if so, 0 otherwise.
 */
int platform_tbl_checksum_ok(void *ptr, unsigned int length)
{
	const unsigned char *bytes = ptr;
	unsigned int sum = 0;

	while (length-- > 0)
		sum += *bytes++;

	/* Only the low eight bits of the sum are significant. */
	return (sum & 0xFF) == 0;
}
1000 
1001 int platform_tbl_ptr(phys_bytes start,
1002 					phys_bytes end,
1003 					unsigned increment,
1004 					void * buff,
1005 					unsigned size,
1006 					phys_bytes * phys_addr,
1007 					int ((* cmp_f)(void *)))
1008 {
1009 	phys_bytes addr;
1010 
1011 	for (addr = start; addr < end; addr += increment) {
1012 		phys_copy (addr, (phys_bytes) buff, size);
1013 		if (cmp_f(buff)) {
1014 			if (phys_addr)
1015 				*phys_addr = addr;
1016 			return 1;
1017 		}
1018 	}
1019 	return 0;
1020 }
1021