xref: /minix/minix/servers/vm/utility.c (revision ebfedea0)

/* This file contains some utility routines for VM.  */

#define _SYSTEM		1

#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/bitmap.h>
#include <minix/rs.h>
#include <string.h>
#include <errno.h>
#include <env.h>
#include <unistd.h>
#include <assert.h>
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include "proto.h"
#include "glo.h"
#include "util.h"
#include "region.h"
#include "sanitycheck.h"

#include <machine/archtypes.h>
#include "kernel/const.h"
#include "kernel/config.h"
#include "kernel/type.h"
#include "kernel/proc.h"

/*===========================================================================*
 *                              get_mem_chunks                               *
 *===========================================================================*/
void get_mem_chunks(
struct memory *mem_chunks)                      /* store mem chunks here */
{
/* Initialize the free memory list from the kernel-provided memory map.
 * Translate the byte offsets and sizes in this list to clicks, rounding
 * each chunk's start up and its end down so only whole clicks are counted.
 */
  phys_bytes base, size, limit;
  int i;
  struct memory *memp;

  /* Initialize everything to zero. */
  memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks));

  /* Obtain and parse memory from kernel environment. */
  /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */
  for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) {
  	mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr;
  	mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length;
  }

  /* Round physical memory to clicks. Round start up, round end down. */
  for (i = 0; i < NR_MEMS; i++) {
        memp = &mem_chunks[i];          /* next mem chunk is stored here */
        base = mem_chunks[i].base;
        size = mem_chunks[i].size;
        limit = base + size;
        base = (phys_bytes) (CLICK_CEIL(base));
        limit = (phys_bytes) (CLICK_FLOOR(limit));
        if (limit <= base) {
                memp->base = memp->size = 0;
        } else {
                memp->base = base >> CLICK_SHIFT;
                memp->size = (limit - base) >> CLICK_SHIFT;
        }
  }
}

/*===========================================================================*
 *                              vm_isokendpt                                 *
 *===========================================================================*/
int vm_isokendpt(endpoint_t endpoint, int *procn)
{
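        /* Check that 'endpoint' refers to a process slot that VM knows
         * about: store the slot number in *procn and verify that the slot is
         * in use and still bound to that endpoint.  Typical use (as in
         * do_info() below):
         *
         *	int slot;
         *	if (vm_isokendpt(m->m_source, &slot) != OK) return EINVAL;
         *	vmp = &vmproc[slot];
         */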
        *procn = _ENDPOINT_P(endpoint);
        if(*procn < 0 || *procn >= NR_PROCS)
                return EINVAL;
        if(*procn >= 0 && endpoint != vmproc[*procn].vm_endpoint)
                return EDEADEPT;
        if(*procn >= 0 && !(vmproc[*procn].vm_flags & VMF_INUSE))
                return EDEADEPT;
        return OK;
}


/*===========================================================================*
 *                              do_info                                      *
 *===========================================================================*/
int do_info(message *m)
{
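	/* Handle an information request (VMIW_STATS, VMIW_USAGE or
	 * VMIW_REGION): gather the requested data and copy it out to the
	 * buffer supplied by the caller.
	 */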
	struct vm_stats_info vsi;
	struct vm_usage_info vui;
	static struct vm_region_info vri[MAX_VRI_COUNT];
	struct vmproc *vmp;
	vir_bytes addr, size, next, ptr;
	int r, pr, dummy, count, free_pages, largest_contig;

	if (vm_isokendpt(m->m_source, &pr) != OK)
		return EINVAL;
	vmp = &vmproc[pr];

	ptr = (vir_bytes) m->m_lsys_vm_info.ptr;

	switch(m->m_lsys_vm_info.what) {
	case VMIW_STATS:
		vsi.vsi_pagesize = VM_PAGE_SIZE;
		vsi.vsi_total = total_pages;
		memstats(&dummy, &free_pages, &largest_contig);
		vsi.vsi_free = free_pages;
		vsi.vsi_largest = largest_contig;

		get_stats_info(&vsi);

		addr = (vir_bytes) &vsi;
		size = sizeof(vsi);

		break;

	case VMIW_USAGE:
		if(m->m_lsys_vm_info.ep < 0)
			get_usage_info_kernel(&vui);
		else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;
		else get_usage_info(&vmproc[pr], &vui);

		addr = (vir_bytes) &vui;
		size = sizeof(vui);

		break;

	case VMIW_REGION:
		if(m->m_lsys_vm_info.ep == SELF) {
			m->m_lsys_vm_info.ep = m->m_source;
		}
		if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;

		count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT);
		next = m->m_lsys_vm_info.next;

		count = get_region_info(&vmproc[pr], vri, count, &next);

		m->m_lsys_vm_info.count = count;
		m->m_lsys_vm_info.next = next;

		addr = (vir_bytes) vri;
		size = sizeof(vri[0]) * count;

		break;

	default:
		return EINVAL;
	}

	if (size == 0)
		return OK;

	/* Make sure that no page faults can occur while copying out. A page
	 * fault would cause the kernel to send a notify to us, while we would
	 * be waiting for the result of the copy system call, resulting in a
	 * deadlock. Note that no memory mapping can be undone without the
	 * involvement of VM, so we are safe until we're done.
	 */
	r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/);
	if (r != OK) return r;

	/* Now that we know the copy out will succeed, perform the actual copy
	 * operation.
	 */
	return sys_datacopy(SELF, addr,
		(vir_bytes) vmp->vm_endpoint, ptr, size);
}

/*===========================================================================*
 *				swap_proc_slot	     			     *
 *===========================================================================*/
int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
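	/* Exchange the contents of the two process slots while keeping each
	 * slot's own endpoint and slot number, so that only the dynamic state
	 * moves; presumably used when two process instances trade places
	 * during a live update.
	 */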
	struct vmproc orig_src_vmproc, orig_dst_vmproc;

#if LU_DEBUG
	printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Save existing data. */
	orig_src_vmproc = *src_vmp;
	orig_dst_vmproc = *dst_vmp;

	/* Swap slots. */
	*src_vmp = orig_dst_vmproc;
	*dst_vmp = orig_src_vmproc;

	/* Preserve endpoints and slot numbers. */
	src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint;
	src_vmp->vm_slot = orig_src_vmproc.vm_slot;
	dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
	dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;

#if LU_DEBUG
	printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	return OK;
}

/*
 * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to
 * 'dst_vmp', for the source process's address range of 'start_addr'
 * (inclusive) to 'end_addr' (exclusive).  Return OK or an error code.
 * If the regions seem to have been transferred already, do nothing.
 */
static int
transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start_addr, vir_bytes end_addr)
{
	struct vir_region *start_vr, *check_vr, *end_vr;

	start_vr = region_search(&src_vmp->vm_regions_avl, start_addr,
	    AVL_GREATER_EQUAL);

	if (start_vr == NULL || start_vr->vaddr >= end_addr)
		return OK; /* nothing to do */

	/* In the case of a multicomponent live update that includes VM, this
	 * function may be called for the same process more than once, in
	 * order to keep the code paths as similar as possible while still
	 * ensuring that the regions are copied early enough.
	 *
	 * To compensate for these multiple calls, we perform a very simple
	 * check here to see if the region to transfer is already present in
	 * the target process.  If so, we can safely skip copying the regions
	 * again, because there is no other possible explanation for the
	 * region being present already.  Things would go horribly wrong if we
	 * tried copying anyway, but this check is not good enough to detect
	 * all such problems, since we check the base address only.
	 */
	check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr,
	    AVL_EQUAL);
	if (check_vr != NULL) {
#if LU_DEBUG
		printf("VM: transfer_mmap_regions: skipping transfer from "
		    "%d to %d (0x%lx already present)\n",
		    src_vmp->vm_endpoint, dst_vmp->vm_endpoint,
		    start_vr->vaddr);
#endif
		return OK;
	}

	end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS);
	assert(end_vr != NULL);
	assert(start_vr->vaddr <= end_vr->vaddr);

#if LU_DEBUG
	printf("VM: transfer_mmap_regions: transferring memory mapped regions "
	    "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint,
	    dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr);
#endif

	return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr);
}

/*
 * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped
 * regions present in 'src_vmp'.  Return OK on success, or an error otherwise.
 * In the case of failure, successfully created mappings are not undone.
 */
int
map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
	int r;

#if LU_DEBUG
	printf("VM: mapping dynamic data from %d to %d\n",
	    src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif

	/* Transfer memory mapped regions now. To sandbox the new instance and
	 * prevent state corruption on rollback, we share all the regions
	 * between the two instances as COW.
	 */
	r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP);

	/* If the stack is not mapped at VM_DATATOP, there might be some more
	 * regions hiding above the stack.  We have to transfer those as well.
	 */
	if (r == OK && VM_STACKTOP < VM_DATATOP)
		r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP,
		    VM_DATATOP);

	return r;
}

/*===========================================================================*
 *			      swap_proc_dyn_data	     		     *
 *===========================================================================*/
int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	int sys_upd_flags)
{
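	/* Swap the dynamic memory state (virtual region trees and, for VM
	 * itself, page table mappings) of two process slots during a live
	 * update.  When the new VM instance is the target, its own heap and
	 * mmap ranges are first mapped over from the old instance, presumably
	 * because VM cannot rely on page faults for its own address space.
	 */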
	int is_vm;
	int r;

	is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);

        /* For VM, transfer memory mapped regions first. */
        if(is_vm) {
#if LU_DEBUG
		printf("VM: swap_proc_dyn_data: transferring memory mapped regions from old (%d) to new VM (%d)\n",
			src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
		r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE, VM_OWN_MMAPTOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}
		r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}

        }

#if LU_DEBUG
	printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Swap vir_regions' parents. */
	map_setparent(src_vmp);
	map_setparent(dst_vmp);

	/* Don't transfer mmapped regions if not required. */
	if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) {
		return OK;
	}

	/* Make sure regions are consistent. */
	assert(region_search_root(&src_vmp->vm_regions_avl) &&
	    region_search_root(&dst_vmp->vm_regions_avl));

	/* Source and destination are intentionally swapped here! */
	return map_proc_dyn_data(dst_vmp, src_vmp);
}

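/* VM provides its own minimal versions of mmap(), munmap() and brk() below,
 * since it cannot send memory requests to itself.  This mmap() only supports
 * anonymous, page-aligned, kernel-chosen-address allocations and is backed
 * directly by vm_allocpages(); the flags and file arguments are ignored.
 */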
void *mmap(void *addr, size_t len, int f, int f2, int f3, off_t o)
{
	void *ret;
	phys_bytes p;

	assert(!addr);
	assert(!(len % VM_PAGE_SIZE));

	ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE);

	if(!ret) return MAP_FAILED;
	memset(ret, 0, len);
	return ret;
}

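/* Free pages previously obtained through the local mmap() above; the length
 * is rounded up to a whole number of pages.
 */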
int munmap(void *addr, size_t len)
{
	vm_freepages((vir_bytes) addr, roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE);
	return 0;
}

#ifdef __weak_alias
__weak_alias(brk, _brk)
#endif
int _brk(void *addr)
{
	/* brk is a special case function to allow vm itself to
	   allocate memory in its own (cacheable) HEAP */
	vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v;
	extern char _end;
	extern char *_brksize;
	static vir_bytes prevbrk = (vir_bytes) &_end;
	struct vmproc *vmprocess = &vmproc[VM_PROC_NR];

	for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target;
		v += VM_PAGE_SIZE) {
		phys_bytes mem, newpage = alloc_mem(1, 0);
		if(newpage == NO_MEM) return -1;
		mem = CLICK2ABS(newpage);
		if(pt_writemap(vmprocess, &vmprocess->vm_pt,
			v, mem, VM_PAGE_SIZE,
			  ARCH_VM_PTE_PRESENT
			| ARCH_VM_PTE_USER
			| ARCH_VM_PTE_RW
#if defined(__arm__)
			| ARM_VM_PTE_CACHED
#endif
			, 0) != OK) {
			free_mem(newpage, 1);
			return -1;
		}
		prevbrk = v + VM_PAGE_SIZE;
	}

	_brksize = (char *) addr;

	if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK)
		panic("flushtlb failed");

	return 0;
}

/*===========================================================================*
 *				do_getrusage		     		     *
 *===========================================================================*/
int do_getrusage(message *m)
{
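	/* Fill in the VM-maintained fields of a struct rusage (maximum
	 * resident set size and page fault counts) on behalf of PM, as part
	 * of a getrusage(2) call.
	 */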
	int res, slot;
	struct vmproc *vmp;
	struct rusage r_usage;

	/* If the request is not from PM, it is coming directly from userland.
	 * This is an obsolete construction. In the future, userland programs
	 * should no longer be allowed to call vm_getrusage(2) directly at all.
	 * For backward compatibility, we simply return success for now.
	 */
	if (m->m_source != PM_PROC_NR)
		return OK;

	/* Get the process for which resource usage is requested. */
	if ((res = vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot)) != OK)
		return ESRCH;

	vmp = &vmproc[slot];

	/* We are going to change only a few fields, so copy in the rusage
	 * structure first. The structure is still in PM's address space at
	 * this point, so use the message source.
	 */
	if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr,
		SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0)
		return res;

	if (!m->m_lsys_vm_rusage.children) {
		r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* unit is KB */
		r_usage.ru_minflt = vmp->vm_minor_page_fault;
		r_usage.ru_majflt = vmp->vm_major_page_fault;
	} else {
		/* XXX TODO: return the fields for terminated, waited-for
		 * children of the given process. We currently do not have this
		 * information! In the future, rather than teaching VM about
		 * the process hierarchy, PM should probably tell VM at process
		 * exit time which other process should inherit its resource
		 * usage fields. For now, we assume PM clears the fields before
		 * making this call, so we don't zero the fields explicitly.
		 */
	}

	/* Copy out the resulting structure back to PM. */
	return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source,
		m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage));
}

/*===========================================================================*
 *                            adjust_proc_refs                               *
 *===========================================================================*/
void adjust_proc_refs()
{
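       /* Make every memory region point back to the vmproc slot that now
        * holds it, e.g. after process slots have been copied or swapped
        * during a live update.
        */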
       struct vmproc *vmp;
       region_iter iter;

       /* Fix up region parents. */
       for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
               struct vir_region *vr;
               if(!(vmp->vm_flags & VMF_INUSE))
                       continue;
               region_start_iter_least(&vmp->vm_regions_avl, &iter);
               while((vr = region_get_iter(&iter))) {
                       USE(vr, vr->parent = vmp;);
                       region_incr_iter(&iter);
               }
       }
}
