xref: /minix/minix/servers/vm/utility.c (revision 77e79d33)
1 
2 /* This file contains some utility routines for VM.  */
3 
4 #define _SYSTEM		1
5 
6 #include <minix/callnr.h>
7 #include <minix/com.h>
8 #include <minix/config.h>
9 #include <minix/const.h>
10 #include <minix/ds.h>
11 #include <minix/endpoint.h>
12 #include <minix/minlib.h>
13 #include <minix/type.h>
14 #include <minix/ipc.h>
15 #include <minix/sysutil.h>
16 #include <minix/syslib.h>
18 #include <minix/bitmap.h>
19 #include <minix/rs.h>
20 #include <string.h>
21 #include <errno.h>
22 #include <unistd.h>
23 #include <assert.h>
24 #include <sys/cdefs.h>
25 #include <sys/param.h>
26 #include <sys/mman.h>
27 #include <sys/resource.h>
28 
29 #include "proto.h"
30 #include "glo.h"
31 #include "util.h"
32 #include "region.h"
33 #include "sanitycheck.h"
34 
35 #include <machine/archtypes.h>
36 #include "kernel/const.h"
37 #include "kernel/config.h"
38 #include "kernel/type.h"
39 #include "kernel/proc.h"
40 
41 /*===========================================================================*
42  *                              get_mem_chunks                               *
43  *===========================================================================*/
44 void get_mem_chunks(
45 	struct memory *mem_chunks)		/* store mem chunks here */
46 {
47 /* Initialize the free memory list from the kernel-provided memory map.  Translate
48  * the byte offsets and sizes in this list to clicks, properly truncated.
49  */
50   phys_bytes base, size, limit;
51   int i;
52   struct memory *memp;
53 
54   /* Initialize everything to zero. */
55   memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks));
56 
57   /* Obtain and parse memory from kernel environment. */
58   /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */
59   for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) {
60   	mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr;
61   	mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length;
62   }
63 
64   /* Round physical memory to clicks. Round start up, round end down. */
65   for (i = 0; i < NR_MEMS; i++) {
66         memp = &mem_chunks[i];          /* next mem chunk is stored here */
67         base = mem_chunks[i].base;
68         size = mem_chunks[i].size;
69         limit = base + size;
70         base = (phys_bytes) (CLICK_CEIL(base));
71         limit = (phys_bytes) (CLICK_FLOOR(limit));
72         if (limit <= base) {
73                 memp->base = memp->size = 0;
74         } else {
75                 memp->base = base >> CLICK_SHIFT;
76                 memp->size = (limit - base) >> CLICK_SHIFT;
77         }
78   }
79 }
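
/*
 * Worked example (illustrative values, assuming 4 KiB clicks, i.e.
 * CLICK_SHIFT == 12): a boot chunk with base 0x1234 and size 0x3000 has
 * limit 0x4234; the base rounds up to 0x2000 and the limit rounds down to
 * 0x4000, so the entry becomes base click 2 with a size of 2 clicks.  A
 * chunk smaller than one click collapses to base = size = 0.
 */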
80 
81 /*===========================================================================*
82  *                              vm_isokendpt                           	     *
83  *===========================================================================*/
84 int vm_isokendpt(endpoint_t endpoint, int *procn)
85 {
86         *procn = _ENDPOINT_P(endpoint);
87         if(*procn < 0 || *procn >= NR_PROCS)
88 		return EINVAL;
89         if(endpoint != vmproc[*procn].vm_endpoint)
90                 return EDEADEPT;
91         if(!(vmproc[*procn].vm_flags & VMF_INUSE))
92                 return EDEADEPT;
93         return OK;
94 }
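
/*
 * Typical use, as in do_info() below: translate a message source into a
 * process table slot before touching the entry.
 *
 *	int slot;
 *
 *	if (vm_isokendpt(m->m_source, &slot) != OK)
 *		return EINVAL;
 *	vmp = &vmproc[slot];
 */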
95 
96 
97 /*===========================================================================*
98  *                              do_info                                      *
99  *===========================================================================*/
100 int do_info(message *m)
101 {
102 	struct vm_stats_info vsi;
103 	struct vm_usage_info vui;
104 	static struct vm_region_info vri[MAX_VRI_COUNT];
105 	struct vmproc *vmp;
106 	vir_bytes addr, size, next, ptr;
107 	int r, pr, dummy, count, free_pages, largest_contig;
108 
109 	if (vm_isokendpt(m->m_source, &pr) != OK)
110 		return EINVAL;
111 	vmp = &vmproc[pr];
112 
113 	ptr = (vir_bytes) m->m_lsys_vm_info.ptr;
114 
115 	switch(m->m_lsys_vm_info.what) {
116 	case VMIW_STATS:
117 		vsi.vsi_pagesize = VM_PAGE_SIZE;
118 		vsi.vsi_total = total_pages;
119 		memstats(&dummy, &free_pages, &largest_contig);
120 		vsi.vsi_free = free_pages;
121 		vsi.vsi_largest = largest_contig;
122 
123 		get_stats_info(&vsi);
124 
125 		addr = (vir_bytes) &vsi;
126 		size = sizeof(vsi);
127 
128 		break;
129 
130 	case VMIW_USAGE:
131 		if(m->m_lsys_vm_info.ep < 0)
132 			get_usage_info_kernel(&vui);
133 		else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
134 			return EINVAL;
135 		else get_usage_info(&vmproc[pr], &vui);
136 
137 		addr = (vir_bytes) &vui;
138 		size = sizeof(vui);
139 
140 		break;
141 
142 	case VMIW_REGION:
143 		if(m->m_lsys_vm_info.ep == SELF) {
144 			m->m_lsys_vm_info.ep = m->m_source;
145 		}
146 		if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
147 			return EINVAL;
148 
149 		count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT);
150 		next = m->m_lsys_vm_info.next;
151 
152 		count = get_region_info(&vmproc[pr], vri, count, &next);
153 
154 		m->m_lsys_vm_info.count = count;
155 		m->m_lsys_vm_info.next = next;
156 
157 		addr = (vir_bytes) vri;
158 		size = sizeof(vri[0]) * count;
159 
160 		break;
161 
162 	default:
163 		return EINVAL;
164 	}
165 
166 	if (size == 0)
167 		return OK;
168 
169 	/* Make sure that no page faults can occur while copying out. A page
170 	 * fault would cause the kernel to send a notify to us, while we would
171 	 * be waiting for the result of the copy system call, resulting in a
172 	 * deadlock. Note that no memory mapping can be undone without the
173 	 * involvement of VM, so we are safe until we're done.
174 	 */
175 	r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/);
176 	if (r != OK) return r;
177 
178 	/* Now that we know the copy out will succeed, perform the actual copy
179 	 * operation.
180 	 */
181 	return sys_datacopy(SELF, addr,
182 		(vir_bytes) vmp->vm_endpoint, ptr, size);
183 }
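
/*
 * Illustrative request sketch (not part of this file): a VMIW_STATS query
 * could be built roughly as below, assuming the VM_INFO request type and
 * the libsys _taskcall() helper; real callers normally go through the
 * libsys wrappers rather than filling in the message by hand.  VMIW_USAGE
 * and VMIW_REGION additionally set m_lsys_vm_info.ep (and, for regions,
 * .count and .next).
 *
 *	message m;
 *	struct vm_stats_info vsi;
 *
 *	memset(&m, 0, sizeof(m));
 *	m.m_lsys_vm_info.what = VMIW_STATS;
 *	m.m_lsys_vm_info.ptr = &vsi;
 *	if (_taskcall(VM_PROC_NR, VM_INFO, &m) != OK)
 *		...
 */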
184 
185 /*===========================================================================*
186  *				swap_proc_slot	     			     *
187  *===========================================================================*/
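/* Exchange the contents of two VM process slots while leaving each slot's
 * own endpoint and slot number in place, so that only the memory state is
 * traded.  This is used when two instances of a process trade identities,
 * e.g. during a live update (see swap_proc_dyn_data() below).
 */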
188 int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
189 {
190 	struct vmproc orig_src_vmproc, orig_dst_vmproc;
191 
192 #if LU_DEBUG
193 	printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
194 	    src_vmp->vm_endpoint, src_vmp->vm_slot,
195 	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
196 #endif
197 
198 	/* Save existing data. */
199 	orig_src_vmproc = *src_vmp;
200 	orig_dst_vmproc = *dst_vmp;
201 
202 	/* Swap slots. */
203 	*src_vmp = orig_dst_vmproc;
204 	*dst_vmp = orig_src_vmproc;
205 
206 	/* Preserve endpoints and slot numbers. */
207 	src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint;
208 	src_vmp->vm_slot = orig_src_vmproc.vm_slot;
209 	dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
210 	dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
211 
212 #if LU_DEBUG
213 	printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
214 	    src_vmp->vm_endpoint, src_vmp->vm_slot,
215 	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
216 #endif
217 
218 	return OK;
219 }
220 
221 /*
222  * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to
223  * 'dst_vmp', for the source process's address range of 'start_addr'
224  * (inclusive) to 'end_addr' (exclusive).  Return OK or an error code.
225  * If the regions seem to have been transferred already, do nothing.
226  */
227 static int
228 transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp,
229 	vir_bytes start_addr, vir_bytes end_addr)
230 {
231 	struct vir_region *start_vr, *check_vr, *end_vr;
232 
233 	start_vr = region_search(&src_vmp->vm_regions_avl, start_addr,
234 	    AVL_GREATER_EQUAL);
235 
236 	if (start_vr == NULL || start_vr->vaddr >= end_addr)
237 		return OK; /* nothing to do */
238 
239 	/* When a multicomponent live update includes VM itself, this function
240 	 * may be called more than once for the same process, so as to keep the
241 	 * code paths as uniform as possible while still ensuring that the
242 	 * regions are copied early enough.
243 	 *
244 	 * To compensate for these multiple calls, we perform a very simple
245 	 * check here to see if the region to transfer is already present in
246 	 * the target process.  If so, we can safely skip copying the regions
247 	 * again, because there is no other possible explanation for the
248 	 * region being present already.  Things would go horribly wrong if we
249 	 * tried copying anyway, but this check is not good enough to detect
250 	 * all such problems, since we do a check on the base address only.
251 	 */
252 	check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr,
253 	    AVL_EQUAL);
254 	if (check_vr != NULL) {
255 #if LU_DEBUG
256 		printf("VM: transfer_mmap_regions: skipping transfer from "
257 		    "%d to %d (0x%lx already present)\n",
258 		    src_vmp->vm_endpoint, dst_vmp->vm_endpoint,
259 		    start_vr->vaddr);
260 #endif
261 		return OK;
262 	}
263 
264 	end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS);
265 	assert(end_vr != NULL);
266 	assert(start_vr->vaddr <= end_vr->vaddr);
267 
268 #if LU_DEBUG
269 	printf("VM: transfer_mmap_regions: transferring memory mapped regions "
270 	    "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint,
271 	    dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr);
272 #endif
273 
274 	return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr);
275 }
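
/*
 * Bounds selection, illustrated with made-up addresses: given source regions
 * at 0x1000, 0x3000 and 0x5000 and a requested range of [0x2000, 0x5000),
 * the AVL_GREATER_EQUAL lookup yields the 0x3000 region as start_vr and the
 * AVL_LESS lookup on the exclusive end address yields that same region as
 * end_vr, so only the 0x3000 region is transferred.
 */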
276 
277 /*
278  * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped
279  * regions present in 'src_vmp'.  Return OK on success, or an error otherwise.
280  * In the case of failure, successfully created mappings are not undone.
281  */
282 int
283 map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
284 {
285 	int r;
286 
287 #if LU_DEBUG
288 	printf("VM: mapping dynamic data from %d to %d\n",
289 	    src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
290 #endif
291 
292 	/* Transfer memory mapped regions now. To sandbox the new instance and
293 	 * prevent state corruption on rollback, we share all the regions
294 	 * between the two instances as COW.
295 	 */
296 	r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP);
297 
298 	/* If the stack is not mapped at VM_DATATOP, there might be some
299 	 * more regions hiding above the stack.  We also have to transfer
300 	 * those.
301 	 */
302 	if (r == OK && VM_STACKTOP < VM_DATATOP)
303 		r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP,
304 		    VM_DATATOP);
305 
306 	return r;
307 }
308 
309 /*===========================================================================*
310  *			      swap_proc_dyn_data	     		     *
311  *===========================================================================*/
312 int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp,
313 	int sys_upd_flags)
314 {
315 	int is_vm;
316 	int r;
317 
318 	is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
319 
320 	/* For VM, transfer memory mapped regions first. */
321 	if(is_vm) {
322 #if LU_DEBUG
323 		printf("VM: swap_proc_dyn_data: transferring memory mapped regions from old (%d) to new VM (%d)\n",
324 			src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
325 #endif
326 		r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE, VM_OWN_MMAPTOP);
327 		if(r != OK) {
328 			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
329 			return r;
330 		}
331 		r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP);
332 		if(r != OK) {
333 			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
334 			return r;
335 		}
336 
337 	}
338 
339 #if LU_DEBUG
340 	printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
341 	    src_vmp->vm_endpoint, src_vmp->vm_slot,
342 	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
343 #endif
344 
345 	/* Swap vir_regions' parents. */
346 	map_setparent(src_vmp);
347 	map_setparent(dst_vmp);
348 
349 	/* Don't transfer mmapped regions if not required. */
350 	if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) {
351 		return OK;
352 	}
353 
354 	/* Make sure regions are consistent. */
355 	assert(region_search_root(&src_vmp->vm_regions_avl) && region_search_root(&dst_vmp->vm_regions_avl));
356 
357 	/* Source and destination are intentionally swapped here! */
358 	return map_proc_dyn_data(dst_vmp, src_vmp);
359 }
360 
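/* VM is the memory manager itself, so it cannot obtain pages through the
 * regular mmap(2)/munmap(2) system calls.  The definitions below override
 * the libc stubs, presumably so that library code linked into VM (such as
 * malloc(3)) is served directly from vm_allocpages()/vm_freepages().
 */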
361 void *mmap(void *addr, size_t len, int f, int f2, int f3, off_t o)
362 {
363 	void *ret;
364 	phys_bytes p;
365 
366 	assert(!addr);
367 	assert(!(len % VM_PAGE_SIZE));
368 
369 	ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE);
370 
371 	if(!ret) return MAP_FAILED;
372 	memset(ret, 0, len);
373 	return ret;
374 }
375 
376 int munmap(void * addr, size_t len)
377 {
378 	vm_freepages((vir_bytes) addr, roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE);
379 	return 0;
380 }
381 
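/* As with mmap() above, make calls to brk() within VM resolve to the local
 * _brk() below instead of the libc system call stub.
 */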
382 #ifdef __weak_alias
383 __weak_alias(brk, _brk)
384 #endif
385 int _brk(void *addr)
386 {
387 	/* brk is a special case function to allow VM itself to
388 	 * allocate memory in its own (cacheable) heap. */
389 	vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v;
390 	extern char _end;
391 	extern char *_brksize;
392 	static vir_bytes prevbrk = (vir_bytes) &_end;
393 	struct vmproc *vmprocess = &vmproc[VM_PROC_NR];
394 
395 	for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target;
396 		v += VM_PAGE_SIZE) {
397 		phys_bytes mem, newpage = alloc_mem(1, 0);
398 		if(newpage == NO_MEM) return -1;
399 		mem = CLICK2ABS(newpage);
400 		if(pt_writemap(vmprocess, &vmprocess->vm_pt,
401 			v, mem, VM_PAGE_SIZE,
402 			  ARCH_VM_PTE_PRESENT
403 			| ARCH_VM_PTE_USER
404 			| ARCH_VM_PTE_RW
405 #if defined(__arm__)
406 			| ARM_VM_PTE_CACHED
407 #endif
408 			, 0) != OK) {
409 			free_mem(newpage, 1);
410 			return -1;
411 		}
412 		prevbrk = v + VM_PAGE_SIZE;
413 	}
414 
415 	_brksize = (char *) addr;
416 
417 	if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK)
418 		panic("flushtlb failed");
419 
420 	return 0;
421 }
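
/*
 * Worked example (made-up addresses, assuming 4 KiB pages): with prevbrk at
 * 0x803000, a call _brk((void *) 0x805800) rounds the target up to 0x806000
 * and maps fresh pages at 0x803000, 0x804000 and 0x805000 before recording
 * the new break and flushing the TLB.
 */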
422 
423 /*===========================================================================*
424  *				do_getrusage		     		     *
425  *===========================================================================*/
426 int do_getrusage(message *m)
427 {
428 	int res, slot;
429 	struct vmproc *vmp;
430 	struct rusage r_usage;
431 
432 	/* If the request is not from PM, it is coming directly from userland.
433 	 * This is an obsolete construction. In the future, userland programs
434 	 * should no longer be allowed to call vm_getrusage(2) directly at all.
435 	 * For backward compatibility, we simply return success for now.
436 	 */
437 	if (m->m_source != PM_PROC_NR)
438 		return OK;
439 
440 	/* Get the process for which resource usage is requested. */
441 	if ((res = vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot)) != OK)
442 		return ESRCH;
443 
444 	vmp = &vmproc[slot];
445 
446 	/* We are going to change only a few fields, so copy in the rusage
447 	 * structure first. The structure is still in PM's address space at
448 	 * this point, so use the message source.
449 	 */
450 	if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr,
451 		SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0)
452 		return res;
453 
454 	if (!m->m_lsys_vm_rusage.children) {
455 		r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* unit is KB */
456 		r_usage.ru_minflt = vmp->vm_minor_page_fault;
457 		r_usage.ru_majflt = vmp->vm_major_page_fault;
458 	} else {
459 		/* XXX TODO: return the fields for terminated, waited-for
460 		 * children of the given process. We currently do not have this
461 		 * information! In the future, rather than teaching VM about
462 		 * the process hierarchy, PM should probably tell VM at process
463 		 * exit time which other process should inherit its resource
464 		 * usage fields. For now, we assume PM clears the fields before
465 		 * making this call, so we don't zero the fields explicitly.
466 		 */
467 	}
468 
469 	/* Copy out the resulting structure back to PM. */
470 	return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source,
471 		m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage));
472 }
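
/*
 * Illustrative request sketch (not part of this file): PM forwards a
 * getrusage(2) call roughly as below, assuming the VM_GETRUSAGE request
 * type; target_endpoint and ru are placeholder names, and ru must live in
 * PM's address space since the copies above address the message source.
 *
 *	m.m_lsys_vm_rusage.endpt = target_endpoint;
 *	m.m_lsys_vm_rusage.addr = (vir_bytes) &ru;
 *	m.m_lsys_vm_rusage.children = 0;
 */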
473 
474 /*===========================================================================*
475  *                            adjust_proc_refs                              *
476  *===========================================================================*/
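/* Make every region's parent pointer refer to the process slot it is now
 * stored in.  This is needed after process slot contents have been moved or
 * swapped around, e.g. by the live-update code above.
 */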
477 void adjust_proc_refs(void)
478 {
479        struct vmproc *vmp;
480        region_iter iter;
481 
482        /* Fix up region parents. */
483        for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
484                struct vir_region *vr;
485                if(!(vmp->vm_flags & VMF_INUSE))
486                        continue;
487                region_start_iter_least(&vmp->vm_regions_avl, &iter);
488                while((vr = region_get_iter(&iter))) {
489                        USE(vr, vr->parent = vmp;);
490                        region_incr_iter(&iter);
491                }
492        }
493 }
494 
495