/* This file contains some utility routines for VM. */

#define _SYSTEM		1

#include <minix/callnr.h>
#include <minix/com.h>
#include <minix/config.h>
#include <minix/const.h>
#include <minix/ds.h>
#include <minix/endpoint.h>
#include <minix/minlib.h>
#include <minix/type.h>
#include <minix/ipc.h>
#include <minix/sysutil.h>
#include <minix/syslib.h>
#include <minix/bitmap.h>
#include <minix/rs.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <assert.h>
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/resource.h>

#include "proto.h"
#include "glo.h"
#include "util.h"
#include "region.h"
#include "sanitycheck.h"

#include <machine/archtypes.h>
#include "kernel/const.h"
#include "kernel/config.h"
#include "kernel/type.h"
#include "kernel/proc.h"

/*===========================================================================*
 *				get_mem_chunks				     *
 *===========================================================================*/
void get_mem_chunks(
	struct memory *mem_chunks)	/* store mem chunks here */
{
/* Initialize the free memory list from the kernel-provided memory map.
 * Translate the byte offsets and sizes in this list to clicks, properly
 * truncated.
 */
  phys_bytes base, size, limit;
  int i;
  struct memory *memp;

  /* Initialize everything to zero. */
  memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks));

  /* Obtain and parse memory from the kernel environment. */
  /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */
  for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) {
	mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr;
	mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length;
  }

  /* Round physical memory to clicks. Round start up, round end down. */
  for (i = 0; i < NR_MEMS; i++) {
	memp = &mem_chunks[i];		/* next mem chunk is stored here */
	base = mem_chunks[i].base;
	size = mem_chunks[i].size;
	limit = base + size;
	base = (phys_bytes) (CLICK_CEIL(base));
	limit = (phys_bytes) (CLICK_FLOOR(limit));
	if (limit <= base) {
		memp->base = memp->size = 0;
	} else {
		memp->base = base >> CLICK_SHIFT;
		memp->size = (limit - base) >> CLICK_SHIFT;
	}
  }
}
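
/*
 * Editorial sketch (compiled out): a worked example of the rounding done in
 * get_mem_chunks() above. The constants are made up; the click size is
 * assumed to be 4 KiB (CLICK_SHIFT == 12), as on a typical i386 build.
 */
#if 0
static void example_click_rounding(void)
{
	phys_bytes base = 0x1234, limit = 0x5678;

	base = CLICK_CEIL(base);	/* start rounds up to 0x2000 */
	limit = CLICK_FLOOR(limit);	/* end rounds down to 0x5000 */

	/* The chunk covers (0x5000 - 0x2000) >> 12 == 3 clicks, starting at
	 * click 0x2000 >> 12 == 2. A chunk smaller than one click rounds to
	 * limit <= base and is recorded as empty.
	 */
}
#endif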

/*===========================================================================*
 *				vm_isokendpt				     *
 *===========================================================================*/
int vm_isokendpt(endpoint_t endpoint, int *procn)
{
	/* Convert the endpoint to a process slot, and verify that the slot
	 * is in range, currently in use, and owned by that endpoint.
	 */
	*procn = _ENDPOINT_P(endpoint);
	if(*procn < 0 || *procn >= NR_PROCS)
		return EINVAL;
	if(endpoint != vmproc[*procn].vm_endpoint)
		return EDEADEPT;
	if(!(vmproc[*procn].vm_flags & VMF_INUSE))
		return EDEADEPT;
	return OK;
}

/*===========================================================================*
 *				do_info					     *
 *===========================================================================*/
int do_info(message *m)
{
	struct vm_stats_info vsi;
	struct vm_usage_info vui;
	static struct vm_region_info vri[MAX_VRI_COUNT];
	struct vmproc *vmp;
	vir_bytes addr, size, next, ptr;
	int r, pr, dummy, count, free_pages, largest_contig;

	if (vm_isokendpt(m->m_source, &pr) != OK)
		return EINVAL;
	vmp = &vmproc[pr];

	ptr = (vir_bytes) m->m_lsys_vm_info.ptr;

	switch(m->m_lsys_vm_info.what) {
	case VMIW_STATS:
		vsi.vsi_pagesize = VM_PAGE_SIZE;
		vsi.vsi_total = total_pages;
		memstats(&dummy, &free_pages, &largest_contig);
		vsi.vsi_free = free_pages;
		vsi.vsi_largest = largest_contig;

		get_stats_info(&vsi);

		addr = (vir_bytes) &vsi;
		size = sizeof(vsi);

		break;

	case VMIW_USAGE:
		if(m->m_lsys_vm_info.ep < 0)
			get_usage_info_kernel(&vui);
		else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;
		else get_usage_info(&vmproc[pr], &vui);

		addr = (vir_bytes) &vui;
		size = sizeof(vui);

		break;

	case VMIW_REGION:
		if(m->m_lsys_vm_info.ep == SELF) {
			m->m_lsys_vm_info.ep = m->m_source;
		}
		if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK)
			return EINVAL;

		count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT);
		next = m->m_lsys_vm_info.next;

		count = get_region_info(&vmproc[pr], vri, count, &next);

		m->m_lsys_vm_info.count = count;
		m->m_lsys_vm_info.next = next;

		addr = (vir_bytes) vri;
		size = sizeof(vri[0]) * count;

		break;

	default:
		return EINVAL;
	}

	if (size == 0)
		return OK;

	/* Make sure that no page faults can occur while copying out. A page
	 * fault would cause the kernel to send a notify to us, while we would
	 * be waiting for the result of the copy system call, resulting in a
	 * deadlock. Note that no memory mapping can be undone without the
	 * involvement of VM, so we are safe until we're done.
	 */
	r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/);
	if (r != OK) return r;

	/* Now that we know the copy out will succeed, perform the actual copy
	 * operation.
	 */
	return sys_datacopy(SELF, addr,
		(vir_bytes) vmp->vm_endpoint, ptr, size);
}
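
/*
 * Editorial sketch (compiled out): what a VMIW_STATS request into do_info()
 * looks like from the caller's side. Userland normally reaches this through
 * the libsys wrappers (vm_info_stats() and friends); the example function
 * name is hypothetical, but the message fields are the ones do_info()
 * consumes above.
 */
#if 0
static int example_query_stats(struct vm_stats_info *vsi)
{
	message m;

	memset(&m, 0, sizeof(m));
	m.m_lsys_vm_info.what = VMIW_STATS;
	m.m_lsys_vm_info.ptr = vsi;	/* VM copies the result out here */

	return _taskcall(VM_PROC_NR, VM_INFO, &m);
}
#endif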

/*===========================================================================*
 *				swap_proc_slot				     *
 *===========================================================================*/
int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
	struct vmproc orig_src_vmproc, orig_dst_vmproc;

#if LU_DEBUG
	printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Save existing data. */
	orig_src_vmproc = *src_vmp;
	orig_dst_vmproc = *dst_vmp;

	/* Swap slots. */
	*src_vmp = orig_dst_vmproc;
	*dst_vmp = orig_src_vmproc;

	/* Preserve endpoints and slot numbers. */
	src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint;
	src_vmp->vm_slot = orig_src_vmproc.vm_slot;
	dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
	dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;

#if LU_DEBUG
	printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	return OK;
}

/*
 * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to
 * 'dst_vmp', for the source process's address range of 'start_addr'
 * (inclusive) to 'end_addr' (exclusive). Return OK or an error code.
 * If the regions appear to have been transferred already, do nothing.
 */
static int
transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start_addr, vir_bytes end_addr)
{
	struct vir_region *start_vr, *check_vr, *end_vr;

	start_vr = region_search(&src_vmp->vm_regions_avl, start_addr,
	    AVL_GREATER_EQUAL);

	if (start_vr == NULL || start_vr->vaddr >= end_addr)
		return OK;	/* nothing to do */

	/* During a multicomponent live update that includes VM, this
	 * function may be called more than once for the same process. This
	 * keeps the code paths as similar as possible while still ensuring
	 * that the regions are copied early enough.
	 *
	 * To compensate for these multiple calls, we perform a very simple
	 * check here to see if the region to transfer is already present in
	 * the target process. If so, we can safely skip copying the regions
	 * again, because there is no other possible explanation for the
	 * region being present already. Things would go horribly wrong if we
	 * tried copying anyway, but this check is not good enough to detect
	 * all such problems, since we check the base address only.
	 */
	check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr,
	    AVL_EQUAL);
	if (check_vr != NULL) {
#if LU_DEBUG
		printf("VM: transfer_mmap_regions: skipping transfer from "
		    "%d to %d (0x%lx already present)\n",
		    src_vmp->vm_endpoint, dst_vmp->vm_endpoint,
		    start_vr->vaddr);
#endif
		return OK;
	}

	end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS);
	assert(end_vr != NULL);
	assert(start_vr->vaddr <= end_vr->vaddr);

#if LU_DEBUG
	printf("VM: transfer_mmap_regions: transferring memory mapped regions "
	    "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint,
	    dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr);
#endif

	return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr);
}
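
/*
 * Editorial sketch (compiled out): how a whole address range is walked with
 * the region AVL iterator, which is the shape of walk that
 * map_proc_copy_range() performs between 'start_vr' and 'end_vr' above.
 * The function name is hypothetical.
 */
#if 0
static void example_walk_range(struct vmproc *vmp, vir_bytes start_addr,
	vir_bytes end_addr)
{
	region_iter iter;
	struct vir_region *vr;

	region_start_iter(&vmp->vm_regions_avl, &iter, start_addr,
	    AVL_GREATER_EQUAL);
	while((vr = region_get_iter(&iter)) != NULL && vr->vaddr < end_addr) {
		/* ... inspect or copy 'vr' here ... */
		region_incr_iter(&iter);
	}
}
#endif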

/*
 * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped
 * regions present in 'src_vmp'. Return OK on success, or an error otherwise.
 * In the case of failure, successfully created mappings are not undone.
 */
int
map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
	int r;

#if LU_DEBUG
	printf("VM: mapping dynamic data from %d to %d\n",
	    src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif

	/* Transfer the memory mapped regions now. To sandbox the new instance
	 * and prevent state corruption on rollback, we share all the regions
	 * between the two instances as CoW.
	 */
	r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP);

	/* If the stack is not mapped at VM_DATATOP, there might be some more
	 * regions hiding above the stack. We have to transfer those as well.
	 */
	if (r == OK && VM_STACKTOP < VM_DATATOP)
		r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP,
		    VM_DATATOP);

	return r;
}

/*===========================================================================*
 *				swap_proc_dyn_data			     *
 *===========================================================================*/
int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	int sys_upd_flags)
{
	int is_vm;
	int r;

	is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);

	/* For VM, transfer the memory mapped regions first. */
	if(is_vm) {
#if LU_DEBUG
		printf("VM: swap_proc_dyn_data: transferring memory mapped "
		    "regions from old (%d) to new VM (%d)\n",
		    src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
		r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE,
		    VM_OWN_MMAPTOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}
		r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP);
		if(r != OK) {
			printf("swap_proc_dyn_data: pt_map_in_range failed\n");
			return r;
		}
	}

#if LU_DEBUG
	printf("VM: swap_proc_dyn_data: swapping regions' parents for "
	    "%d (%d) and %d (%d)\n",
	    src_vmp->vm_endpoint, src_vmp->vm_slot,
	    dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif

	/* Swap the vir_regions' parents. */
	map_setparent(src_vmp);
	map_setparent(dst_vmp);

	/* Don't transfer mmapped regions if not required. */
	if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) {
		return OK;
	}

	/* Make sure the regions are consistent. */
	assert(region_search_root(&src_vmp->vm_regions_avl) &&
	    region_search_root(&dst_vmp->vm_regions_avl));

	/* Source and destination are intentionally swapped here! */
	return map_proc_dyn_data(dst_vmp, src_vmp);
}
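
/*
 * Editorial sketch (compiled out): the calling order implied by the two
 * routines above during a live update. The real driver of this sequence
 * lives in the update handling code, not in this file, and the function
 * name is hypothetical.
 */
#if 0
static int example_swap_full(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	int sys_upd_flags)
{
	int r;

	/* First exchange the process slots themselves... */
	if((r = swap_proc_slot(src_vmp, dst_vmp)) != OK)
		return r;

	/* ...then fix up the dynamic data to match the new slot owners. */
	return swap_proc_dyn_data(src_vmp, dst_vmp, sys_upd_flags);
}
#endif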

void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
{
	/* VM's internal mmap: the protection, flag, file descriptor and
	 * offset arguments are ignored; allocations always come from the
	 * slab page pool.
	 */
	void *ret;
	phys_bytes p;

	assert(!addr);
	assert(!(len % VM_PAGE_SIZE));

	ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE);

	if(!ret) return MAP_FAILED;
	memset(ret, 0, len);
	return ret;
}

int munmap(void *addr, size_t len)
{
	vm_freepages((vir_bytes) addr,
	    roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE);
	return 0;
}

#ifdef __weak_alias
__weak_alias(brk, _brk)
#endif
int _brk(void *addr)
{
	/* brk is a special case function to allow VM itself to allocate
	 * memory in its own (cacheable) heap. Note that shrinking only
	 * lowers _brksize; pages that are already mapped are not freed.
	 */
	vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v;
	extern char _end;
	extern char *_brksize;
	static vir_bytes prevbrk = (vir_bytes) &_end;
	struct vmproc *vmprocess = &vmproc[VM_PROC_NR];

	for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target;
	    v += VM_PAGE_SIZE) {
		phys_bytes mem, newpage = alloc_mem(1, 0);
		if(newpage == NO_MEM) return -1;
		mem = CLICK2ABS(newpage);
		if(pt_writemap(vmprocess, &vmprocess->vm_pt,
		    v, mem, VM_PAGE_SIZE,
		    ARCH_VM_PTE_PRESENT
		    | ARCH_VM_PTE_USER
		    | ARCH_VM_PTE_RW
#if defined(__arm__)
		    | ARM_VM_PTE_CACHED
#endif
		    , 0) != OK) {
			free_mem(newpage, 1);
			return -1;
		}
		prevbrk = v + VM_PAGE_SIZE;
	}

	_brksize = (char *) addr;

	if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK)
		panic("flushtlb failed");

	return 0;
}
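
/*
 * Editorial sketch (compiled out): within VM, a plain brk() call resolves
 * to the local _brk() through the weak alias above, so heap growth for VM's
 * own allocators is handled by VM itself, one page at a time. The function
 * name is hypothetical.
 */
#if 0
static int example_grow_heap(void)
{
	extern char *_brksize;

	/* Request one more page; _brk() maps it into VM's own page table
	 * and flushes the TLB.
	 */
	return brk(_brksize + VM_PAGE_SIZE);
}
#endif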

/*===========================================================================*
 *				do_getrusage				     *
 *===========================================================================*/
int do_getrusage(message *m)
{
	int res, slot;
	struct vmproc *vmp;
	struct rusage r_usage;

	/* If the request is not from PM, it is coming directly from userland.
	 * This is an obsolete construction. In the future, userland programs
	 * should no longer be allowed to call vm_getrusage(2) directly at
	 * all. For backward compatibility, we simply return success for now.
	 */
	if (m->m_source != PM_PROC_NR)
		return OK;

	/* Get the process for which resource usage is requested. */
	if (vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot) != OK)
		return ESRCH;

	vmp = &vmproc[slot];

	/* We are going to change only a few fields, so copy in the rusage
	 * structure first. The structure is still in PM's address space at
	 * this point, so use the message source.
	 */
	if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr,
	    SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0)
		return res;

	if (!m->m_lsys_vm_rusage.children) {
		r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* bytes to KB */
		r_usage.ru_minflt = vmp->vm_minor_page_fault;
		r_usage.ru_majflt = vmp->vm_major_page_fault;
	} else {
		/* XXX TODO: return the fields for terminated, waited-for
		 * children of the given process. We currently do not have
		 * this information! In the future, rather than teaching VM
		 * about the process hierarchy, PM should probably tell VM at
		 * process exit time which other process should inherit its
		 * resource usage fields. For now, we assume PM clears the
		 * fields before making this call, so we don't zero the
		 * fields explicitly.
		 */
	}

	/* Copy the resulting structure back out to PM. */
	return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source,
	    m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage));
}

/*===========================================================================*
 *				adjust_proc_refs			     *
 *===========================================================================*/
void adjust_proc_refs(void)
{
	struct vmproc *vmp;
	region_iter iter;

	/* Fix up the region parents. */
	for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) {
		struct vir_region *vr;
		if(!(vmp->vm_flags & VMF_INUSE))
			continue;
		region_start_iter_least(&vmp->vm_regions_avl, &iter);
		while((vr = region_get_iter(&iter))) {
			USE(vr, vr->parent = vmp;);
			region_incr_iter(&iter);
		}
	}
}