1 2 /* This file contains some utility routines for VM. */ 3 4 #define _SYSTEM 1 5 6 #include <minix/callnr.h> 7 #include <minix/com.h> 8 #include <minix/config.h> 9 #include <minix/const.h> 10 #include <minix/ds.h> 11 #include <minix/endpoint.h> 12 #include <minix/minlib.h> 13 #include <minix/type.h> 14 #include <minix/ipc.h> 15 #include <minix/sysutil.h> 16 #include <minix/syslib.h> 17 #include <minix/type.h> 18 #include <minix/bitmap.h> 19 #include <minix/rs.h> 20 #include <string.h> 21 #include <errno.h> 22 #include <env.h> 23 #include <unistd.h> 24 #include <assert.h> 25 #include <sys/cdefs.h> 26 #include <sys/param.h> 27 #include <sys/mman.h> 28 #include <sys/resource.h> 29 30 #include "proto.h" 31 #include "glo.h" 32 #include "util.h" 33 #include "region.h" 34 #include "sanitycheck.h" 35 36 #include <machine/archtypes.h> 37 #include "kernel/const.h" 38 #include "kernel/config.h" 39 #include "kernel/type.h" 40 #include "kernel/proc.h" 41 42 /*===========================================================================* 43 * get_mem_chunks * 44 *===========================================================================*/ 45 void get_mem_chunks( 46 struct memory *mem_chunks) /* store mem chunks here */ 47 { 48 /* Initialize the free memory list from the kernel-provided memory map. Translate 49 * the byte offsets and sizes in this list to clicks, properly truncated. 50 */ 51 phys_bytes base, size, limit; 52 int i; 53 struct memory *memp; 54 55 /* Initialize everything to zero. */ 56 memset(mem_chunks, 0, NR_MEMS*sizeof(*mem_chunks)); 57 58 /* Obtain and parse memory from kernel environment. */ 59 /* XXX Any memory chunk in excess of NR_MEMS is silently ignored. */ 60 for(i = 0; i < MIN(MAXMEMMAP, NR_MEMS); i++) { 61 mem_chunks[i].base = kernel_boot_info.memmap[i].mm_base_addr; 62 mem_chunks[i].size = kernel_boot_info.memmap[i].mm_length; 63 } 64 65 /* Round physical memory to clicks. Round start up, round end down. */ 66 for (i = 0; i < NR_MEMS; i++) { 67 memp = &mem_chunks[i]; /* next mem chunk is stored here */ 68 base = mem_chunks[i].base; 69 size = mem_chunks[i].size; 70 limit = base + size; 71 base = (phys_bytes) (CLICK_CEIL(base)); 72 limit = (phys_bytes) (CLICK_FLOOR(limit)); 73 if (limit <= base) { 74 memp->base = memp->size = 0; 75 } else { 76 memp->base = base >> CLICK_SHIFT; 77 memp->size = (limit - base) >> CLICK_SHIFT; 78 } 79 } 80 } 81 82 /*===========================================================================* 83 * vm_isokendpt * 84 *===========================================================================*/ 85 int vm_isokendpt(endpoint_t endpoint, int *procn) 86 { 87 *procn = _ENDPOINT_P(endpoint); 88 if(*procn < 0 || *procn >= NR_PROCS) 89 return EINVAL; 90 if(*procn >= 0 && endpoint != vmproc[*procn].vm_endpoint) 91 return EDEADEPT; 92 if(*procn >= 0 && !(vmproc[*procn].vm_flags & VMF_INUSE)) 93 return EDEADEPT; 94 return OK; 95 } 96 97 98 /*===========================================================================* 99 * do_info * 100 *===========================================================================*/ 101 int do_info(message *m) 102 { 103 struct vm_stats_info vsi; 104 struct vm_usage_info vui; 105 static struct vm_region_info vri[MAX_VRI_COUNT]; 106 struct vmproc *vmp; 107 vir_bytes addr, size, next, ptr; 108 int r, pr, dummy, count, free_pages, largest_contig; 109 110 if (vm_isokendpt(m->m_source, &pr) != OK) 111 return EINVAL; 112 vmp = &vmproc[pr]; 113 114 ptr = (vir_bytes) m->m_lsys_vm_info.ptr; 115 116 switch(m->m_lsys_vm_info.what) { 117 case VMIW_STATS: 118 vsi.vsi_pagesize = VM_PAGE_SIZE; 119 vsi.vsi_total = total_pages; 120 memstats(&dummy, &free_pages, &largest_contig); 121 vsi.vsi_free = free_pages; 122 vsi.vsi_largest = largest_contig; 123 124 get_stats_info(&vsi); 125 126 addr = (vir_bytes) &vsi; 127 size = sizeof(vsi); 128 129 break; 130 131 case VMIW_USAGE: 132 if(m->m_lsys_vm_info.ep < 0) 133 get_usage_info_kernel(&vui); 134 else if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK) 135 return EINVAL; 136 else get_usage_info(&vmproc[pr], &vui); 137 138 addr = (vir_bytes) &vui; 139 size = sizeof(vui); 140 141 break; 142 143 case VMIW_REGION: 144 if(m->m_lsys_vm_info.ep == SELF) { 145 m->m_lsys_vm_info.ep = m->m_source; 146 } 147 if (vm_isokendpt(m->m_lsys_vm_info.ep, &pr) != OK) 148 return EINVAL; 149 150 count = MIN(m->m_lsys_vm_info.count, MAX_VRI_COUNT); 151 next = m->m_lsys_vm_info.next; 152 153 count = get_region_info(&vmproc[pr], vri, count, &next); 154 155 m->m_lsys_vm_info.count = count; 156 m->m_lsys_vm_info.next = next; 157 158 addr = (vir_bytes) vri; 159 size = sizeof(vri[0]) * count; 160 161 break; 162 163 default: 164 return EINVAL; 165 } 166 167 if (size == 0) 168 return OK; 169 170 /* Make sure that no page faults can occur while copying out. A page 171 * fault would cause the kernel to send a notify to us, while we would 172 * be waiting for the result of the copy system call, resulting in a 173 * deadlock. Note that no memory mapping can be undone without the 174 * involvement of VM, so we are safe until we're done. 175 */ 176 r = handle_memory_once(vmp, ptr, size, 1 /*wrflag*/); 177 if (r != OK) return r; 178 179 /* Now that we know the copy out will succeed, perform the actual copy 180 * operation. 181 */ 182 return sys_datacopy(SELF, addr, 183 (vir_bytes) vmp->vm_endpoint, ptr, size); 184 } 185 186 /*===========================================================================* 187 * swap_proc_slot * 188 *===========================================================================*/ 189 int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp) 190 { 191 struct vmproc orig_src_vmproc, orig_dst_vmproc; 192 193 #if LU_DEBUG 194 printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n", 195 src_vmp->vm_endpoint, src_vmp->vm_slot, 196 dst_vmp->vm_endpoint, dst_vmp->vm_slot); 197 #endif 198 199 /* Save existing data. */ 200 orig_src_vmproc = *src_vmp; 201 orig_dst_vmproc = *dst_vmp; 202 203 /* Swap slots. */ 204 *src_vmp = orig_dst_vmproc; 205 *dst_vmp = orig_src_vmproc; 206 207 /* Preserve endpoints and slot numbers. */ 208 src_vmp->vm_endpoint = orig_src_vmproc.vm_endpoint; 209 src_vmp->vm_slot = orig_src_vmproc.vm_slot; 210 dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint; 211 dst_vmp->vm_slot = orig_dst_vmproc.vm_slot; 212 213 #if LU_DEBUG 214 printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n", 215 src_vmp->vm_endpoint, src_vmp->vm_slot, 216 dst_vmp->vm_endpoint, dst_vmp->vm_slot); 217 #endif 218 219 return OK; 220 } 221 222 /* 223 * Transfer memory mapped regions, using CoW sharing, from 'src_vmp' to 224 * 'dst_vmp', for the source process's address range of 'start_addr' 225 * (inclusive) to 'end_addr' (exclusive). Return OK or an error code. 226 * If the regions seem to have been transferred already, do nothing. 227 */ 228 static int 229 transfer_mmap_regions(struct vmproc *src_vmp, struct vmproc *dst_vmp, 230 vir_bytes start_addr, vir_bytes end_addr) 231 { 232 struct vir_region *start_vr, *check_vr, *end_vr; 233 234 start_vr = region_search(&src_vmp->vm_regions_avl, start_addr, 235 AVL_GREATER_EQUAL); 236 237 if (start_vr == NULL || start_vr->vaddr >= end_addr) 238 return OK; /* nothing to do */ 239 240 /* In the case of multicomponent live update that includes VM, this 241 * function may be called for the same process more than once, for the 242 * sake of keeping code paths as little divergent as possible while at 243 * the same time ensuring that the regions are copied early enough. 244 * 245 * To compensate for these multiple calls, we perform a very simple 246 * check here to see if the region to transfer is already present in 247 * the target process. If so, we can safely skip copying the regions 248 * again, because there is no other possible explanation for the 249 * region being present already. Things would go horribly wrong if we 250 * tried copying anyway, but this check is not good enough to detect 251 * all such problems, since we do a check on the base address only. 252 */ 253 check_vr = region_search(&dst_vmp->vm_regions_avl, start_vr->vaddr, 254 AVL_EQUAL); 255 if (check_vr != NULL) { 256 #if LU_DEBUG 257 printf("VM: transfer_mmap_regions: skipping transfer from " 258 "%d to %d (0x%lx already present)\n", 259 src_vmp->vm_endpoint, dst_vmp->vm_endpoint, 260 start_vr->vaddr); 261 #endif 262 return OK; 263 } 264 265 end_vr = region_search(&src_vmp->vm_regions_avl, end_addr, AVL_LESS); 266 assert(end_vr != NULL); 267 assert(start_vr->vaddr <= end_vr->vaddr); 268 269 #if LU_DEBUG 270 printf("VM: transfer_mmap_regions: transferring memory mapped regions " 271 "from %d to %d (0x%lx to 0x%lx)\n", src_vmp->vm_endpoint, 272 dst_vmp->vm_endpoint, start_vr->vaddr, end_vr->vaddr); 273 #endif 274 275 return map_proc_copy_range(dst_vmp, src_vmp, start_vr, end_vr); 276 } 277 278 /* 279 * Create copy-on-write mappings in process 'dst_vmp' for all memory-mapped 280 * regions present in 'src_vmp'. Return OK on success, or an error otherwise. 281 * In the case of failure, successfully created mappings are not undone. 282 */ 283 int 284 map_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp) 285 { 286 int r; 287 288 #if LU_DEBUG 289 printf("VM: mapping dynamic data from %d to %d\n", 290 src_vmp->vm_endpoint, dst_vmp->vm_endpoint); 291 #endif 292 293 /* Transfer memory mapped regions now. To sandbox the new instance and 294 * prevent state corruption on rollback, we share all the regions 295 * between the two instances as COW. 296 */ 297 r = transfer_mmap_regions(src_vmp, dst_vmp, VM_MMAPBASE, VM_MMAPTOP); 298 299 /* If the stack is not mapped at the VM_DATATOP, there might be some 300 * more regions hiding above the stack. We also have to transfer 301 * those. 302 */ 303 if (r == OK && VM_STACKTOP < VM_DATATOP) 304 r = transfer_mmap_regions(src_vmp, dst_vmp, VM_STACKTOP, 305 VM_DATATOP); 306 307 return r; 308 } 309 310 /*===========================================================================* 311 * swap_proc_dyn_data * 312 *===========================================================================*/ 313 int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp, 314 int sys_upd_flags) 315 { 316 int is_vm; 317 int r; 318 319 is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR); 320 321 /* For VM, transfer memory mapped regions first. */ 322 if(is_vm) { 323 #if LU_DEBUG 324 printf("VM: swap_proc_dyn_data: tranferring memory mapped regions from old (%d) to new VM (%d)\n", 325 src_vmp->vm_endpoint, dst_vmp->vm_endpoint); 326 #endif 327 r = pt_map_in_range(src_vmp, dst_vmp, VM_OWN_HEAPBASE, VM_OWN_MMAPTOP); 328 if(r != OK) { 329 printf("swap_proc_dyn_data: pt_map_in_range failed\n"); 330 return r; 331 } 332 r = pt_map_in_range(src_vmp, dst_vmp, VM_STACKTOP, VM_DATATOP); 333 if(r != OK) { 334 printf("swap_proc_dyn_data: pt_map_in_range failed\n"); 335 return r; 336 } 337 338 } 339 340 #if LU_DEBUG 341 printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n", 342 src_vmp->vm_endpoint, src_vmp->vm_slot, 343 dst_vmp->vm_endpoint, dst_vmp->vm_slot); 344 #endif 345 346 /* Swap vir_regions' parents. */ 347 map_setparent(src_vmp); 348 map_setparent(dst_vmp); 349 350 /* Don't transfer mmapped regions if not required. */ 351 if(is_vm || (sys_upd_flags & (SF_VM_ROLLBACK|SF_VM_NOMMAP))) { 352 return OK; 353 } 354 355 /* Make sure regions are consistent. */ 356 assert(region_search_root(&src_vmp->vm_regions_avl) && region_search_root(&dst_vmp->vm_regions_avl)); 357 358 /* Source and destination are intentionally swapped here! */ 359 return map_proc_dyn_data(dst_vmp, src_vmp); 360 } 361 362 void *mmap(void *addr, size_t len, int f, int f2, int f3, off_t o) 363 { 364 void *ret; 365 phys_bytes p; 366 367 assert(!addr); 368 assert(!(len % VM_PAGE_SIZE)); 369 370 ret = vm_allocpages(&p, VMP_SLAB, len/VM_PAGE_SIZE); 371 372 if(!ret) return MAP_FAILED; 373 memset(ret, 0, len); 374 return ret; 375 } 376 377 int munmap(void * addr, size_t len) 378 { 379 vm_freepages((vir_bytes) addr, roundup(len, VM_PAGE_SIZE)/VM_PAGE_SIZE); 380 return 0; 381 } 382 383 #ifdef __weak_alias 384 __weak_alias(brk, _brk) 385 #endif 386 int _brk(void *addr) 387 { 388 /* brk is a special case function to allow vm itself to 389 allocate memory in it's own (cacheable) HEAP */ 390 vir_bytes target = roundup((vir_bytes)addr, VM_PAGE_SIZE), v; 391 extern char _end; 392 extern char *_brksize; 393 static vir_bytes prevbrk = (vir_bytes) &_end; 394 struct vmproc *vmprocess = &vmproc[VM_PROC_NR]; 395 396 for(v = roundup(prevbrk, VM_PAGE_SIZE); v < target; 397 v += VM_PAGE_SIZE) { 398 phys_bytes mem, newpage = alloc_mem(1, 0); 399 if(newpage == NO_MEM) return -1; 400 mem = CLICK2ABS(newpage); 401 if(pt_writemap(vmprocess, &vmprocess->vm_pt, 402 v, mem, VM_PAGE_SIZE, 403 ARCH_VM_PTE_PRESENT 404 | ARCH_VM_PTE_USER 405 | ARCH_VM_PTE_RW 406 #if defined(__arm__) 407 | ARM_VM_PTE_CACHED 408 #endif 409 , 0) != OK) { 410 free_mem(newpage, 1); 411 return -1; 412 } 413 prevbrk = v + VM_PAGE_SIZE; 414 } 415 416 _brksize = (char *) addr; 417 418 if(sys_vmctl(SELF, VMCTL_FLUSHTLB, 0) != OK) 419 panic("flushtlb failed"); 420 421 return 0; 422 } 423 424 /*===========================================================================* 425 * do_getrusage * 426 *===========================================================================*/ 427 int do_getrusage(message *m) 428 { 429 int res, slot; 430 struct vmproc *vmp; 431 struct rusage r_usage; 432 433 /* If the request is not from PM, it is coming directly from userland. 434 * This is an obsolete construction. In the future, userland programs 435 * should no longer be allowed to call vm_getrusage(2) directly at all. 436 * For backward compatibility, we simply return success for now. 437 */ 438 if (m->m_source != PM_PROC_NR) 439 return OK; 440 441 /* Get the process for which resource usage is requested. */ 442 if ((res = vm_isokendpt(m->m_lsys_vm_rusage.endpt, &slot)) != OK) 443 return ESRCH; 444 445 vmp = &vmproc[slot]; 446 447 /* We are going to change only a few fields, so copy in the rusage 448 * structure first. The structure is still in PM's address space at 449 * this point, so use the message source. 450 */ 451 if ((res = sys_datacopy(m->m_source, m->m_lsys_vm_rusage.addr, 452 SELF, (vir_bytes) &r_usage, (vir_bytes) sizeof(r_usage))) < 0) 453 return res; 454 455 if (!m->m_lsys_vm_rusage.children) { 456 r_usage.ru_maxrss = vmp->vm_total_max / 1024L; /* unit is KB */ 457 r_usage.ru_minflt = vmp->vm_minor_page_fault; 458 r_usage.ru_majflt = vmp->vm_major_page_fault; 459 } else { 460 /* XXX TODO: return the fields for terminated, waited-for 461 * children of the given process. We currently do not have this 462 * information! In the future, rather than teaching VM about 463 * the process hierarchy, PM should probably tell VM at process 464 * exit time which other process should inherit its resource 465 * usage fields. For now, we assume PM clears the fields before 466 * making this call, so we don't zero the fields explicitly. 467 */ 468 } 469 470 /* Copy out the resulting structure back to PM. */ 471 return sys_datacopy(SELF, (vir_bytes) &r_usage, m->m_source, 472 m->m_lsys_vm_rusage.addr, (vir_bytes) sizeof(r_usage)); 473 } 474 475 /*===========================================================================* 476 * adjust_proc_refs * 477 *===========================================================================*/ 478 void adjust_proc_refs() 479 { 480 struct vmproc *vmp; 481 region_iter iter; 482 483 /* Fix up region parents. */ 484 for(vmp = vmproc; vmp < &vmproc[VMP_NR]; vmp++) { 485 struct vir_region *vr; 486 if(!(vmp->vm_flags & VMF_INUSE)) 487 continue; 488 region_start_iter_least(&vmp->vm_regions_avl, &iter); 489 while((vr = region_get_iter(&iter))) { 490 USE(vr, vr->parent = vmp;); 491 region_incr_iter(&iter); 492 } 493 } 494 } 495 496