/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vm/vm_vmspace.c,v 1.10 2007/01/14 07:59:09 dillon Exp $
 */

/*
 * Virtual kernel (vkernel) VM space management.  A real process acting as
 * a virtual kernel creates additional VM spaces keyed by an opaque id,
 * maps memory into them, and transfers control of the cpu into them via
 * sys_vmspace_ctl().  Per-process state lives in struct vkernel; state
 * shared across fork()s lives in the reference-counted vkernel_common.
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kern_syscall.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vkernel.h>
#include <sys/vmspace.h>
#include <sys/spinlock2.h>

#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <ddb/ddb.h>

#include <machine/vmparam.h>

static struct vmspace_entry *vkernel_find_vmspace(struct vkernel_common *vc,
						  void *id);
static void vmspace_entry_delete(struct vmspace_entry *ve,
				 struct vkernel_common *vc);

static MALLOC_DEFINE(M_VKERNEL, "vkernel", "VKernel structures");

/*
 * vmspace_create (void *id, int type, void *data)
 *
 * Create a VMSPACE under the control of the caller with the specified id.
 * An id of NULL cannot be used.  The type and data fields must currently
 * be 0.
 *
 * The vmspace starts out completely empty.  Memory may be mapped into the
 * VMSPACE with vmspace_mmap() and MAP_VPAGETABLE section(s) controlled
 * with vmspace_mcontrol().
 *
 * Returns 0 on success, EOPNOTSUPP if vkernel support is disabled, or
 * EEXIST if the id is already in use.
 */
int
sys_vmspace_create(struct vmspace_create_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;

	/* vkernel support can be administratively disabled via sysctl. */
	if (vkernel_enable == 0)
		return (EOPNOTSUPP);

	/*
	 * Create a virtual kernel side-structure for the process if one
	 * does not exist.  The common area starts with one reference
	 * owned by this process.
	 */
	if ((vk = curproc->p_vkernel) == NULL) {
		vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO);
		vc = kmalloc(sizeof(*vc), M_VKERNEL, M_WAITOK|M_ZERO);
		vc->vc_refs = 1;
		spin_init(&vc->vc_spin);
		RB_INIT(&vc->vc_root);
		vk->vk_common = vc;
		curproc->p_vkernel = vk;
	}
	vc = vk->vk_common;

	/*
	 * Create a new VMSPACE
	 *
	 * NOTE(review): the header comment says a NULL id cannot be used,
	 * but uap->id is not validated here -- confirm callers (or a later
	 * revision) enforce that.
	 */
	if (vkernel_find_vmspace(vc, uap->id))
		return (EEXIST);
	ve = kmalloc(sizeof(struct vmspace_entry), M_VKERNEL, M_WAITOK|M_ZERO);
	ve->vmspace = vmspace_alloc(VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	ve->id = uap->id;
	pmap_pinit2(vmspace_pmap(ve->vmspace));
	RB_INSERT(vmspace_rb_tree, &vc->vc_root, ve);
	return (0);
}

/*
 * vmspace_destroy (void *id)
 *
 * Destroy a VMSPACE.  Fails with EBUSY if the vmspace is still actively
 * referenced (e.g. currently being run via sys_vmspace_ctl()).
 *
 * NOTE(review): the ve->refs test is not interlocked against concurrent
 * sys_vmspace_ctl() -- presumably serialized by the caller's context;
 * confirm under MP.
 */
int
sys_vmspace_destroy(struct vmspace_destroy_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;

	if ((vk = curproc->p_vkernel) == NULL)
		return (EINVAL);
	vc = vk->vk_common;
	if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL)
		return (ENOENT);
	if (ve->refs)
		return (EBUSY);
	vmspace_entry_delete(ve, vc);
	return(0);
}

/*
 * vmspace_ctl (void *id, int cmd, struct trapframe *tframe,
 *		struct vextframe *vframe);
 *
 * Transfer control to a VMSPACE.  Control is returned after the specified
 * number of microseconds or if a page fault, signal, trap, or system call
 * occurs.  The context is updated as appropriate.
 */
int
sys_vmspace_ctl(struct vmspace_ctl_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;
	struct proc *p;
	int framesz;
	int error;

	if ((vk = curproc->p_vkernel) == NULL)
		return (EINVAL);
	vc = vk->vk_common;
	if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL)
		return (ENOENT);

	/*
	 * Signal mailbox interlock
	 */
	if (curproc->p_flag & P_MAILBOX) {
		curproc->p_flag &= ~P_MAILBOX;
		return (EINTR);
	}

	switch(uap->cmd) {
	case VMSPACE_CTL_RUN:
		/*
		 * Save the caller's register context, swap VM spaces, and
		 * install the passed register context.  Return with
		 * EJUSTRETURN so the syscall code doesn't adjust the context.
		 */
		p = curproc;
		/*
		 * The ref taken here is dropped on the error path below,
		 * or later by vkernel_trap() / vkernel_exit().
		 */
		++ve->refs;
		framesz = sizeof(struct trapframe);
		vk->vk_current = ve;
		vk->vk_save_vmspace = p->p_vmspace;
		vk->vk_user_trapframe = uap->tframe;
		vk->vk_user_vextframe = uap->vframe;
		/* Save the vkernel's own frame and TLS for later restore. */
		bcopy(uap->sysmsg_frame, &vk->vk_save_trapframe, framesz);
		bcopy(&curthread->td_tls, &vk->vk_save_vextframe.vx_tls,
		      sizeof(vk->vk_save_vextframe.vx_tls));
		/*
		 * Install the user-supplied frame and TLS, then sanitize
		 * them (cpu_sanitize_frame()/cpu_sanitize_tls() presumably
		 * reject privileged register state -- see the machine-
		 * dependent code).
		 */
		error = copyin(uap->tframe, uap->sysmsg_frame, framesz);
		if (error == 0)
			error = copyin(&uap->vframe->vx_tls,
				       &curthread->td_tls,
				       sizeof(struct savetls));
		if (error == 0)
			error = cpu_sanitize_frame(uap->sysmsg_frame);
		if (error == 0)
			error = cpu_sanitize_tls(&curthread->td_tls);
		if (error) {
			/*
			 * Validation failed: restore the saved frame/TLS
			 * and back out all the state set above.
			 */
			bcopy(&vk->vk_save_trapframe, uap->sysmsg_frame,
			      framesz);
			bcopy(&vk->vk_save_vextframe.vx_tls,
			      &curthread->td_tls,
			      sizeof(vk->vk_save_vextframe.vx_tls));
			set_user_TLS();
			vk->vk_current = NULL;
			vk->vk_save_vmspace = NULL;
			--ve->refs;
		} else {
			/*
			 * Switch to the target VM space.  Order matters:
			 * deactivate the old pmap before swapping
			 * p_vmspace, then activate the new one.
			 */
			pmap_deactivate(p);
			p->p_vmspace = ve->vmspace;
			pmap_activate(p);
			set_user_TLS();
			set_vkernel_fp(uap->sysmsg_frame);
			error = EJUSTRETURN;
		}
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return(error);
}

/*
 * vmspace_mmap(id, addr, len, prot, flags, fd, offset)
 *
 * map memory within a VMSPACE.  This function is just like a normal mmap()
 * but operates on the vmspace's memory map.  Most callers use this to create
 * a MAP_VPAGETABLE mapping.
 */
int
sys_vmspace_mmap(struct vmspace_mmap_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;
	int error;

	if ((vk = curproc->p_vkernel) == NULL)
		return (EINVAL);
	vc = vk->vk_common;
	if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL)
		return (ENOENT);
	/* All the real work is done by the generic mmap backend. */
	error = kern_mmap(ve->vmspace, uap->addr, uap->len,
			  uap->prot, uap->flags,
			  uap->fd, uap->offset, &uap->sysmsg_resultp);
	return (error);
}

/*
 * vmspace_munmap(id, addr, len)
 *
 * unmap memory within a VMSPACE.
 */
int
sys_vmspace_munmap(struct vmspace_munmap_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	if ((vk = curproc->p_vkernel) == NULL)
		return (EINVAL);
	vc = vk->vk_common;
	if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL)
		return (ENOENT);

	/*
	 * Copied from sys_munmap()
	 */
	addr = (vm_offset_t)uap->addr;
	size = uap->len;

	/* Page-align the range (round address down, length up). */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t)round_page(size);
	/* Reject address-space wraparound. */
	if (addr + size < addr)
		return (EINVAL);
	/* Unmapping nothing is a successful no-op. */
	if (size == 0)
		return (0);

	if (VM_MAX_USER_ADDRESS > 0 && addr + size > VM_MAX_USER_ADDRESS)
		return (EINVAL);
	if (VM_MIN_USER_ADDRESS > 0 && addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);
	map = &ve->vmspace->vm_map;
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	vm_map_remove(map, addr, addr + size);
	return (0);
}

/*
 * vmspace_pread(id, 
buf, nbyte, flags, offset) 291 * 292 * Read data from a vmspace. The number of bytes read is returned or 293 * -1 if an unrecoverable error occured. If the number of bytes read is 294 * less then the request size, a page fault occured in the VMSPACE which 295 * the caller must resolve in order to proceed. 296 */ 297 int 298 sys_vmspace_pread(struct vmspace_pread_args *uap) 299 { 300 struct vkernel_common *vc; 301 struct vmspace_entry *ve; 302 struct vkernel *vk; 303 304 if ((vk = curproc->p_vkernel) == NULL) 305 return (EINVAL); 306 vc = vk->vk_common; 307 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 308 return (ENOENT); 309 return (EINVAL); 310 } 311 312 /* 313 * vmspace_pwrite(id, buf, nbyte, flags, offset) 314 * 315 * Write data to a vmspace. The number of bytes written is returned or 316 * -1 if an unrecoverable error occured. If the number of bytes written is 317 * less then the request size, a page fault occured in the VMSPACE which 318 * the caller must resolve in order to proceed. 319 */ 320 int 321 sys_vmspace_pwrite(struct vmspace_pwrite_args *uap) 322 { 323 struct vkernel_common *vc; 324 struct vmspace_entry *ve; 325 struct vkernel *vk; 326 327 if ((vk = curproc->p_vkernel) == NULL) 328 return (EINVAL); 329 vc = vk->vk_common; 330 if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL) 331 return (ENOENT); 332 return (EINVAL); 333 } 334 335 /* 336 * vmspace_mcontrol(id, addr, len, behav, value) 337 * 338 * madvise/mcontrol support for a vmspace. 
 */
int
sys_vmspace_mcontrol(struct vmspace_mcontrol_args *uap)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;
	vm_offset_t start, end;

	if ((vk = curproc->p_vkernel) == NULL)
		return (EINVAL);
	vc = vk->vk_common;
	if ((ve = vkernel_find_vmspace(vc, uap->id)) == NULL)
		return (ENOENT);

	/*
	 * This code is basically copied from sys_mcontrol()
	 */
	if (uap->behav < 0 || uap->behav > MADV_CONTROL_END)
		return (EINVAL);

	/* Range checks, including address-space wraparound detection. */
	if (VM_MAX_USER_ADDRESS > 0 &&
	    ((vm_offset_t) uap->addr + uap->len) > VM_MAX_USER_ADDRESS)
		return (EINVAL);
        if (VM_MIN_USER_ADDRESS > 0 && uap->addr < VM_MIN_USER_ADDRESS)
		return (EINVAL);
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	return (vm_map_madvise(&ve->vmspace->vm_map, start, end,
			       uap->behav, uap->value));
}

/*
 * Red black tree functions
 */
static int rb_vmspace_compare(struct vmspace_entry *, struct vmspace_entry *);
RB_GENERATE(vmspace_rb_tree, vmspace_entry, rb_entry, rb_vmspace_compare);

/*
 * Order entries by id pointer value.  Only the key's id field has to be
 * initialized for a lookup (see vkernel_find_vmspace()).
 */
static int
rb_vmspace_compare(struct vmspace_entry *a, struct vmspace_entry *b)
{
	if ((char *)a->id < (char *)b->id)
		return(-1);
	else if ((char *)a->id > (char *)b->id)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback used by vkernel_exit() to destroy every remaining
 * entry once the last reference to the common area is gone; no entry
 * may still be referenced at that point.
 */
static
int
rb_vmspace_delete(struct vmspace_entry *ve, void *data)
{
	struct vkernel_common *vc = data;

	KKASSERT(ve->refs == 0);
	vmspace_entry_delete(ve, vc);
	return(0);
}

/*
 * Remove a vmspace_entry from the RB tree and destroy it.  We have to clean
 * up the pmap, the vm_map, then destroy the vmspace.
 */
static
void
vmspace_entry_delete(struct vmspace_entry *ve, struct vkernel_common *vc)
{
	RB_REMOVE(vmspace_rb_tree, &vc->vc_root, ve);

	/*
	 * Teardown order: pmap mappings first, then the vm_map entries,
	 * then drop the vmspace itself and free the entry.
	 */
	pmap_remove_pages(vmspace_pmap(ve->vmspace),
			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vm_map_remove(&ve->vmspace->vm_map,
		      VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
	vmspace_free(ve->vmspace);
	kfree(ve, M_VKERNEL);
}


/*
 * Locate the vmspace_entry with the given id, or return NULL.  Only the
 * id field of the stack-allocated key needs to be initialized (see
 * rb_vmspace_compare()).
 */
static
struct vmspace_entry *
vkernel_find_vmspace(struct vkernel_common *vc, void *id)
{
	struct vmspace_entry *ve;
	struct vmspace_entry key;

	key.id = id;
	ve = RB_FIND(vmspace_rb_tree, &vc->vc_root, &key);
	return (ve);
}

/*
 * Manage vkernel refs, used by the kernel when fork()ing or exit()ing
 * a vkernel process.
 */
void
vkernel_inherit(struct proc *p1, struct proc *p2)
{
	struct vkernel_common *vc;
	struct vkernel *vk;

	/* Caller guarantees p1 is a vkernel process (p_vkernel != NULL). */
	vk = p1->p_vkernel;
	vc = vk->vk_common;
	KKASSERT(vc->vc_refs > 0);
	/*
	 * NOTE(review): the reference is gained with atomic_add_int() here
	 * but released under vc_spin in vkernel_exit() -- confirm the two
	 * disciplines interoperate correctly on all platforms.
	 */
	atomic_add_int(&vc->vc_refs, 1);
	vk = kmalloc(sizeof(*vk), M_VKERNEL, M_WAITOK|M_ZERO);
	p2->p_vkernel = vk;
	vk->vk_common = vc;
}

/*
 * Tear down a process's vkernel state on exit: detach the per-process
 * struct vkernel, restore the saved VM context if we died while running
 * an emulated one, and drop our reference on the shared common area,
 * destroying it (and all registered vmspaces) on last release.
 */
void
vkernel_exit(struct proc *p)
{
	struct vkernel_common *vc;
	struct vmspace_entry *ve;
	struct vkernel *vk;
	int freeme = 0;

	vk = p->p_vkernel;
	p->p_vkernel = NULL;
	vc = vk->vk_common;
	vk->vk_common = NULL;

	/*
	 * Restore the original VM context if we are killed while running
	 * a different one.
	 *
	 * This isn't supposed to happen.  What is supposed to happen is
	 * that the process should enter vkernel_trap() before the handling
	 * the signal.
	 */
	if ((ve = vk->vk_current) != NULL) {
		kprintf("Killed with active VC, notify kernel list\n");
#ifdef DDB
		db_print_backtrace();
#endif
		vk->vk_current = NULL;
		/* Same ordering as sys_vmspace_ctl(): deactivate, swap,
		 * activate. */
		pmap_deactivate(p);
		p->p_vmspace = vk->vk_save_vmspace;
		pmap_activate(p);
		vk->vk_save_vmspace = NULL;
		KKASSERT(ve->refs > 0);
		--ve->refs;
	}

	/*
	 * Dereference the common area
	 */
	KKASSERT(vc->vc_refs > 0);
	spin_lock_wr(&vc->vc_spin);
	if (--vc->vc_refs == 0)
		freeme = 1;
	spin_unlock_wr(&vc->vc_spin);

	if (freeme) {
		/* Last reference: destroy every remaining vmspace entry. */
		RB_SCAN(vmspace_rb_tree, &vc->vc_root, NULL,
			rb_vmspace_delete, vc);
		kfree(vc, M_VKERNEL);
	}
	kfree(vk, M_VKERNEL);
}

/*
 * A VM space under virtual kernel control trapped out or made a system call
 * or otherwise needs to return control to the virtual kernel context.
 */
int
vkernel_trap(struct proc *p, struct trapframe *frame)
{
	struct vmspace_entry *ve;
	struct vkernel *vk;
	int error;

	/*
	 * Which vmspace entry was running?
	 */
	vk = p->p_vkernel;
	ve = vk->vk_current;
	vk->vk_current = NULL;
	KKASSERT(ve != NULL);

	/*
	 * Switch the process context back to the virtual kernel's VM space.
	 */
	pmap_deactivate(p);
	p->p_vmspace = vk->vk_save_vmspace;
	pmap_activate(p);
	vk->vk_save_vmspace = NULL;
	KKASSERT(ve->refs > 0);
	/* Drop the reference gained in sys_vmspace_ctl(). */
	--ve->refs;

	/*
	 * Copy the emulated process frame to the virtual kernel process.
	 * The emulated process cannot change TLS descriptors so don't
	 * bother saving them, we already have a copy.
	 *
	 * Restore the virtual kernel's saved context so the virtual kernel
	 * process can resume.
	 */
	error = copyout(frame, vk->vk_user_trapframe, sizeof(*frame));
	bcopy(&vk->vk_save_trapframe, frame, sizeof(*frame));
	bcopy(&vk->vk_save_vextframe.vx_tls, &curthread->td_tls,
	      sizeof(vk->vk_save_vextframe.vx_tls));
	set_user_TLS();
	return(error);
}
