/*
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_sysvipc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/jail.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

static int shmget_allocate_segment(struct proc *p, struct sysmsg *sysmsg,
			const struct shmget_args *uap, int mode);
static int shmget_existing(struct proc *p, struct sysmsg *sysmsg,
			const struct shmget_args *uap, int mode, int segnum);

#define	SHMSEG_FREE		0x0200
#define	SHMSEG_REMOVED		0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_committed, shmalloced;
int shm_nused;
static struct shmid_ds *shmsegs;
static struct lwkt_token shm_token = LWKT_TOKEN_INITIALIZER(shm_token);

struct shm_handle {
	/* vm_offset_t kva; */
	vm_object_t shm_object;
};

struct shmmap_state {
	vm_offset_t va;
	int shmid;
	int reserved;
};

static void shm_deallocate_segment(struct shmid_ds *);
static int shm_find_segment_by_key(key_t);
static struct shmid_ds *shm_find_segment_by_shmid(int);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static void shminit(void *);

/*
 * Tuneable values
 */
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	512
#endif
#ifndef SHMSEG
#define	SHMSEG	1024
#endif

struct shminfo shminfo = {
	0,		/* shmmax: computed at boot by shminit() */
	SHMMIN,
	SHMMNI,
	SHMSEG,
	0		/* shmall: defaults to 2/3 of main memory */
};

/*
 * allow-removed	Allow a shared memory segment to be attached by its
 *			shmid even after it has been deleted, as long as it
 *			is still being referenced by someone.  This is a
 *			trick used by chrome and other applications to avoid
 *			leaving shm segments hanging around after the
 *			application is killed or seg-faults unexpectedly.
 *
 * use-phys		Shared memory segments use physical memory by
 *			default, which may allow the kernel to better
 *			optimize the pmap and reduce overhead.  The pages
 *			are effectively wired.
 */
static int shm_allow_removed = 1;
static int shm_use_phys = 1;

TUNABLE_LONG("kern.ipc.shmmin", &shminfo.shmmin);
TUNABLE_LONG("kern.ipc.shmmni", &shminfo.shmmni);
TUNABLE_LONG("kern.ipc.shmseg", &shminfo.shmseg);
TUNABLE_LONG("kern.ipc.shmmaxpgs", &shminfo.shmall);
TUNABLE_INT("kern.ipc.shm_use_phys", &shm_use_phys);

SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Max shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Min shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0,
    "Max number of shared memory identifiers");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0,
    "Max shared memory segments per process");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Max pages of shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0,
    "Use phys pager allocation instead of swap pager allocation");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to segments marked for removal");
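
/*
 * Illustration (userland code, not part of this file): the attach-after-
 * remove pattern that shm_allow_removed exists to support.  A hypothetical
 * sketch, names for exposition only:
 *
 *	int id = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
 *	void *base = shmat(id, NULL, 0);
 *	shmctl(id, IPC_RMID, NULL);		-- sets SHMSEG_REMOVED
 *	void *peer = shmat(id, NULL, 0);	-- ok while shm_nattch > 0
 *
 * Once the last attachment is removed the segment is deallocated, so a
 * crashed application cannot leave the segment behind.
 */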
static int
shm_find_segment_by_key(key_t key)
{
	int i;

	for (i = 0; i < shmalloced; i++) {
		if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].shm_perm.key == key)
			return i;
	}
	return -1;
}

static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0) ||
	    shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) {
		return NULL;
	}
	return shmseg;
}

static void
shm_deallocate_segment(struct shmid_ds *shmseg)
{
	struct shm_handle *shm_handle;
	size_t size;

	shm_handle = shmseg->shm_internal;
	vm_object_deallocate(shm_handle->shm_object);
	kfree((caddr_t)shm_handle, M_SHM);
	shmseg->shm_internal = NULL;
	size = round_page(shmseg->shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
}

static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_ds *shmseg;
	int segnum, result;
	size_t size;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return EINVAL;
	shmmap_s->shmid = -1;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return 0;
}
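
/*
 * For reference, the shmid<->slot translation used above follows the BSD
 * SysV IPC convention from <sys/ipc.h>: the low 16 bits of a shmid index
 * shmsegs[] and the upper bits carry the slot's sequence number, which
 * guards against stale ids once a slot has been recycled.  Roughly:
 *
 *	IPCID_TO_IX(id)		 == (id) & 0xffff
 *	IPCID_TO_SEQ(id)	 == ((id) >> 16) & 0xffff
 *	IXSEQ_TO_IPCID(ix, perm) == ((perm).seq << 16) | ((ix) & 0xffff)
 */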
/*
 * MPALMOSTSAFE
 */
int
sys_shmdt(struct sysmsg *sysmsg, const struct shmdt_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
	struct prison *pr = p->p_ucred->cr_prison;
	long i;
	int error;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr)
			break;
	}
	if (i == shminfo.shmseg)
		error = EINVAL;
	else
		error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done:
	lwkt_reltoken(&shm_token);

	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_shmat(struct sysmsg *sysmsg, const struct shmat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int error, flags;
	long i;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	struct shm_handle *shm_handle;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	vm_size_t align;
	int rv;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
again:
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++) {
			shmmap_s[i].shmid = -1;
			shmmap_s[i].reserved = 0;
		}
		if (p->p_vmspace->vm_shm != NULL) {
			kfree(shmmap_s, M_SHM);
			goto again;
		}
		p->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}
	error = ipcperm(p, &shmseg->shm_perm,
			(uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done;

	/*
	 * Find a free element and mark it reserved.  This guards against
	 * races with concurrent allocations that can occur when the token
	 * is temporarily lost across blocking operations.  The reservation
	 * is cleared upon completion or error.
	 */
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1 && shmmap_s->reserved == 0) {
			shmmap_s->reserved = 1;
			break;
		}
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done;
	}
	size = round_page(shmseg->shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((uap->shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (uap->shmaddr) {
		flags |= MAP_FIXED;
		if (uap->shmflg & SHM_RND) {
			attach_va =
			    rounddown2((vm_offset_t)uap->shmaddr, SHMLBA);
		} else if (((vm_offset_t)uap->shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)uap->shmaddr;
		} else {
			error = EINVAL;
			shmmap_s->reserved = 0;
			goto done;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to put it.
		 */
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr +
				       maxtsiz + maxdsiz);
	}

	/*
	 * Handle alignment.  For large memory maps it is possible that the
	 * MMU can optimize the page table, so align anything that is a
	 * multiple of SEG_SIZE to SEG_SIZE.
	 */
	if ((flags & MAP_FIXED) == 0 && (size & SEG_MASK) == 0)
		align = SEG_SIZE;
	else
		align = PAGE_SIZE;

	shm_handle = shmseg->shm_internal;
	vm_object_hold(shm_handle->shm_object);
	vm_object_reference_locked(shm_handle->shm_object);
	rv = vm_map_find(&p->p_vmspace->vm_map,
			 shm_handle->shm_object, NULL,
			 0, &attach_va, size,
			 align,
			 ((flags & MAP_FIXED) ? 0 : 1),
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_SHMEM,
			 prot, prot, 0);
	vm_object_drop(shm_handle->shm_object);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shm_handle->shm_object);
		shmmap_s->reserved = 0;
		error = ENOMEM;
		goto done;
	}
	vm_map_inherit(&p->p_vmspace->vm_map,
		       attach_va, attach_va + size, VM_INHERIT_SHARE);

	KKASSERT(shmmap_s->shmid == -1);
	shmmap_s->va = attach_va;
	shmmap_s->shmid = uap->shmid;
	shmmap_s->reserved = 0;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_atime = time_second;
	shmseg->shm_nattch++;
	sysmsg->sysmsg_resultp = (void *)attach_va;
	error = 0;
done:
	lwkt_reltoken(&shm_token);

	return error;
}
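
/*
 * Usage sketch (userland, hypothetical): attach read/write at a kernel-
 * chosen address and detach again.  shmat() returns (void *)-1 on error.
 *
 *	void *p = shmat(id, NULL, 0);
 *	...
 *	shmdt(p);			-- must be the exact address
 *					   returned by shmat()
 *
 * A non-NULL address must be SHMLBA-aligned unless SHM_RND is given, in
 * which case it is rounded down to SHMLBA, matching the MAP_FIXED branch
 * above.
 */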
/*
 * MPALMOSTSAFE
 */
int
sys_shmctl(struct sysmsg *sysmsg, const struct shmctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int error;
	struct shmid_ds inbuf;
	struct shmid_ds *shmseg;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}

	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(p, &shmseg->shm_perm, IPC_R);
		if (error == 0)
			error = copyout(shmseg, uap->buf, sizeof(inbuf));
		break;
	case IPC_SET:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0)
			error = copyin(uap->buf, &inbuf, sizeof(inbuf));
		if (error == 0) {
			shmseg->shm_perm.uid = inbuf.shm_perm.uid;
			shmseg->shm_perm.gid = inbuf.shm_perm.gid;
			shmseg->shm_perm.mode =
			    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
			    (inbuf.shm_perm.mode & ACCESSPERMS);
			shmseg->shm_ctime = time_second;
		}
		break;
	case IPC_RMID:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0) {
			shmseg->shm_perm.key = IPC_PRIVATE;
			shmseg->shm_perm.mode |= SHMSEG_REMOVED;
			if (shmseg->shm_nattch <= 0) {
				shm_deallocate_segment(shmseg);
				shm_last_free = IPCID_TO_IX(uap->shmid);
			}
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done:
	lwkt_reltoken(&shm_token);

	return error;
}
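
/*
 * Usage sketch (userland, hypothetical): inspect a segment and mark it
 * for removal.
 *
 *	struct shmid_ds ds;
 *	shmctl(id, IPC_STAT, &ds);	-- requires read permission
 *	shmctl(id, IPC_RMID, NULL);	-- requires IPC_M; destroyed when
 *					   shm_nattch reaches zero, or
 *					   immediately if it already is
 */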
static int
shmget_existing(struct proc *p, struct sysmsg *sysmsg,
		const struct shmget_args *uap, int mode, int segnum)
{
	struct shmid_ds *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep((caddr_t)shmseg, PCATCH, "shmget", 0);
		if (error)
			return error;
		return EAGAIN;
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return EEXIST;
	error = ipcperm(p, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (uap->size && uap->size > shmseg->shm_segsz)
		return EINVAL;
	sysmsg->sysmsg_result = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

static int
shmget_allocate_segment(struct proc *p, struct sysmsg *sysmsg,
			const struct shmget_args *uap, int mode)
{
	int i, segnum, shmid;
	size_t size;
	struct ucred *cred = p->p_ucred;
	struct shmid_ds *shmseg;
	struct shm_handle *shm_handle;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return EINVAL;
	if (shm_nused >= shminfo.shmmni) /* any shmids left? */
		return ENOSPC;
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return ENOMEM;
	if (shm_last_free < 0) {
		shmrealloc();	/* maybe expand the shmsegs[] array */
		for (i = 0; i < shmalloced; i++) {
			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
				break;
		}
		if (i == shmalloced)
			return ENOSPC;
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->shm_perm.key = uap->key;
	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
	shm_handle = kmalloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need to.
	 */
	if (shm_use_phys) {
		shm_handle->shm_object =
		    phys_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_handle->shm_object =
		    swap_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	}
	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);

	shmseg->shm_internal = shm_handle;
	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
				(mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->shm_segsz = uap->size;
	shmseg->shm_cpid = p->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;

	/*
	 * If a physical mapping is desired and we have a ton of free pages
	 * we pre-allocate the pages here in order to avoid on-the-fly
	 * allocation later.  This has a big effect on database warm-up
	 * times since DFly supports concurrent page faults coming from the
	 * same VM object for pages which already exist.
	 *
	 * This can hang the kernel for a while so only do it if shm_use_phys
	 * is set to 2 or higher.
	 */
	if (shm_use_phys > 1) {
		vm_pindex_t pi, pmax;
		vm_page_t m;

		pmax = round_page(shmseg->shm_segsz) >> PAGE_SHIFT;
		vm_object_hold(shm_handle->shm_object);
		if (pmax > vmstats.v_free_count)
			pmax = vmstats.v_free_count;
		for (pi = 0; pi < pmax; ++pi) {
			m = vm_page_grab(shm_handle->shm_object, pi,
					 VM_ALLOC_SYSTEM | VM_ALLOC_NULL_OK |
					 VM_ALLOC_ZERO);
			if (m == NULL)
				break;
			vm_pager_get_page(shm_handle->shm_object, pi, &m, 1);
			vm_page_activate(m);
			vm_page_wakeup(m);
			lwkt_yield();
		}
		vm_object_drop(shm_handle->shm_object);
	}

	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup((caddr_t)shmseg);
	}
	sysmsg->sysmsg_result = shmid;
	return 0;
}
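
/*
 * The pre-allocation path above is opt-in.  A hypothetical way to enable
 * it at runtime, assuming enough free pages to make the warm-up worth it:
 *
 *	sysctl kern.ipc.shm_use_phys=2
 *
 * A value of 1 still uses the phys pager but lets pages be allocated on
 * demand at fault time.
 */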
/*
 * MPALMOSTSAFE
 */
int
sys_shmget(struct sysmsg *sysmsg, const struct shmget_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int segnum, mode, error;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	mode = uap->shmflg & ACCESSPERMS;

	lwkt_gettoken(&shm_token);

	if (uap->key != IPC_PRIVATE) {
again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(p, sysmsg, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done;
		}
	}
	error = shmget_allocate_segment(p, sysmsg, uap, mode);
done:
	lwkt_reltoken(&shm_token);

	return (error);
}
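
/*
 * Usage sketch (userland, hypothetical names): create-or-open by key with
 * exclusive-create detection, mirroring the key lookup above.
 *
 *	key_t key = ftok("/some/path", 42);
 *	int id = shmget(key, 1 << 20, IPC_CREAT | IPC_EXCL | 0600);
 *	if (id == -1 && errno == EEXIST)
 *		id = shmget(key, 1 << 20, 0600);	-- open existing
 *
 * IPC_PRIVATE skips the key lookup entirely and always allocates a fresh
 * segment.
 */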
void
shmfork(struct proc *p1, struct proc *p2)
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	lwkt_gettoken(&shm_token);
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
	bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmmap_s, size);
	p2->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
	}
	lwkt_reltoken(&shm_token);
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = (struct shmmap_state *)vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		lwkt_gettoken(&shm_token);
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		kfree(base, M_SHM);
		lwkt_reltoken(&shm_token);
	}
}

static void
shmrealloc(void)
{
	int i;
	struct shmid_ds *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = kmalloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	for (; i < shminfo.shmmni; i++) {
		/*
		 * Initialize the new tail entries, not the old (smaller)
		 * array, which would be an out-of-bounds write and would
		 * leave these entries uninitialized.
		 */
		newsegs[i].shm_perm.mode = SHMSEG_FREE;
		newsegs[i].shm_perm.seq = 0;
	}
	kfree(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static void
shminit(void *dummy)
{
	int i;

	/*
	 * If not overridden by a tunable, set the maximum shm to
	 * 2/3 of main memory.
	 */
	if (shminfo.shmall == 0)
		shminfo.shmall = (size_t)vmstats.v_page_count * 2 / 3;

	shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
	shmalloced = shminfo.shmmni;
	shmsegs = kmalloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm.seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
}
SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL);
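
/*
 * The TUNABLE_* hooks above allow the limits to be set from the loader
 * before shminit() runs.  A hypothetical /boot/loader.conf fragment:
 *
 *	kern.ipc.shmmni=1024		# more segment identifiers
 *	kern.ipc.shmseg=2048		# more attaches per process
 *	kern.ipc.shmmaxpgs=262144	# shmall, in pages
 *
 * If kern.ipc.shmmaxpgs is left unset, shminit() sizes shmall to 2/3 of
 * physical memory and derives shmmax from it.
 */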