/*
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_sysvipc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmsg.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/jail.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

static int shmget_allocate_segment(struct proc *p, struct sysmsg *sysmsg,
		const struct shmget_args *uap, int mode);
static int shmget_existing(struct proc *p, struct sysmsg *sysmsg,
		const struct shmget_args *uap, int mode, int segnum);

#define	SHMSEG_FREE		0x0200
#define	SHMSEG_REMOVED		0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_committed, shmalloced, shm_nused;
static struct shmid_ds *shmsegs;
static struct lwkt_token shm_token = LWKT_TOKEN_INITIALIZER(shm_token);

struct shm_handle {
	/* vm_offset_t kva; */
	vm_object_t shm_object;
};

struct shmmap_state {
	vm_offset_t va;
	int shmid;
	int reserved;
};

static void shm_deallocate_segment (struct shmid_ds *);
static int shm_find_segment_by_key (key_t);
static struct shmid_ds *shm_find_segment_by_shmid (int);
static int shm_delete_mapping (struct vmspace *vm, struct shmmap_state *);
static void shmrealloc (void);
static void shminit (void *);

/*
 * Tuneable values
 */
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	512
#endif
#ifndef SHMSEG
#define	SHMSEG	1024
#endif

struct shminfo shminfo = {
	0,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	0
};

/*
 * allow-removed    Allow a shared memory segment to be attached by its shmid
 *		    even after it has been deleted, as long as it was still
 *		    being referenced by someone.  This is a trick used by
 *		    chrome and other applications to avoid leaving shm
 *		    segments hanging around after the application is killed
 *		    or seg-faults unexpectedly.
 *
 * use-phys	    Shared memory segments are to use physical memory by
 *		    default, which may allow the kernel to better-optimize
 *		    the pmap and reduce overhead.  The pages are effectively
 *		    wired.
 */
static int shm_allow_removed = 1;
static int shm_use_phys = 1;
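
/*
 * Illustration (userland code, not part of this file): the allow-removed
 * pattern the comment above refers to.  The creator marks the segment
 * removed right after attaching, so the kernel reclaims it on last detach
 * even if the process later crashes.  A minimal sketch:
 *
 *	int id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);
 *	shmctl(id, IPC_RMID, NULL);	// key disappears immediately
 *	// ... use p; memory stays valid while attached ...
 *	shmdt(p);			// last detach frees the segment
 *
 * With shm_allow_removed enabled, a cooperating process that already
 * knows the shmid can still shmat() it until the last detach.
 */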

TUNABLE_LONG("kern.ipc.shmmin", &shminfo.shmmin);
TUNABLE_LONG("kern.ipc.shmmni", &shminfo.shmmni);
TUNABLE_LONG("kern.ipc.shmseg", &shminfo.shmseg);
TUNABLE_LONG("kern.ipc.shmmaxpgs", &shminfo.shmall);
TUNABLE_INT("kern.ipc.shm_use_phys", &shm_use_phys);

SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Max shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Min shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0,
    "Max number of shared memory identifiers");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0,
    "Max shared memory segments per process");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Max pages of shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0,
    "Use phys pager allocation instead of swap pager allocation");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");

static int
shm_find_segment_by_key(key_t key)
{
	int i;

	for (i = 0; i < shmalloced; i++) {
		if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].shm_perm.key == key)
			return i;
	}
	return -1;
}

static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
	    (!shm_allow_removed &&
	     (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0) ||
	    shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) {
		return NULL;
	}
	return shmseg;
}
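
/*
 * A note on the shmid encoding used by the lookups above: a shmid packs
 * the shmsegs[] index into its low 16 bits and the slot's sequence
 * number above that, so a stale id whose slot has been recycled fails
 * the seq comparison instead of silently matching a new segment.
 * Illustrative expansion (the real macros live in <sys/ipc.h>):
 *
 *	shmid = IXSEQ_TO_IPCID(ix, perm);	// (perm.seq << 16) | ix
 *	ix    = IPCID_TO_IX(shmid);		// shmid & 0xffff
 *	seq   = IPCID_TO_SEQ(shmid);		// (shmid >> 16) & 0xffff
 */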

static void
shm_deallocate_segment(struct shmid_ds *shmseg)
{
	struct shm_handle *shm_handle;
	size_t size;

	shm_handle = shmseg->shm_internal;
	vm_object_deallocate(shm_handle->shm_object);
	kfree((caddr_t)shm_handle, M_SHM);
	shmseg->shm_internal = NULL;
	size = round_page(shmseg->shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
}

static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_ds *shmseg;
	int segnum, result;
	size_t size;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return EINVAL;
	shmmap_s->shmid = -1;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return 0;
}

/*
 * MPALMOSTSAFE
 */
int
sys_shmdt(struct sysmsg *sysmsg, const struct shmdt_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
	struct prison *pr = p->p_ucred->cr_prison;
	long i;
	int error;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr)
			break;
	}
	if (i == shminfo.shmseg)
		error = EINVAL;
	else
		error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done:
	lwkt_reltoken(&shm_token);

	return (error);
}
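
/*
 * Userland note: shmdt() must be passed the exact address returned by
 * shmat(), which is why sys_shmdt() above matches on shmmap_s->va rather
 * than doing a range lookup.  Sketch:
 *
 *	void *p = shmat(id, NULL, 0);
 *	if (p != (void *)-1)
 *		shmdt(p);		// any other address yields EINVAL
 */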

/*
 * MPALMOSTSAFE
 */
int
sys_shmat(struct sysmsg *sysmsg, const struct shmat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int error, flags;
	long i;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	struct shm_handle *shm_handle;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	vm_size_t align;
	int rv;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
again:
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++) {
			shmmap_s[i].shmid = -1;
			shmmap_s[i].reserved = 0;
		}
		if (p->p_vmspace->vm_shm != NULL) {
			kfree(shmmap_s, M_SHM);
			goto again;
		}
		p->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}
	error = ipcperm(p, &shmseg->shm_perm,
			(uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done;

	/*
	 * Find a free element and mark it reserved.  This fixes races
	 * against concurrent allocations due to the token being
	 * interrupted by blocking operations.  The shmmap_s reservation
	 * will be cleared upon completion or error.
	 */
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1 && shmmap_s->reserved == 0) {
			shmmap_s->reserved = 1;
			break;
		}
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done;
	}
	size = round_page(shmseg->shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((uap->shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (uap->shmaddr) {
		flags |= MAP_FIXED;
		if (uap->shmflg & SHM_RND) {
			attach_va =
			    rounddown2((vm_offset_t)uap->shmaddr, SHMLBA);
		} else if (((vm_offset_t)uap->shmaddr & (SHMLBA - 1)) == 0) {
			attach_va = (vm_offset_t)uap->shmaddr;
		} else {
			error = EINVAL;
			shmmap_s->reserved = 0;
			goto done;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to put it.
		 */
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr +
				       maxtsiz + maxdsiz);
	}

	/*
	 * Handle alignment.  For large memory maps it is possible
	 * that the MMU can optimize the page table, so align anything
	 * that is a multiple of SEG_SIZE to SEG_SIZE.
	 */
	if ((flags & MAP_FIXED) == 0 && (size & SEG_MASK) == 0)
		align = SEG_SIZE;
	else
		align = PAGE_SIZE;

	shm_handle = shmseg->shm_internal;
	vm_object_hold(shm_handle->shm_object);
	vm_object_reference_locked(shm_handle->shm_object);
	rv = vm_map_find(&p->p_vmspace->vm_map,
			 shm_handle->shm_object, NULL,
			 0, &attach_va, size,
			 align,
			 ((flags & MAP_FIXED) ? 0 : 1),
			 VM_MAPTYPE_NORMAL, VM_SUBSYS_SHMEM,
			 prot, prot, 0);
	vm_object_drop(shm_handle->shm_object);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shm_handle->shm_object);
		shmmap_s->reserved = 0;
		error = ENOMEM;
		goto done;
	}
	vm_map_inherit(&p->p_vmspace->vm_map,
		       attach_va, attach_va + size, VM_INHERIT_SHARE);

	KKASSERT(shmmap_s->shmid == -1);
	shmmap_s->va = attach_va;
	shmmap_s->shmid = uap->shmid;
	shmmap_s->reserved = 0;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_atime = time_second;
	shmseg->shm_nattch++;
	sysmsg->sysmsg_resultp = (void *)attach_va;
	error = 0;
done:
	lwkt_reltoken(&shm_token);

	return error;
}
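
/*
 * Userland view of the attach-address handling above (illustrative):
 *
 *	void *p1 = shmat(id, NULL, 0);		// kernel chooses the address
 *	void *p2 = shmat(id, addr, SHM_RND);	// addr rounded down to SHMLBA
 *	void *p3 = shmat(id, addr, 0);		// addr must be SHMLBA-aligned,
 *						// else EINVAL
 *	void *p4 = shmat(id, NULL, SHM_RDONLY);	// read-only mapping
 *
 * An unaligned addr without SHM_RND fails with EINVAL, matching the
 * MAP_FIXED branch above.
 */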

/*
 * MPALMOSTSAFE
 */
int
sys_shmctl(struct sysmsg *sysmsg, const struct shmctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int error;
	struct shmid_ds inbuf;
	struct shmid_ds *shmseg;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	lwkt_gettoken(&shm_token);
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}

	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(p, &shmseg->shm_perm, IPC_R);
		if (error == 0)
			error = copyout(shmseg, uap->buf, sizeof(inbuf));
		break;
	case IPC_SET:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0)
			error = copyin(uap->buf, &inbuf, sizeof(inbuf));
		if (error == 0) {
			shmseg->shm_perm.uid = inbuf.shm_perm.uid;
			shmseg->shm_perm.gid = inbuf.shm_perm.gid;
			shmseg->shm_perm.mode =
			    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
			    (inbuf.shm_perm.mode & ACCESSPERMS);
			shmseg->shm_ctime = time_second;
		}
		break;
	case IPC_RMID:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0) {
			shmseg->shm_perm.key = IPC_PRIVATE;
			shmseg->shm_perm.mode |= SHMSEG_REMOVED;
			if (shmseg->shm_nattch <= 0) {
				shm_deallocate_segment(shmseg);
				shm_last_free = IPCID_TO_IX(uap->shmid);
			}
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done:
	lwkt_reltoken(&shm_token);

	return error;
}
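
/*
 * Illustrative userland usage of the shmctl() commands handled above:
 *
 *	struct shmid_ds ds;
 *	shmctl(id, IPC_STAT, &ds);	// snapshot perms/size/attach counts
 *	ds.shm_perm.mode = 0600;
 *	shmctl(id, IPC_SET, &ds);	// only uid, gid and ACCESSPERMS change
 *	shmctl(id, IPC_RMID, NULL);	// destroyed once shm_nattch reaches 0
 */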

static int
shmget_existing(struct proc *p, struct sysmsg *sysmsg,
		const struct shmget_args *uap, int mode, int segnum)
{
	struct shmid_ds *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep((caddr_t)shmseg, PCATCH, "shmget", 0);
		if (error)
			return error;
		return EAGAIN;
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return EEXIST;
	error = ipcperm(p, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (uap->size && uap->size > shmseg->shm_segsz)
		return EINVAL;
	sysmsg->sysmsg_result = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

static int
shmget_allocate_segment(struct proc *p, struct sysmsg *sysmsg,
			const struct shmget_args *uap, int mode)
{
	int i, segnum, shmid;
	size_t size;
	struct ucred *cred = p->p_ucred;
	struct shmid_ds *shmseg;
	struct shm_handle *shm_handle;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return EINVAL;
	if (shm_nused >= shminfo.shmmni)	/* any shmids left? */
		return ENOSPC;
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return ENOMEM;
	if (shm_last_free < 0) {
		shmrealloc();	/* maybe expand the shmsegs[] array */
		for (i = 0; i < shmalloced; i++) {
			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
				break;
		}
		if (i == shmalloced)
			return ENOSPC;
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];

	/*
	 * In case we sleep in kmalloc(), mark the segment present but
	 * deleted so that no one else tries to create the same key.
	 */
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->shm_perm.key = uap->key;
	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
	shm_handle = kmalloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need to.
	 */
	if (shm_use_phys) {
		shm_handle->shm_object =
		    phys_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_handle->shm_object =
		    swap_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	}
	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);

	shmseg->shm_internal = shm_handle;
	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
				(mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->shm_segsz = uap->size;
	shmseg->shm_cpid = p->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;

	/*
	 * If a physical mapping is desired and we have a ton of free pages
	 * we pre-allocate the pages here in order to avoid on-the-fly
	 * allocation later.  This has a big effect on database warm-up
	 * times since DFly supports concurrent page faults coming from the
	 * same VM object for pages which already exist.
	 *
	 * This can hang the kernel for a while so only do it if shm_use_phys
	 * is set to 2 or higher.
	 */
	if (shm_use_phys > 1) {
		vm_pindex_t pi, pmax;
		vm_page_t m;

		pmax = round_page(shmseg->shm_segsz) >> PAGE_SHIFT;
		vm_object_hold(shm_handle->shm_object);
		if (pmax > vmstats.v_free_count)
			pmax = vmstats.v_free_count;
		for (pi = 0; pi < pmax; ++pi) {
			m = vm_page_grab(shm_handle->shm_object, pi,
					 VM_ALLOC_SYSTEM | VM_ALLOC_NULL_OK |
					 VM_ALLOC_ZERO);
			if (m == NULL)
				break;
			vm_pager_get_page(shm_handle->shm_object, pi, &m, 1);
			vm_page_activate(m);
			vm_page_wakeup(m);
			lwkt_yield();
		}
		vm_object_drop(shm_handle->shm_object);
	}

	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup((caddr_t)shmseg);
	}
	sysmsg->sysmsg_result = shmid;
	return 0;
}

/*
 * MPALMOSTSAFE
 */
int
sys_shmget(struct sysmsg *sysmsg, const struct shmget_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct prison *pr = p->p_ucred->cr_prison;
	int segnum, mode, error;

	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
		return (ENOSYS);

	mode = uap->shmflg & ACCESSPERMS;

	lwkt_gettoken(&shm_token);

	if (uap->key != IPC_PRIVATE) {
again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(p, sysmsg, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done;
		}
	}
	error = shmget_allocate_segment(p, sysmsg, uap, mode);
done:
	lwkt_reltoken(&shm_token);

	return (error);
}
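
/*
 * Illustrative userland usage of the key lookup/create logic above:
 *
 *	key_t key = ftok("/some/path", 1);
 *	int id = shmget(key, 65536, IPC_CREAT | IPC_EXCL | 0600);
 *	if (id == -1 && errno == EEXIST)
 *		id = shmget(key, 65536, 0600);	// attach to the existing one
 *
 * Requesting a size larger than an existing segment fails with EINVAL
 * (see shmget_existing()); IPC_PRIVATE always allocates a new segment.
 */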

void
shmfork(struct proc *p1, struct proc *p2)
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	lwkt_gettoken(&shm_token);
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
	bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmmap_s, size);
	p2->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
	}
	lwkt_reltoken(&shm_token);
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = (struct shmmap_state *)vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		lwkt_gettoken(&shm_token);
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		kfree(base, M_SHM);
		lwkt_reltoken(&shm_token);
	}
}

static void
shmrealloc(void)
{
	int i;
	struct shmid_ds *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = kmalloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	for (; i < shminfo.shmmni; i++) {
		/* initialize the newly added slots */
		newsegs[i].shm_perm.mode = SHMSEG_FREE;
		newsegs[i].shm_perm.seq = 0;
	}
	kfree(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static void
shminit(void *dummy)
{
	int i;

	/*
	 * If not overridden by a tunable, set the maximum shm to
	 * 2/3 of main memory.
	 */
	if (shminfo.shmall == 0)
		shminfo.shmall = (size_t)vmstats.v_page_count * 2 / 3;

	shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
	shmalloced = shminfo.shmmni;
	shmsegs = kmalloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm.seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
}
SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL);
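
/*
 * Example settings for the tunables/sysctls defined in this file;
 * values are illustrative only, not recommendations:
 *
 * /boot/loader.conf:
 *	kern.ipc.shmseg="1024"		# max attaches per process
 *	kern.ipc.shmmaxpgs="262144"	# shmall in pages; shminit() derives
 *					# shmmax from it at boot
 *	kern.ipc.shm_use_phys="2"	# phys pager + page pre-allocation
 *
 * Runtime:
 *	sysctl kern.ipc.shm_use_phys=1
 *	sysctl kern.ipc.shm_allow_removed=1
 */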