/*
 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_compat.h"
#include "opt_sysvipc.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysent.h>
#include <sys/jail.h>

#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");

struct oshmctl_args;
static int sys_oshmctl (struct proc *p, struct oshmctl_args *uap);

static int shmget_allocate_segment (struct proc *p, struct shmget_args *uap, int mode);
static int shmget_existing (struct proc *p, struct shmget_args *uap, int mode, int segnum);

/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)sys_shmat, (sy_call_t *)sys_oshmctl,
	(sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget,
	(sy_call_t *)sys_shmctl
};

#define	SHMSEG_FREE		0x0200
#define	SHMSEG_REMOVED		0x0400
#define	SHMSEG_ALLOCATED	0x0800
#define	SHMSEG_WANTED		0x1000

static int shm_last_free, shm_committed, shmalloced;
int shm_nused;
static struct shmid_ds *shmsegs;

struct shm_handle {
	/* vm_offset_t kva; */
	vm_object_t shm_object;
};

struct shmmap_state {
	vm_offset_t va;
	int shmid;
};

static void shm_deallocate_segment (struct shmid_ds *);
static int shm_find_segment_by_key (key_t);
static struct shmid_ds *shm_find_segment_by_shmid (int);
static int shm_delete_mapping (struct vmspace *vm, struct shmmap_state *);
static void shmrealloc (void);
static void shminit (void *);

/*
 * Tuneable values
 */
#ifndef SHMMIN
#define	SHMMIN	1
#endif
#ifndef SHMMNI
#define	SHMMNI	512
#endif
#ifndef SHMSEG
#define	SHMSEG	1024
#endif

struct shminfo shminfo = {
	0,
	SHMMIN,
	SHMMNI,
	SHMSEG,
	0
};

static int shm_use_phys = 1;

TUNABLE_LONG("kern.ipc.shmmin", &shminfo.shmmin);
TUNABLE_LONG("kern.ipc.shmmni", &shminfo.shmmni);
TUNABLE_LONG("kern.ipc.shmseg", &shminfo.shmseg);
TUNABLE_LONG("kern.ipc.shmmaxpgs", &shminfo.shmall);
TUNABLE_INT("kern.ipc.shm_use_phys", &shm_use_phys);

SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Max shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Min shared memory segment size");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0,
    "Max number of shared memory identifiers");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0,
    "Max shared memory segments per process");
SYSCTL_LONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Max pages of shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0,
    "Use phys pager allocation instead of swap pager allocation");
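
/*
 * Example (illustrative only, not a recommendation): the TUNABLE_*
 * values above are read from the kernel environment, which is normally
 * populated from /boot/loader.conf before boot, e.g.
 *
 *	kern.ipc.shmmni=1024
 *	kern.ipc.shmseg=2048
 *	kern.ipc.shm_use_phys=2
 *
 * The CTLFLAG_RW sysctls can also be adjusted on a running system,
 * e.g. `sysctl kern.ipc.shmmax=1073741824` for a 1GB segment limit.
 */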
static int
shm_find_segment_by_key(key_t key)
{
	int i;

	for (i = 0; i < shmalloced; i++) {
		if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[i].shm_perm.key == key)
			return i;
	}
	return -1;
}

static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shmalloced)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED))
	    != SHMSEG_ALLOCATED ||
	    shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) {
		return NULL;
	}
	return shmseg;
}
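
/*
 * A worked example of the shmid encoding used above (assuming the
 * usual BSD IPCID macros, which pack the array index into the low
 * 16 bits and the generation sequence into the bits above):
 *
 *	shmid = IXSEQ_TO_IPCID(5, perm)		with perm.seq == 3
 *	      = (3 << 16) | 5 = 196613
 *	IPCID_TO_IX(196613)  == 5	(slot in shmsegs[])
 *	IPCID_TO_SEQ(196613) == 3	(stale-id check)
 *
 * The sequence comparison is what makes a recycled slot reject shmids
 * that referred to an earlier segment in the same slot.
 */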
static void
shm_deallocate_segment(struct shmid_ds *shmseg)
{
	struct shm_handle *shm_handle;
	size_t size;

	shm_handle = shmseg->shm_internal;
	vm_object_deallocate(shm_handle->shm_object);
	kfree((caddr_t)shm_handle, M_SHM);
	shmseg->shm_internal = NULL;
	size = round_page(shmseg->shm_segsz);
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
}

static int
shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
{
	struct shmid_ds *shmseg;
	int segnum, result;
	size_t size;

	segnum = IPCID_TO_IX(shmmap_s->shmid);
	shmseg = &shmsegs[segnum];
	size = round_page(shmseg->shm_segsz);
	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
	if (result != KERN_SUCCESS)
		return EINVAL;
	shmmap_s->shmid = -1;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		shm_deallocate_segment(shmseg);
		shm_last_free = segnum;
	}
	return 0;
}

/*
 * MPALMOSTSAFE
 */
int
sys_shmdt(struct shmdt_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct shmmap_state *shmmap_s;
	long i;
	int error;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	get_mplock();
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		error = EINVAL;
		goto done;
	}
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1 &&
		    shmmap_s->va == (vm_offset_t)uap->shmaddr)
			break;
	}
	if (i == shminfo.shmseg)
		error = EINVAL;
	else
		error = shm_delete_mapping(p->p_vmspace, shmmap_s);
done:
	rel_mplock();
	return (error);
}
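
/*
 * For reference, a minimal userland sequence exercising the handlers
 * in this file (illustrative sketch only; error handling omitted):
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	int id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	// sys_shmat() maps the VM object
 *	p[0] = 1;			// all attachments see this store
 *	shmdt(p);			// sys_shmdt() -> shm_delete_mapping()
 *	shmctl(id, IPC_RMID, NULL);	// destroy once nattch reaches 0
 */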
/*
 * MPALMOSTSAFE
 */
int
sys_shmat(struct shmat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int error, flags;
	long i;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s = NULL;
	struct shm_handle *shm_handle;
	vm_offset_t attach_va;
	vm_prot_t prot;
	vm_size_t size;
	vm_size_t align;
	int rv;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	get_mplock();
again:
	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s == NULL) {
		size = shminfo.shmseg * sizeof(struct shmmap_state);
		shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
		for (i = 0; i < shminfo.shmseg; i++)
			shmmap_s[i].shmid = -1;
		if (p->p_vmspace->vm_shm != NULL) {
			kfree(shmmap_s, M_SHM);
			goto again;
		}
		p->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	}
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}
	error = ipcperm(p, &shmseg->shm_perm,
			(uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto done;
	for (i = 0; i < shminfo.shmseg; i++) {
		if (shmmap_s->shmid == -1)
			break;
		shmmap_s++;
	}
	if (i >= shminfo.shmseg) {
		error = EMFILE;
		goto done;
	}
	size = round_page(shmseg->shm_segsz);
#ifdef VM_PROT_READ_IS_EXEC
	prot = VM_PROT_READ | VM_PROT_EXECUTE;
#else
	prot = VM_PROT_READ;
#endif
	if ((uap->shmflg & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	flags = MAP_ANON | MAP_SHARED;
	if (uap->shmaddr) {
		flags |= MAP_FIXED;
		if (uap->shmflg & SHM_RND) {
			attach_va = (vm_offset_t)uap->shmaddr & ~(SHMLBA-1);
		} else if (((vm_offset_t)uap->shmaddr & (SHMLBA-1)) == 0) {
			attach_va = (vm_offset_t)uap->shmaddr;
		} else {
			error = EINVAL;
			goto done;
		}
	} else {
		/*
		 * This is just a hint to vm_map_find() about where to put it.
		 */
		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr +
				       maxtsiz + maxdsiz);
	}

	/*
	 * Handle alignment.  For large memory maps it is possible
	 * that the MMU can optimize the page table so align anything
	 * that is a multiple of SEG_SIZE to SEG_SIZE.
	 */
	if ((flags & MAP_FIXED) == 0 && (size & SEG_MASK) == 0)
		align = SEG_SIZE;
	else
		align = PAGE_SIZE;

	shm_handle = shmseg->shm_internal;
	vm_object_hold(shm_handle->shm_object);
	vm_object_chain_wait(shm_handle->shm_object);
	vm_object_reference_locked(shm_handle->shm_object);
	rv = vm_map_find(&p->p_vmspace->vm_map,
			 shm_handle->shm_object, 0,
			 &attach_va,
			 size, align,
			 ((flags & MAP_FIXED) ? 0 : 1),
			 VM_MAPTYPE_NORMAL,
			 prot, prot,
			 0);
	vm_object_drop(shm_handle->shm_object);
	if (rv != KERN_SUCCESS) {
		vm_object_deallocate(shm_handle->shm_object);
		error = ENOMEM;
		goto done;
	}
	vm_map_inherit(&p->p_vmspace->vm_map,
		       attach_va, attach_va + size, VM_INHERIT_SHARE);

	KKASSERT(shmmap_s->shmid == -1);
	shmmap_s->va = attach_va;
	shmmap_s->shmid = uap->shmid;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_atime = time_second;
	shmseg->shm_nattch++;
	uap->sysmsg_resultp = (void *)attach_va;
	error = 0;
done:
	rel_mplock();
	return error;
}
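
/*
 * Worked example for the SHM_RND path above, assuming SHMLBA is the
 * page size (4096):
 *
 *	shmaddr   = 0x800f1234
 *	attach_va = 0x800f1234 & ~(4096 - 1) = 0x800f1000
 *
 * Without SHM_RND the same address is rejected with EINVAL because
 * 0x800f1234 & (SHMLBA-1) != 0, i.e. it is not SHMLBA-aligned.
 */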
struct oshmid_ds {
	struct	ipc_perm shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	ushort	shm_cpid;		/* pid, creator */
	ushort	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};

struct oshmctl_args {
	struct sysmsg sysmsg;
	int shmid;
	int cmd;
	struct oshmid_ds *ubuf;
};

/*
 * MPALMOSTSAFE
 */
static int
sys_oshmctl(struct proc *p, struct oshmctl_args *uap)
{
#ifdef COMPAT_43
	struct thread *td = curthread;
	struct shmid_ds *shmseg;
	struct oshmid_ds outbuf;
	int error;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	get_mplock();
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}

	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(p, &shmseg->shm_perm, IPC_R);
		if (error)
			break;
		outbuf.shm_perm = shmseg->shm_perm;
		outbuf.shm_segsz = shmseg->shm_segsz;
		outbuf.shm_cpid = shmseg->shm_cpid;
		outbuf.shm_lpid = shmseg->shm_lpid;
		outbuf.shm_nattch = shmseg->shm_nattch;
		outbuf.shm_atime = shmseg->shm_atime;
		outbuf.shm_dtime = shmseg->shm_dtime;
		outbuf.shm_ctime = shmseg->shm_ctime;
		outbuf.shm_handle = shmseg->shm_internal;
		error = copyout((caddr_t)&outbuf, uap->ubuf, sizeof(outbuf));
		break;
	default:
		/* XXX casting to (sy_call_t *) is bogus, as usual. */
		error = sys_shmctl((struct shmctl_args *)uap);
	}
done:
	rel_mplock();
	return error;
#else
	return EINVAL;
#endif
}

/*
 * MPALMOSTSAFE
 */
int
sys_shmctl(struct shmctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int error;
	struct shmid_ds inbuf;
	struct shmid_ds *shmseg;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	get_mplock();
	shmseg = shm_find_segment_by_shmid(uap->shmid);
	if (shmseg == NULL) {
		error = EINVAL;
		goto done;
	}

	switch (uap->cmd) {
	case IPC_STAT:
		error = ipcperm(p, &shmseg->shm_perm, IPC_R);
		if (error == 0)
			error = copyout(shmseg, uap->buf, sizeof(inbuf));
		break;
	case IPC_SET:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0)
			error = copyin(uap->buf, &inbuf, sizeof(inbuf));
		if (error == 0) {
			shmseg->shm_perm.uid = inbuf.shm_perm.uid;
			shmseg->shm_perm.gid = inbuf.shm_perm.gid;
			shmseg->shm_perm.mode =
			    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
			    (inbuf.shm_perm.mode & ACCESSPERMS);
			shmseg->shm_ctime = time_second;
		}
		break;
	case IPC_RMID:
		error = ipcperm(p, &shmseg->shm_perm, IPC_M);
		if (error == 0) {
			shmseg->shm_perm.key = IPC_PRIVATE;
			shmseg->shm_perm.mode |= SHMSEG_REMOVED;
			if (shmseg->shm_nattch <= 0) {
				shm_deallocate_segment(shmseg);
				shm_last_free = IPCID_TO_IX(uap->shmid);
			}
		}
		break;
#if 0
	case SHM_LOCK:
	case SHM_UNLOCK:
#endif
	default:
		error = EINVAL;
		break;
	}
done:
	rel_mplock();
	return error;
}
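
/*
 * Note on IPC_RMID semantics, with a small illustrative sequence:
 * removal is deferred while attachments exist.  IPC_RMID only marks
 * the segment SHMSEG_REMOVED and rekeys it to IPC_PRIVATE; the memory
 * is reclaimed by shm_delete_mapping() once shm_nattch drops to 0.
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);
 *	shmctl(id, IPC_RMID, NULL);	// id now invisible to shmget()
 *	p[0] = 42;			// OK: existing mapping persists
 *	shmdt(p);			// last detach frees the segment
 */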
static int
shmget_existing(struct proc *p, struct shmget_args *uap, int mode, int segnum)
{
	struct shmid_ds *shmseg;
	int error;

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = tsleep((caddr_t)shmseg, PCATCH, "shmget", 0);
		if (error)
			return error;
		return EAGAIN;
	}
	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
		return EEXIST;
	error = ipcperm(p, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (uap->size && uap->size > shmseg->shm_segsz)
		return EINVAL;
	uap->sysmsg_result = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

static int
shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode)
{
	int i, segnum, shmid;
	size_t size;
	struct ucred *cred = p->p_ucred;
	struct shmid_ds *shmseg;
	struct shm_handle *shm_handle;

	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
		return EINVAL;
	if (shm_nused >= shminfo.shmmni) /* any shmids left? */
		return ENOSPC;
	size = round_page(uap->size);
	if (shm_committed + btoc(size) > shminfo.shmall)
		return ENOMEM;
	if (shm_last_free < 0) {
		shmrealloc();	/* maybe expand the shmsegs[] array */
		for (i = 0; i < shmalloced; i++) {
			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
				break;
		}
		if (i == shmalloced)
			return ENOSPC;
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in kmalloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->shm_perm.key = uap->key;
	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
	shm_handle = kmalloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	if (shm_use_phys) {
		shm_handle->shm_object =
		    phys_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_handle->shm_object =
		    swap_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
	}
	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);

	shmseg->shm_internal = shm_handle;
	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->shm_segsz = uap->size;
	shmseg->shm_cpid = p->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;

	/*
	 * If a physical mapping is desired and we have a ton of free pages
	 * we pre-allocate the pages here in order to avoid on-the-fly
	 * allocation later.  This has a big effect on database warm-up
	 * times since DFly supports concurrent page faults coming from the
	 * same VM object for pages which already exist.
	 *
	 * This can hang the kernel for a while so only do it if shm_use_phys
	 * is set to 2 or higher.
	 */
	if (shm_use_phys > 1) {
		vm_pindex_t pi, pmax;
		vm_page_t m;

		pmax = round_page(shmseg->shm_segsz) >> PAGE_SHIFT;
		vm_object_hold(shm_handle->shm_object);
		if (pmax > vmstats.v_free_count)
			pmax = vmstats.v_free_count;
		for (pi = 0; pi < pmax; ++pi) {
			m = vm_page_grab(shm_handle->shm_object, pi,
					 VM_ALLOC_SYSTEM | VM_ALLOC_NULL_OK |
					 VM_ALLOC_ZERO);
			if (m == NULL)
				break;
			vm_pager_get_page(shm_handle->shm_object, &m, 1);
			vm_page_activate(m);
			vm_page_wakeup(m);
			lwkt_yield();
		}
		vm_object_drop(shm_handle->shm_object);
	}

	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup((caddr_t)shmseg);
	}
	uap->sysmsg_result = shmid;
	return 0;
}
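
/*
 * Worked example of the size accounting above, assuming 4KB pages:
 * a shmget() request for 5000 bytes is rounded up to
 *
 *	size = round_page(5000) = 8192 bytes
 *	btoc(8192) = 2 pages
 *
 * so 2 pages are charged against the kern.ipc.shmall limit, even
 * though shm_segsz records the caller's original 5000.
 */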
/*
 * MPALMOSTSAFE
 */
int
sys_shmget(struct shmget_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int segnum, mode, error;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	mode = uap->shmflg & ACCESSPERMS;
	get_mplock();

	if (uap->key != IPC_PRIVATE) {
again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(p, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done;
		}
	}
	error = shmget_allocate_segment(p, uap, mode);
done:
	rel_mplock();
	return (error);
}

/*
 * shmsys_args(int which, int a2, ...) (VARARGS)
 *
 * MPALMOSTSAFE
 */
int
sys_shmsys(struct shmsys_args *uap)
{
	struct thread *td = curthread;
	unsigned int which = (unsigned int)uap->which;
	int error;

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	if (which >= NELEM(shmcalls))
		return EINVAL;
	get_mplock();
	bcopy(&uap->a2, &uap->which,
	      sizeof(struct shmsys_args) - offsetof(struct shmsys_args, a2));
	error = ((*shmcalls[which])(uap));
	rel_mplock();

	return(error);
}
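
/*
 * The dispatch above shifts the remaining arguments down over `which'
 * and indexes shmcalls[], so the legacy multiplexer maps as follows
 * (indices follow the shmcalls[] initializer at the top of the file;
 * argument layouts shown are illustrative):
 *
 *	shmsys(0, shmid, addr, flg)	-> shmat(shmid, addr, flg)
 *	shmsys(1, shmid, cmd, buf)	-> old-style shmctl (sys_oshmctl)
 *	shmsys(2, addr)			-> shmdt(addr)
 *	shmsys(3, key, size, flg)	-> shmget(key, size, flg)
 *	shmsys(4, shmid, cmd, buf)	-> shmctl(shmid, cmd, buf)
 */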
void
shmfork(struct proc *p1, struct proc *p2)
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	get_mplock();
	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = kmalloc(size, M_SHM, M_WAITOK);
	bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmmap_s, size);
	p2->p_vmspace->vm_shm = (caddr_t)shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
	}
	rel_mplock();
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	if ((base = (struct shmmap_state *)vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		get_mplock();
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		kfree(base, M_SHM);
		rel_mplock();
	}
}

static void
shmrealloc(void)
{
	int i;
	struct shmid_ds *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = kmalloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	for (; i < shminfo.shmmni; i++) {
		/*
		 * Initialize the new slots in the new array; indexing
		 * shmsegs[] here would write past the end of the old
		 * allocation.
		 */
		newsegs[i].shm_perm.mode = SHMSEG_FREE;
		newsegs[i].shm_perm.seq = 0;
	}
	kfree(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static void
shminit(void *dummy)
{
	int i;

	/*
	 * If not overridden by a tunable set the maximum shm to
	 * 2/3 of main memory.
	 */
	if (shminfo.shmall == 0)
		shminfo.shmall = (size_t)vmstats.v_page_count * 2 / 3;

	shminfo.shmmax = shminfo.shmall * PAGE_SIZE;
	shmalloced = shminfo.shmmni;
	shmsegs = kmalloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm.seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
}
SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL);
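
/*
 * Worked example of the shminit() defaults above, assuming 4KB pages
 * on a machine with 4GB of RAM (v_page_count ~= 1048576):
 *
 *	shmall = 1048576 * 2 / 3 = 699050 pages
 *	shmmax = 699050 * 4096  ~= 2.67GB
 *
 * i.e. by default shared memory may span up to roughly two thirds of
 * physical memory unless kern.ipc.shmmaxpgs overrides shmall.
 */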