xref: /dragonfly/sys/kern/sysv_msg.c (revision bcb3e04d)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.18 2008/01/06 16:55:51 swildner Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/priv.h>
31 #include <sys/msg.h>
32 #include <sys/sysent.h>
33 #include <sys/sysctl.h>
34 #include <sys/malloc.h>
35 #include <sys/jail.h>
36 
37 #include <sys/mplock2.h>
38 
39 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
40 
41 static void msginit (void *);
42 
43 #define MSG_DEBUG
44 #undef MSG_DEBUG_OK
45 
46 static void msg_freehdr (struct msg *msghdr);
47 
48 /* XXX casting to (sy_call_t *) is bogus, as usual. */
49 static sy_call_t *msgcalls[] = {
50 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
51 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
52 };
53 
54 struct msg {
55 	struct	msg *msg_next;	/* next msg in the chain */
56 	long	msg_type;	/* type of this message */
57     				/* >0 -> type of this message */
58     				/* 0 -> free header */
59 	u_short	msg_ts;		/* size of this message */
60 	short	msg_spot;	/* location of start of msg in buffer */
61 };
62 
63 
/*
 * Compile-time defaults for the message limits.  Each one can be overridden
 * by defining the macro before this point (e.g. from the kernel config).
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#ifndef MSGSEG
#define MSGSEG	2048		/* msginit panics if above 32767 */
#endif

/* Largest possible message: every segment of the pool. */
#define MSGMAX	(MSGSSZ*MSGSEG)

#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
80 
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense for it to be less than 8 or greater than about 256.
 * Consequently, msginit() checks that msgssz is a power of two between
 * 8 and 1024 inclusive (and panics if it isn't).
 */
91 struct msginfo msginfo = {
92                 MSGMAX,         /* max chars in a message */
93                 MSGMNI,         /* # of message queue identifiers */
94                 MSGMNB,         /* max chars in a queue */
95                 MSGTQL,         /* max messages in system */
96                 MSGSSZ,         /* size of a message segment */
97                 		/* (must be small power of 2 greater than 4) */
98                 MSGSEG          /* number of message segments */
99 };
100 
101 /*
102  * macros to convert between msqid_ds's and msqid's.
103  * (specific to this implementation)
104  */
/*
 * MSQID(ix, ds)  - build an id: slot index in the low 16 bits, the
 *                  sequence number from ds.msg_perm.seq in the high 16.
 * MSQID_IX(id)   - extract the slot index from an id.
 * MSQID_SEQ(id)  - extract the sequence number from an id.
 *
 * The sequence number is widened to unsigned before shifting so that a
 * value with bit 15 set is not shifted into the sign bit of an int.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
			 ((((unsigned int)(ds).msg_perm.seq) << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
108 
109 /*
110  * The rest of this file is specific to this particular implementation.
111  */
112 
/*
 * One entry per pool segment; entries are chained through `next' either
 * into a message's segment list or into the free-segment list.
 */
struct msgmap {
	short	next;		/*
				 * next segment in buffer:
				 *   -1             -> end of chain
				 *   0..(MSGSEG-1)  -> index of next segment
				 */
};
118 
/*
 * Flag kept in msg_perm.mode (above the 0777 permission bits) while a
 * queue is locked by msgsnd.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Free lists. */
static int nfree_msgmaps;		/* # of free map entries */
static short free_msgmaps;		/* head of linked list of free map entries */
static struct msg *free_msghdrs;	/* list of free msg headers */

/* Tables allocated by msginit(). */
static char *msgpool;			/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;		/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;		/* msginfo.msgtql msg headers */
static struct msqid_ds *msqids;		/* msginfo.msgmni msqid_ds struct's */
128 
129 static void
130 msginit(void *dummy)
131 {
132 	int i;
133 
134 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
135 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
136 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
137 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
138 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
139 
140 	/*
141 	 * msginfo.msgssz should be a power of two for efficiency reasons.
142 	 * It is also pretty silly if msginfo.msgssz is less than 8
143 	 * or greater than about 256 so ...
144 	 */
145 
146 	i = 8;
147 	while (i < 1024 && i != msginfo.msgssz)
148 		i <<= 1;
149     	if (i != msginfo.msgssz) {
150 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
151 		    msginfo.msgssz);
152 		panic("msginfo.msgssz not a small power of 2");
153 	}
154 
155 	if (msginfo.msgseg > 32767) {
156 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
157 		panic("msginfo.msgseg > 32767");
158 	}
159 
160 	if (msgmaps == NULL)
161 		panic("msgmaps is NULL");
162 
163 	for (i = 0; i < msginfo.msgseg; i++) {
164 		if (i > 0)
165 			msgmaps[i-1].next = i;
166 		msgmaps[i].next = -1;	/* implies entry is available */
167 	}
168 	free_msgmaps = 0;
169 	nfree_msgmaps = msginfo.msgseg;
170 
171 	if (msghdrs == NULL)
172 		panic("msghdrs is NULL");
173 
174 	for (i = 0; i < msginfo.msgtql; i++) {
175 		msghdrs[i].msg_type = 0;
176 		if (i > 0)
177 			msghdrs[i-1].msg_next = &msghdrs[i];
178 		msghdrs[i].msg_next = NULL;
179     	}
180 	free_msghdrs = &msghdrs[0];
181 
182 	if (msqids == NULL)
183 		panic("msqids is NULL");
184 
185 	for (i = 0; i < msginfo.msgmni; i++) {
186 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
187 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
188 		msqids[i].msg_perm.mode = 0;
189 	}
190 }
191 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
192 
193 /*
194  * Entry point for all MSG calls
195  *
196  * msgsys_args(int which, int a2, ...) (VARARGS)
197  *
198  * MPALMOSTSAFE
199  */
200 int
201 sys_msgsys(struct msgsys_args *uap)
202 {
203 	struct thread *td = curthread;
204 	unsigned int which = (unsigned int)uap->which;
205 	int error;
206 
207 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
208 		return (ENOSYS);
209 
210 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
211 		return (EINVAL);
212 	bcopy(&uap->a2, &uap->which,
213 	      sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
214 	get_mplock();
215 	error = (*msgcalls[which])(uap);
216 	rel_mplock();
217 	return (error);
218 }
219 
220 static void
221 msg_freehdr(struct msg *msghdr)
222 {
223 	while (msghdr->msg_ts > 0) {
224 		short next;
225 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
226 			panic("msghdr->msg_spot out of range");
227 		next = msgmaps[msghdr->msg_spot].next;
228 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
229 		free_msgmaps = msghdr->msg_spot;
230 		nfree_msgmaps++;
231 		msghdr->msg_spot = next;
232 		if (msghdr->msg_ts >= msginfo.msgssz)
233 			msghdr->msg_ts -= msginfo.msgssz;
234 		else
235 			msghdr->msg_ts = 0;
236 	}
237 	if (msghdr->msg_spot != -1)
238 		panic("msghdr->msg_spot != -1");
239 	msghdr->msg_next = free_msghdrs;
240 	free_msghdrs = msghdr;
241 }
242 
243 /*
244  * MPALMOSTSAFE
245  */
246 int
247 sys_msgctl(struct msgctl_args *uap)
248 {
249 	struct thread *td = curthread;
250 	struct proc *p = td->td_proc;
251 	int msqid = uap->msqid;
252 	int cmd = uap->cmd;
253 	struct msqid_ds *user_msqptr = uap->buf;
254 	int rval, eval;
255 	struct msqid_ds msqbuf;
256 	struct msqid_ds *msqptr;
257 
258 #ifdef MSG_DEBUG_OK
259 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
260 #endif
261 
262 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
263 		return (ENOSYS);
264 
265 	get_mplock();
266 	msqid = IPCID_TO_IX(msqid);
267 
268 	if (msqid < 0 || msqid >= msginfo.msgmni) {
269 #ifdef MSG_DEBUG_OK
270 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
271 		    msginfo.msgmni);
272 #endif
273 		eval = EINVAL;
274 		goto done;
275 	}
276 
277 	msqptr = &msqids[msqid];
278 
279 	if (msqptr->msg_qbytes == 0) {
280 #ifdef MSG_DEBUG_OK
281 		kprintf("no such msqid\n");
282 #endif
283 		eval = EINVAL;
284 		goto done;
285 	}
286 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
287 #ifdef MSG_DEBUG_OK
288 		kprintf("wrong sequence number\n");
289 #endif
290 		eval = EINVAL;
291 		goto done;
292 	}
293 
294 	rval = 0;
295 
296 	switch (cmd) {
297 	case IPC_RMID:
298 	{
299 		struct msg *msghdr;
300 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
301 			break;
302 		/* Free the message headers */
303 		msghdr = msqptr->msg_first;
304 		while (msghdr != NULL) {
305 			struct msg *msghdr_tmp;
306 
307 			/* Free the segments of each message */
308 			msqptr->msg_cbytes -= msghdr->msg_ts;
309 			msqptr->msg_qnum--;
310 			msghdr_tmp = msghdr;
311 			msghdr = msghdr->msg_next;
312 			msg_freehdr(msghdr_tmp);
313 		}
314 
315 		if (msqptr->msg_cbytes != 0)
316 			panic("msg_cbytes is screwed up");
317 		if (msqptr->msg_qnum != 0)
318 			panic("msg_qnum is screwed up");
319 
320 		msqptr->msg_qbytes = 0;	/* Mark it as free */
321 
322 		wakeup((caddr_t)msqptr);
323 	}
324 
325 		break;
326 
327 	case IPC_SET:
328 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
329 			break;
330 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
331 			break;
332 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
333 			eval = priv_check(td, PRIV_ROOT);
334 			if (eval)
335 				break;
336 		}
337 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
338 #ifdef MSG_DEBUG_OK
339 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
340 			    msginfo.msgmnb);
341 #endif
342 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
343 		}
344 		if (msqbuf.msg_qbytes == 0) {
345 #ifdef MSG_DEBUG_OK
346 			kprintf("can't reduce msg_qbytes to 0\n");
347 #endif
348 			eval = EINVAL;		/* non-standard errno! */
349 			break;
350 		}
351 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
352 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
353 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
354 					(msqbuf.msg_perm.mode & 0777);
355 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
356 		msqptr->msg_ctime = time_second;
357 		break;
358 
359 	case IPC_STAT:
360 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
361 #ifdef MSG_DEBUG_OK
362 			kprintf("requester doesn't have read access\n");
363 #endif
364 			eval = EINVAL;
365 			break;
366 		}
367 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
368 		break;
369 
370 	default:
371 #ifdef MSG_DEBUG_OK
372 		kprintf("invalid command %d\n", cmd);
373 #endif
374 		eval = EINVAL;
375 		break;
376 	}
377 done:
378 	rel_mplock();
379 	if (eval == 0)
380 		uap->sysmsg_result = rval;
381 	return(eval);
382 }
383 
384 /*
385  * MPALMOSTSAFE
386  */
387 int
388 sys_msgget(struct msgget_args *uap)
389 {
390 	struct thread *td = curthread;
391 	int msqid, eval;
392 	int key = uap->key;
393 	int msgflg = uap->msgflg;
394 	struct ucred *cred = td->td_ucred;
395 	struct msqid_ds *msqptr = NULL;
396 
397 #ifdef MSG_DEBUG_OK
398 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
399 #endif
400 	if (!jail_sysvipc_allowed && cred->cr_prison != NULL)
401 		return (ENOSYS);
402 
403 	eval = 0;
404 	get_mplock();
405 
406 	if (key != IPC_PRIVATE) {
407 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
408 			msqptr = &msqids[msqid];
409 			if (msqptr->msg_qbytes != 0 &&
410 			    msqptr->msg_perm.key == key)
411 				break;
412 		}
413 		if (msqid < msginfo.msgmni) {
414 #ifdef MSG_DEBUG_OK
415 			kprintf("found public key\n");
416 #endif
417 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
418 #ifdef MSG_DEBUG_OK
419 				kprintf("not exclusive\n");
420 #endif
421 				eval = EEXIST;
422 				goto done;
423 			}
424 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
425 #ifdef MSG_DEBUG_OK
426 				kprintf("requester doesn't have 0%o access\n",
427 				    msgflg & 0700);
428 #endif
429 				goto done;
430 			}
431 			goto done;
432 		}
433 	}
434 
435 #ifdef MSG_DEBUG_OK
436 	kprintf("need to allocate the msqid_ds\n");
437 #endif
438 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
439 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
440 			/*
441 			 * Look for an unallocated and unlocked msqid_ds.
442 			 * msqid_ds's can be locked by msgsnd or msgrcv while
443 			 * they are copying the message in/out.  We can't
444 			 * re-use the entry until they release it.
445 			 */
446 			msqptr = &msqids[msqid];
447 			if (msqptr->msg_qbytes == 0 &&
448 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
449 				break;
450 		}
451 		if (msqid == msginfo.msgmni) {
452 #ifdef MSG_DEBUG_OK
453 			kprintf("no more msqid_ds's available\n");
454 #endif
455 			eval = ENOSPC;
456 			goto done;
457 		}
458 #ifdef MSG_DEBUG_OK
459 		kprintf("msqid %d is available\n", msqid);
460 #endif
461 		msqptr->msg_perm.key = key;
462 		msqptr->msg_perm.cuid = cred->cr_uid;
463 		msqptr->msg_perm.uid = cred->cr_uid;
464 		msqptr->msg_perm.cgid = cred->cr_gid;
465 		msqptr->msg_perm.gid = cred->cr_gid;
466 		msqptr->msg_perm.mode = (msgflg & 0777);
467 		/* Make sure that the returned msqid is unique */
468 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
469 		msqptr->msg_first = NULL;
470 		msqptr->msg_last = NULL;
471 		msqptr->msg_cbytes = 0;
472 		msqptr->msg_qnum = 0;
473 		msqptr->msg_qbytes = msginfo.msgmnb;
474 		msqptr->msg_lspid = 0;
475 		msqptr->msg_lrpid = 0;
476 		msqptr->msg_stime = 0;
477 		msqptr->msg_rtime = 0;
478 		msqptr->msg_ctime = time_second;
479 	} else {
480 #ifdef MSG_DEBUG_OK
481 		kprintf("didn't find it and wasn't asked to create it\n");
482 #endif
483 		eval = ENOENT;
484 	}
485 
486 done:
487 	rel_mplock();
488 	/* Construct the unique msqid */
489 	if (eval == 0)
490 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
491 	return(eval);
492 }
493 
494 /*
495  * MPALMOSTSAFE
496  */
497 int
498 sys_msgsnd(struct msgsnd_args *uap)
499 {
500 	struct thread *td = curthread;
501 	int msqid = uap->msqid;
502 	void *user_msgp = uap->msgp;
503 	size_t msgsz = uap->msgsz;
504 	int msgflg = uap->msgflg;
505 	int segs_needed, eval;
506 	struct msqid_ds *msqptr;
507 	struct msg *msghdr;
508 	short next;
509 
510 #ifdef MSG_DEBUG_OK
511 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
512 	    msgflg);
513 #endif
514 
515 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
516 		return (ENOSYS);
517 
518 	get_mplock();
519 	msqid = IPCID_TO_IX(msqid);
520 
521 	if (msqid < 0 || msqid >= msginfo.msgmni) {
522 #ifdef MSG_DEBUG_OK
523 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
524 		    msginfo.msgmni);
525 #endif
526 		eval = EINVAL;
527 		goto done;
528 	}
529 
530 	msqptr = &msqids[msqid];
531 	if (msqptr->msg_qbytes == 0) {
532 #ifdef MSG_DEBUG_OK
533 		kprintf("no such message queue id\n");
534 #endif
535 		eval = EINVAL;
536 		goto done;
537 	}
538 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
539 #ifdef MSG_DEBUG_OK
540 		kprintf("wrong sequence number\n");
541 #endif
542 		eval = EINVAL;
543 		goto done;
544 	}
545 
546 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
547 #ifdef MSG_DEBUG_OK
548 		kprintf("requester doesn't have write access\n");
549 #endif
550 		eval = eval;
551 		goto done;
552 	}
553 
554 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
555 #ifdef MSG_DEBUG_OK
556 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
557 	    segs_needed);
558 #endif
559 	for (;;) {
560 		int need_more_resources = 0;
561 
562 		/*
563 		 * check msgsz
564 		 * (inside this loop in case msg_qbytes changes while we sleep)
565 		 */
566 
567 		if (msgsz > msqptr->msg_qbytes) {
568 #ifdef MSG_DEBUG_OK
569 			kprintf("msgsz > msqptr->msg_qbytes\n");
570 #endif
571 			eval = EINVAL;
572 			goto done;
573 		}
574 
575 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
576 #ifdef MSG_DEBUG_OK
577 			kprintf("msqid is locked\n");
578 #endif
579 			need_more_resources = 1;
580 		}
581 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
582 #ifdef MSG_DEBUG_OK
583 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
584 #endif
585 			need_more_resources = 1;
586 		}
587 		if (segs_needed > nfree_msgmaps) {
588 #ifdef MSG_DEBUG_OK
589 			kprintf("segs_needed > nfree_msgmaps\n");
590 #endif
591 			need_more_resources = 1;
592 		}
593 		if (free_msghdrs == NULL) {
594 #ifdef MSG_DEBUG_OK
595 			kprintf("no more msghdrs\n");
596 #endif
597 			need_more_resources = 1;
598 		}
599 
600 		if (need_more_resources) {
601 			int we_own_it;
602 
603 			if ((msgflg & IPC_NOWAIT) != 0) {
604 #ifdef MSG_DEBUG_OK
605 				kprintf("need more resources but caller doesn't want to wait\n");
606 #endif
607 				eval = EAGAIN;
608 				goto done;
609 			}
610 
611 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
612 #ifdef MSG_DEBUG_OK
613 				kprintf("we don't own the msqid_ds\n");
614 #endif
615 				we_own_it = 0;
616 			} else {
617 				/* Force later arrivals to wait for our
618 				   request */
619 #ifdef MSG_DEBUG_OK
620 				kprintf("we own the msqid_ds\n");
621 #endif
622 				msqptr->msg_perm.mode |= MSG_LOCKED;
623 				we_own_it = 1;
624 			}
625 #ifdef MSG_DEBUG_OK
626 			kprintf("goodnight\n");
627 #endif
628 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
629 #ifdef MSG_DEBUG_OK
630 			kprintf("good morning, eval=%d\n", eval);
631 #endif
632 			if (we_own_it)
633 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
634 			if (eval != 0) {
635 #ifdef MSG_DEBUG_OK
636 				kprintf("msgsnd:  interrupted system call\n");
637 #endif
638 				eval = EINTR;
639 				goto done;
640 			}
641 
642 			/*
643 			 * Make sure that the msq queue still exists
644 			 */
645 
646 			if (msqptr->msg_qbytes == 0) {
647 #ifdef MSG_DEBUG_OK
648 				kprintf("msqid deleted\n");
649 #endif
650 				eval = EIDRM;
651 				goto done;
652 			}
653 
654 		} else {
655 #ifdef MSG_DEBUG_OK
656 			kprintf("got all the resources that we need\n");
657 #endif
658 			break;
659 		}
660 	}
661 
662 	/*
663 	 * We have the resources that we need.
664 	 * Make sure!
665 	 */
666 
667 	if (msqptr->msg_perm.mode & MSG_LOCKED)
668 		panic("msg_perm.mode & MSG_LOCKED");
669 	if (segs_needed > nfree_msgmaps)
670 		panic("segs_needed > nfree_msgmaps");
671 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
672 		panic("msgsz + msg_cbytes > msg_qbytes");
673 	if (free_msghdrs == NULL)
674 		panic("no more msghdrs");
675 
676 	/*
677 	 * Re-lock the msqid_ds in case we page-fault when copying in the
678 	 * message
679 	 */
680 
681 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
682 		panic("msqid_ds is already locked");
683 	msqptr->msg_perm.mode |= MSG_LOCKED;
684 
685 	/*
686 	 * Allocate a message header
687 	 */
688 
689 	msghdr = free_msghdrs;
690 	free_msghdrs = msghdr->msg_next;
691 	msghdr->msg_spot = -1;
692 	msghdr->msg_ts = msgsz;
693 
694 	/*
695 	 * Allocate space for the message
696 	 */
697 
698 	while (segs_needed > 0) {
699 		if (nfree_msgmaps <= 0)
700 			panic("not enough msgmaps");
701 		if (free_msgmaps == -1)
702 			panic("nil free_msgmaps");
703 		next = free_msgmaps;
704 		if (next <= -1)
705 			panic("next too low #1");
706 		if (next >= msginfo.msgseg)
707 			panic("next out of range #1");
708 #ifdef MSG_DEBUG_OK
709 		kprintf("allocating segment %d to message\n", next);
710 #endif
711 		free_msgmaps = msgmaps[next].next;
712 		nfree_msgmaps--;
713 		msgmaps[next].next = msghdr->msg_spot;
714 		msghdr->msg_spot = next;
715 		segs_needed--;
716 	}
717 
718 	/*
719 	 * Copy in the message type
720 	 */
721 
722 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
723 	    sizeof(msghdr->msg_type))) != 0) {
724 #ifdef MSG_DEBUG_OK
725 		kprintf("error %d copying the message type\n", eval);
726 #endif
727 		msg_freehdr(msghdr);
728 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
729 		wakeup((caddr_t)msqptr);
730 		goto done;
731 	}
732 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
733 
734 	/*
735 	 * Validate the message type
736 	 */
737 
738 	if (msghdr->msg_type < 1) {
739 		msg_freehdr(msghdr);
740 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
741 		wakeup((caddr_t)msqptr);
742 #ifdef MSG_DEBUG_OK
743 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
744 #endif
745 		eval = EINVAL;
746 		goto done;
747 	}
748 
749 	/*
750 	 * Copy in the message body
751 	 */
752 
753 	next = msghdr->msg_spot;
754 	while (msgsz > 0) {
755 		size_t tlen;
756 		if (msgsz > msginfo.msgssz)
757 			tlen = msginfo.msgssz;
758 		else
759 			tlen = msgsz;
760 		if (next <= -1)
761 			panic("next too low #2");
762 		if (next >= msginfo.msgseg)
763 			panic("next out of range #2");
764 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
765 		    tlen)) != 0) {
766 #ifdef MSG_DEBUG_OK
767 			kprintf("error %d copying in message segment\n", eval);
768 #endif
769 			msg_freehdr(msghdr);
770 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
771 			wakeup((caddr_t)msqptr);
772 			goto done;
773 		}
774 		msgsz -= tlen;
775 		user_msgp = (char *)user_msgp + tlen;
776 		next = msgmaps[next].next;
777 	}
778 	if (next != -1)
779 		panic("didn't use all the msg segments");
780 
781 	/*
782 	 * We've got the message.  Unlock the msqid_ds.
783 	 */
784 
785 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
786 
787 	/*
788 	 * Make sure that the msqid_ds is still allocated.
789 	 */
790 
791 	if (msqptr->msg_qbytes == 0) {
792 		msg_freehdr(msghdr);
793 		wakeup((caddr_t)msqptr);
794 		eval = EIDRM;
795 		goto done;
796 	}
797 
798 	/*
799 	 * Put the message into the queue
800 	 */
801 
802 	if (msqptr->msg_first == NULL) {
803 		msqptr->msg_first = msghdr;
804 		msqptr->msg_last = msghdr;
805 	} else {
806 		msqptr->msg_last->msg_next = msghdr;
807 		msqptr->msg_last = msghdr;
808 	}
809 	msqptr->msg_last->msg_next = NULL;
810 
811 	msqptr->msg_cbytes += msghdr->msg_ts;
812 	msqptr->msg_qnum++;
813 	msqptr->msg_lspid = td->td_proc->p_pid;
814 	msqptr->msg_stime = time_second;
815 
816 	wakeup((caddr_t)msqptr);
817 	eval = 0;
818 done:
819 	rel_mplock();
820 	if (eval == 0)
821 		uap->sysmsg_result = 0;
822 	return (eval);
823 }
824 
825 /*
826  * MPALMOSTSAFE
827  */
828 int
829 sys_msgrcv(struct msgrcv_args *uap)
830 {
831 	struct thread *td = curthread;
832 	int msqid = uap->msqid;
833 	void *user_msgp = uap->msgp;
834 	size_t msgsz = uap->msgsz;
835 	long msgtyp = uap->msgtyp;
836 	int msgflg = uap->msgflg;
837 	size_t len;
838 	struct msqid_ds *msqptr;
839 	struct msg *msghdr;
840 	int eval;
841 	short next;
842 
843 #ifdef MSG_DEBUG_OK
844 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
845 	    msgsz, msgtyp, msgflg);
846 #endif
847 
848 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
849 		return (ENOSYS);
850 
851 	get_mplock();
852 	msqid = IPCID_TO_IX(msqid);
853 
854 	if (msqid < 0 || msqid >= msginfo.msgmni) {
855 #ifdef MSG_DEBUG_OK
856 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
857 		    msginfo.msgmni);
858 #endif
859 		eval = EINVAL;
860 		goto done;
861 	}
862 
863 	msqptr = &msqids[msqid];
864 	if (msqptr->msg_qbytes == 0) {
865 #ifdef MSG_DEBUG_OK
866 		kprintf("no such message queue id\n");
867 #endif
868 		eval = EINVAL;
869 		goto done;
870 	}
871 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
872 #ifdef MSG_DEBUG_OK
873 		kprintf("wrong sequence number\n");
874 #endif
875 		eval = EINVAL;
876 		goto done;
877 	}
878 
879 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
880 #ifdef MSG_DEBUG_OK
881 		kprintf("requester doesn't have read access\n");
882 #endif
883 		goto done;
884 	}
885 
886 	msghdr = NULL;
887 	while (msghdr == NULL) {
888 		if (msgtyp == 0) {
889 			msghdr = msqptr->msg_first;
890 			if (msghdr != NULL) {
891 				if (msgsz < msghdr->msg_ts &&
892 				    (msgflg & MSG_NOERROR) == 0) {
893 #ifdef MSG_DEBUG_OK
894 					kprintf("first message on the queue is too big (want %d, got %d)\n",
895 					    msgsz, msghdr->msg_ts);
896 #endif
897 					eval = E2BIG;
898 					goto done;
899 				}
900 				if (msqptr->msg_first == msqptr->msg_last) {
901 					msqptr->msg_first = NULL;
902 					msqptr->msg_last = NULL;
903 				} else {
904 					msqptr->msg_first = msghdr->msg_next;
905 					if (msqptr->msg_first == NULL)
906 						panic("msg_first/last screwed up #1");
907 				}
908 			}
909 		} else {
910 			struct msg *previous;
911 			struct msg **prev;
912 
913 			previous = NULL;
914 			prev = &(msqptr->msg_first);
915 			while ((msghdr = *prev) != NULL) {
916 				/*
917 				 * Is this message's type an exact match or is
918 				 * this message's type less than or equal to
919 				 * the absolute value of a negative msgtyp?
920 				 * Note that the second half of this test can
921 				 * NEVER be true if msgtyp is positive since
922 				 * msg_type is always positive!
923 				 */
924 
925 				if (msgtyp == msghdr->msg_type ||
926 				    msghdr->msg_type <= -msgtyp) {
927 #ifdef MSG_DEBUG_OK
928 					kprintf("found message type %d, requested %d\n",
929 					    msghdr->msg_type, msgtyp);
930 #endif
931 					if (msgsz < msghdr->msg_ts &&
932 					    (msgflg & MSG_NOERROR) == 0) {
933 #ifdef MSG_DEBUG_OK
934 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
935 						    msgsz, msghdr->msg_ts);
936 #endif
937 						eval = E2BIG;
938 						goto done;
939 					}
940 					*prev = msghdr->msg_next;
941 					if (msghdr == msqptr->msg_last) {
942 						if (previous == NULL) {
943 							if (prev !=
944 							    &msqptr->msg_first)
945 								panic("msg_first/last screwed up #2");
946 							msqptr->msg_first =
947 							    NULL;
948 							msqptr->msg_last =
949 							    NULL;
950 						} else {
951 							if (prev ==
952 							    &msqptr->msg_first)
953 								panic("msg_first/last screwed up #3");
954 							msqptr->msg_last =
955 							    previous;
956 						}
957 					}
958 					break;
959 				}
960 				previous = msghdr;
961 				prev = &(msghdr->msg_next);
962 			}
963 		}
964 
965 		/*
966 		 * We've either extracted the msghdr for the appropriate
967 		 * message or there isn't one.
968 		 * If there is one then bail out of this loop.
969 		 */
970 
971 		if (msghdr != NULL)
972 			break;
973 
974 		/*
975 		 * Hmph!  No message found.  Does the user want to wait?
976 		 */
977 
978 		if ((msgflg & IPC_NOWAIT) != 0) {
979 #ifdef MSG_DEBUG_OK
980 			kprintf("no appropriate message found (msgtyp=%d)\n",
981 			    msgtyp);
982 #endif
983 			/* The SVID says to return ENOMSG. */
984 #ifdef ENOMSG
985 			eval = ENOMSG;
986 #else
987 			/* Unfortunately, BSD doesn't define that code yet! */
988 			eval = EAGAIN;
989 #endif
990 			goto done;
991 		}
992 
993 		/*
994 		 * Wait for something to happen
995 		 */
996 
997 #ifdef MSG_DEBUG_OK
998 		kprintf("msgrcv:  goodnight\n");
999 #endif
1000 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
1001 #ifdef MSG_DEBUG_OK
1002 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
1003 #endif
1004 
1005 		if (eval != 0) {
1006 #ifdef MSG_DEBUG_OK
1007 			kprintf("msgsnd:  interrupted system call\n");
1008 #endif
1009 			eval = EINTR;
1010 			goto done;
1011 		}
1012 
1013 		/*
1014 		 * Make sure that the msq queue still exists
1015 		 */
1016 
1017 		if (msqptr->msg_qbytes == 0 ||
1018 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1019 #ifdef MSG_DEBUG_OK
1020 			kprintf("msqid deleted\n");
1021 #endif
1022 			eval = EIDRM;
1023 			goto done;
1024 		}
1025 	}
1026 
1027 	/*
1028 	 * Return the message to the user.
1029 	 *
1030 	 * First, do the bookkeeping (before we risk being interrupted).
1031 	 */
1032 
1033 	msqptr->msg_cbytes -= msghdr->msg_ts;
1034 	msqptr->msg_qnum--;
1035 	msqptr->msg_lrpid = td->td_proc->p_pid;
1036 	msqptr->msg_rtime = time_second;
1037 
1038 	/*
1039 	 * Make msgsz the actual amount that we'll be returning.
1040 	 * Note that this effectively truncates the message if it is too long
1041 	 * (since msgsz is never increased).
1042 	 */
1043 
1044 #ifdef MSG_DEBUG_OK
1045 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1046 	    msghdr->msg_ts);
1047 #endif
1048 	if (msgsz > msghdr->msg_ts)
1049 		msgsz = msghdr->msg_ts;
1050 
1051 	/*
1052 	 * Return the type to the user.
1053 	 */
1054 
1055 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1056 	    sizeof(msghdr->msg_type));
1057 	if (eval != 0) {
1058 #ifdef MSG_DEBUG_OK
1059 		kprintf("error (%d) copying out message type\n", eval);
1060 #endif
1061 		msg_freehdr(msghdr);
1062 		wakeup((caddr_t)msqptr);
1063 		goto done;
1064 	}
1065 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1066 
1067 	/*
1068 	 * Return the segments to the user
1069 	 */
1070 
1071 	next = msghdr->msg_spot;
1072 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1073 		size_t tlen;
1074 
1075 		if (msgsz - len > msginfo.msgssz)
1076 			tlen = msginfo.msgssz;
1077 		else
1078 			tlen = msgsz - len;
1079 		if (next <= -1)
1080 			panic("next too low #3");
1081 		if (next >= msginfo.msgseg)
1082 			panic("next out of range #3");
1083 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1084 		    user_msgp, tlen);
1085 		if (eval != 0) {
1086 #ifdef MSG_DEBUG_OK
1087 			kprintf("error (%d) copying out message segment\n",
1088 			    eval);
1089 #endif
1090 			msg_freehdr(msghdr);
1091 			wakeup((caddr_t)msqptr);
1092 			goto done;
1093 		}
1094 		user_msgp = (char *)user_msgp + tlen;
1095 		next = msgmaps[next].next;
1096 	}
1097 
1098 	/*
1099 	 * Done, return the actual number of bytes copied out.
1100 	 */
1101 
1102 	msg_freehdr(msghdr);
1103 	wakeup((caddr_t)msqptr);
1104 	eval = 0;
1105 done:
1106 	rel_mplock();
1107 	if (eval == 0)
1108 		uap->sysmsg_result = msgsz;
1109 	return(eval);
1110 }
1111 
1112 static int
1113 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1114 {
1115 
1116 	return (SYSCTL_OUT(req, msqids,
1117 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1118 }
1119 
1120 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1121 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1122 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1123 
1124 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1125 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1126 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1127 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1128 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1129 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1130 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1131     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1132