xref: /dragonfly/sys/kern/sysv_msg.c (revision 8a7bdfea)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.18 2008/01/06 16:55:51 swildner Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
37 
38 static void msginit (void *);
39 
40 #define MSG_DEBUG
41 #undef MSG_DEBUG_OK
42 
43 static void msg_freehdr (struct msg *msghdr);
44 
45 /* XXX casting to (sy_call_t *) is bogus, as usual. */
46 static sy_call_t *msgcalls[] = {
47 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
48 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
49 };
50 
51 struct msg {
52 	struct	msg *msg_next;	/* next msg in the chain */
53 	long	msg_type;	/* type of this message */
54     				/* >0 -> type of this message */
55     				/* 0 -> free header */
56 	u_short	msg_ts;		/* size of this message */
57 	short	msg_spot;	/* location of start of msg in buffer */
58 };
59 
60 
/*
 * Built-in defaults for the message limits.  Every value except MSGMAX
 * can be overridden by defining the macro before this point; MSGMAX is
 * always derived from the segment size and segment count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif

#define MSGMAX	(MSGSSZ*MSGSEG)

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40
#endif

#if !defined(MSGTQL)
#define MSGTQL	40
#endif
77 
78 /*
79  * Based on the configuration parameters described in an SVR2 (yes, two)
80  * config(1m) man page.
81  *
82  * Each message is broken up and stored in segments that are msgssz bytes
83  * long.  For efficiency reasons, this should be a power of two.  Also,
84  * it doesn't make sense if it is less than 8 or greater than about 256.
85  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
86  * two between 8 and 1024 inclusive (and panic's if it isn't).
87  */
88 struct msginfo msginfo = {
89                 MSGMAX,         /* max chars in a message */
90                 MSGMNI,         /* # of message queue identifiers */
91                 MSGMNB,         /* max chars in a queue */
92                 MSGTQL,         /* max messages in system */
93                 MSGSSZ,         /* size of a message segment */
94                 		/* (must be small power of 2 greater than 4) */
95                 MSGSEG          /* number of message segments */
96 };
97 
/*
 * Macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * A msqid packs the table index into the low 16 bits and the sequence
 * number of the msqid_ds into the high 16 bits:
 *
 *	MSQID(ix, ds)	build an id from index ix and ds.msg_perm.seq
 *	MSQID_IX(id)	extract the table index from an id
 *	MSQID_SEQ(id)	extract the sequence number from an id
 *
 * Both halves of MSQID() are parenthesised explicitly so that the result
 * does not depend on the relative precedence of '&' and '|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
105 
106 /*
107  * The rest of this file is specific to this particular implementation.
108  */
109 
/*
 * One entry per message segment.  The entries form singly linked chains
 * by segment index: one chain per message plus the free chain headed by
 * free_msgmaps.
 */
struct msgmap {
	short	next;		/* index of the next segment in the chain: */
				/*   -1 -> available */
				/*   0..(MSGSEG-1) -> index of next segment */
};
115 
/* Flag kept in msg_perm.mode while a msqid_ds is locked. */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Free-list bookkeeping. */
static int nfree_msgmaps;		/* # of free map entries */
static short free_msgmaps;		/* head of linked list of free map entries */
static struct msg *free_msghdrs;	/* list of free msg headers */

/* Tables allocated once by msginit(). */
static char *msgpool;			/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;		/* MSGSEG msgmap structures */
static struct msg *msghdrs;		/* MSGTQL msg headers */
static struct msqid_ds *msqids;		/* MSGMNI msqid_ds struct's */
125 
126 static void
127 msginit(void *dummy)
128 {
129 	int i;
130 
131 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
132 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
133 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
134 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
135 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
136 
137 	/*
138 	 * msginfo.msgssz should be a power of two for efficiency reasons.
139 	 * It is also pretty silly if msginfo.msgssz is less than 8
140 	 * or greater than about 256 so ...
141 	 */
142 
143 	i = 8;
144 	while (i < 1024 && i != msginfo.msgssz)
145 		i <<= 1;
146     	if (i != msginfo.msgssz) {
147 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
148 		    msginfo.msgssz);
149 		panic("msginfo.msgssz not a small power of 2");
150 	}
151 
152 	if (msginfo.msgseg > 32767) {
153 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
154 		panic("msginfo.msgseg > 32767");
155 	}
156 
157 	if (msgmaps == NULL)
158 		panic("msgmaps is NULL");
159 
160 	for (i = 0; i < msginfo.msgseg; i++) {
161 		if (i > 0)
162 			msgmaps[i-1].next = i;
163 		msgmaps[i].next = -1;	/* implies entry is available */
164 	}
165 	free_msgmaps = 0;
166 	nfree_msgmaps = msginfo.msgseg;
167 
168 	if (msghdrs == NULL)
169 		panic("msghdrs is NULL");
170 
171 	for (i = 0; i < msginfo.msgtql; i++) {
172 		msghdrs[i].msg_type = 0;
173 		if (i > 0)
174 			msghdrs[i-1].msg_next = &msghdrs[i];
175 		msghdrs[i].msg_next = NULL;
176     	}
177 	free_msghdrs = &msghdrs[0];
178 
179 	if (msqids == NULL)
180 		panic("msqids is NULL");
181 
182 	for (i = 0; i < msginfo.msgmni; i++) {
183 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
184 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
185 		msqids[i].msg_perm.mode = 0;
186 	}
187 }
188 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
189 
190 /*
191  * Entry point for all MSG calls
192  *
193  * msgsys_args(int which, int a2, ...) (VARARGS)
194  */
195 int
196 sys_msgsys(struct msgsys_args *uap)
197 {
198 	struct proc *p = curproc;
199 	unsigned int which = (unsigned int)uap->which;
200 
201 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
202 		return (ENOSYS);
203 
204 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
205 		return (EINVAL);
206 	bcopy(&uap->a2, &uap->which,
207 	    sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
208 	return ((*msgcalls[which])(uap));
209 }
210 
211 static void
212 msg_freehdr(struct msg *msghdr)
213 {
214 	while (msghdr->msg_ts > 0) {
215 		short next;
216 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
217 			panic("msghdr->msg_spot out of range");
218 		next = msgmaps[msghdr->msg_spot].next;
219 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
220 		free_msgmaps = msghdr->msg_spot;
221 		nfree_msgmaps++;
222 		msghdr->msg_spot = next;
223 		if (msghdr->msg_ts >= msginfo.msgssz)
224 			msghdr->msg_ts -= msginfo.msgssz;
225 		else
226 			msghdr->msg_ts = 0;
227 	}
228 	if (msghdr->msg_spot != -1)
229 		panic("msghdr->msg_spot != -1");
230 	msghdr->msg_next = free_msghdrs;
231 	free_msghdrs = msghdr;
232 }
233 
234 int
235 sys_msgctl(struct msgctl_args *uap)
236 {
237 	struct thread *td = curthread;
238 	struct proc *p = td->td_proc;
239 	int msqid = uap->msqid;
240 	int cmd = uap->cmd;
241 	struct msqid_ds *user_msqptr = uap->buf;
242 	int rval, eval;
243 	struct msqid_ds msqbuf;
244 	struct msqid_ds *msqptr;
245 
246 #ifdef MSG_DEBUG_OK
247 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
248 #endif
249 
250 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
251 		return (ENOSYS);
252 
253 	msqid = IPCID_TO_IX(msqid);
254 
255 	if (msqid < 0 || msqid >= msginfo.msgmni) {
256 #ifdef MSG_DEBUG_OK
257 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
258 		    msginfo.msgmni);
259 #endif
260 		return(EINVAL);
261 	}
262 
263 	msqptr = &msqids[msqid];
264 
265 	if (msqptr->msg_qbytes == 0) {
266 #ifdef MSG_DEBUG_OK
267 		kprintf("no such msqid\n");
268 #endif
269 		return(EINVAL);
270 	}
271 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
272 #ifdef MSG_DEBUG_OK
273 		kprintf("wrong sequence number\n");
274 #endif
275 		return(EINVAL);
276 	}
277 
278 	eval = 0;
279 	rval = 0;
280 
281 	switch (cmd) {
282 
283 	case IPC_RMID:
284 	{
285 		struct msg *msghdr;
286 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
287 			return(eval);
288 		/* Free the message headers */
289 		msghdr = msqptr->msg_first;
290 		while (msghdr != NULL) {
291 			struct msg *msghdr_tmp;
292 
293 			/* Free the segments of each message */
294 			msqptr->msg_cbytes -= msghdr->msg_ts;
295 			msqptr->msg_qnum--;
296 			msghdr_tmp = msghdr;
297 			msghdr = msghdr->msg_next;
298 			msg_freehdr(msghdr_tmp);
299 		}
300 
301 		if (msqptr->msg_cbytes != 0)
302 			panic("msg_cbytes is screwed up");
303 		if (msqptr->msg_qnum != 0)
304 			panic("msg_qnum is screwed up");
305 
306 		msqptr->msg_qbytes = 0;	/* Mark it as free */
307 
308 		wakeup((caddr_t)msqptr);
309 	}
310 
311 		break;
312 
313 	case IPC_SET:
314 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
315 			return(eval);
316 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
317 			return(eval);
318 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
319 			eval = suser(td);
320 			if (eval)
321 				return(eval);
322 		}
323 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
324 #ifdef MSG_DEBUG_OK
325 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
326 			    msginfo.msgmnb);
327 #endif
328 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
329 		}
330 		if (msqbuf.msg_qbytes == 0) {
331 #ifdef MSG_DEBUG_OK
332 			kprintf("can't reduce msg_qbytes to 0\n");
333 #endif
334 			return(EINVAL);		/* non-standard errno! */
335 		}
336 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
337 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
338 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
339 		    (msqbuf.msg_perm.mode & 0777);
340 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
341 		msqptr->msg_ctime = time_second;
342 		break;
343 
344 	case IPC_STAT:
345 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
346 #ifdef MSG_DEBUG_OK
347 			kprintf("requester doesn't have read access\n");
348 #endif
349 			return(eval);
350 		}
351 		eval = copyout((caddr_t)msqptr, user_msqptr,
352 		    sizeof(struct msqid_ds));
353 		break;
354 
355 	default:
356 #ifdef MSG_DEBUG_OK
357 		kprintf("invalid command %d\n", cmd);
358 #endif
359 		return(EINVAL);
360 	}
361 
362 	if (eval == 0)
363 		uap->sysmsg_result = rval;
364 	return(eval);
365 }
366 
367 int
368 sys_msgget(struct msgget_args *uap)
369 {
370 	struct proc *p = curproc;
371 	int msqid, eval;
372 	int key = uap->key;
373 	int msgflg = uap->msgflg;
374 	struct ucred *cred = p->p_ucred;
375 	struct msqid_ds *msqptr = NULL;
376 
377 #ifdef MSG_DEBUG_OK
378 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
379 #endif
380 
381 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
382 		return (ENOSYS);
383 
384 	if (key != IPC_PRIVATE) {
385 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
386 			msqptr = &msqids[msqid];
387 			if (msqptr->msg_qbytes != 0 &&
388 			    msqptr->msg_perm.key == key)
389 				break;
390 		}
391 		if (msqid < msginfo.msgmni) {
392 #ifdef MSG_DEBUG_OK
393 			kprintf("found public key\n");
394 #endif
395 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
396 #ifdef MSG_DEBUG_OK
397 				kprintf("not exclusive\n");
398 #endif
399 				return(EEXIST);
400 			}
401 			if ((eval = ipcperm(p, &msqptr->msg_perm, msgflg & 0700 ))) {
402 #ifdef MSG_DEBUG_OK
403 				kprintf("requester doesn't have 0%o access\n",
404 				    msgflg & 0700);
405 #endif
406 				return(eval);
407 			}
408 			goto found;
409 		}
410 	}
411 
412 #ifdef MSG_DEBUG_OK
413 	kprintf("need to allocate the msqid_ds\n");
414 #endif
415 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
416 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
417 			/*
418 			 * Look for an unallocated and unlocked msqid_ds.
419 			 * msqid_ds's can be locked by msgsnd or msgrcv while
420 			 * they are copying the message in/out.  We can't
421 			 * re-use the entry until they release it.
422 			 */
423 			msqptr = &msqids[msqid];
424 			if (msqptr->msg_qbytes == 0 &&
425 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
426 				break;
427 		}
428 		if (msqid == msginfo.msgmni) {
429 #ifdef MSG_DEBUG_OK
430 			kprintf("no more msqid_ds's available\n");
431 #endif
432 			return(ENOSPC);
433 		}
434 #ifdef MSG_DEBUG_OK
435 		kprintf("msqid %d is available\n", msqid);
436 #endif
437 		msqptr->msg_perm.key = key;
438 		msqptr->msg_perm.cuid = cred->cr_uid;
439 		msqptr->msg_perm.uid = cred->cr_uid;
440 		msqptr->msg_perm.cgid = cred->cr_gid;
441 		msqptr->msg_perm.gid = cred->cr_gid;
442 		msqptr->msg_perm.mode = (msgflg & 0777);
443 		/* Make sure that the returned msqid is unique */
444 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
445 		msqptr->msg_first = NULL;
446 		msqptr->msg_last = NULL;
447 		msqptr->msg_cbytes = 0;
448 		msqptr->msg_qnum = 0;
449 		msqptr->msg_qbytes = msginfo.msgmnb;
450 		msqptr->msg_lspid = 0;
451 		msqptr->msg_lrpid = 0;
452 		msqptr->msg_stime = 0;
453 		msqptr->msg_rtime = 0;
454 		msqptr->msg_ctime = time_second;
455 	} else {
456 #ifdef MSG_DEBUG_OK
457 		kprintf("didn't find it and wasn't asked to create it\n");
458 #endif
459 		return(ENOENT);
460 	}
461 
462 found:
463 	/* Construct the unique msqid */
464 	uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
465 	return(0);
466 }
467 
468 int
469 sys_msgsnd(struct msgsnd_args *uap)
470 {
471 	struct proc *p = curproc;
472 	int msqid = uap->msqid;
473 	void *user_msgp = uap->msgp;
474 	size_t msgsz = uap->msgsz;
475 	int msgflg = uap->msgflg;
476 	int segs_needed, eval;
477 	struct msqid_ds *msqptr;
478 	struct msg *msghdr;
479 	short next;
480 
481 #ifdef MSG_DEBUG_OK
482 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
483 	    msgflg);
484 #endif
485 
486 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
487 		return (ENOSYS);
488 
489 	msqid = IPCID_TO_IX(msqid);
490 
491 	if (msqid < 0 || msqid >= msginfo.msgmni) {
492 #ifdef MSG_DEBUG_OK
493 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
494 		    msginfo.msgmni);
495 #endif
496 		return(EINVAL);
497 	}
498 
499 	msqptr = &msqids[msqid];
500 	if (msqptr->msg_qbytes == 0) {
501 #ifdef MSG_DEBUG_OK
502 		kprintf("no such message queue id\n");
503 #endif
504 		return(EINVAL);
505 	}
506 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
507 #ifdef MSG_DEBUG_OK
508 		kprintf("wrong sequence number\n");
509 #endif
510 		return(EINVAL);
511 	}
512 
513 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_W))) {
514 #ifdef MSG_DEBUG_OK
515 		kprintf("requester doesn't have write access\n");
516 #endif
517 		return(eval);
518 	}
519 
520 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
521 #ifdef MSG_DEBUG_OK
522 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
523 	    segs_needed);
524 #endif
525 	for (;;) {
526 		int need_more_resources = 0;
527 
528 		/*
529 		 * check msgsz
530 		 * (inside this loop in case msg_qbytes changes while we sleep)
531 		 */
532 
533 		if (msgsz > msqptr->msg_qbytes) {
534 #ifdef MSG_DEBUG_OK
535 			kprintf("msgsz > msqptr->msg_qbytes\n");
536 #endif
537 			return(EINVAL);
538 		}
539 
540 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
541 #ifdef MSG_DEBUG_OK
542 			kprintf("msqid is locked\n");
543 #endif
544 			need_more_resources = 1;
545 		}
546 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
547 #ifdef MSG_DEBUG_OK
548 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
549 #endif
550 			need_more_resources = 1;
551 		}
552 		if (segs_needed > nfree_msgmaps) {
553 #ifdef MSG_DEBUG_OK
554 			kprintf("segs_needed > nfree_msgmaps\n");
555 #endif
556 			need_more_resources = 1;
557 		}
558 		if (free_msghdrs == NULL) {
559 #ifdef MSG_DEBUG_OK
560 			kprintf("no more msghdrs\n");
561 #endif
562 			need_more_resources = 1;
563 		}
564 
565 		if (need_more_resources) {
566 			int we_own_it;
567 
568 			if ((msgflg & IPC_NOWAIT) != 0) {
569 #ifdef MSG_DEBUG_OK
570 				kprintf("need more resources but caller doesn't want to wait\n");
571 #endif
572 				return(EAGAIN);
573 			}
574 
575 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
576 #ifdef MSG_DEBUG_OK
577 				kprintf("we don't own the msqid_ds\n");
578 #endif
579 				we_own_it = 0;
580 			} else {
581 				/* Force later arrivals to wait for our
582 				   request */
583 #ifdef MSG_DEBUG_OK
584 				kprintf("we own the msqid_ds\n");
585 #endif
586 				msqptr->msg_perm.mode |= MSG_LOCKED;
587 				we_own_it = 1;
588 			}
589 #ifdef MSG_DEBUG_OK
590 			kprintf("goodnight\n");
591 #endif
592 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
593 #ifdef MSG_DEBUG_OK
594 			kprintf("good morning, eval=%d\n", eval);
595 #endif
596 			if (we_own_it)
597 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
598 			if (eval != 0) {
599 #ifdef MSG_DEBUG_OK
600 				kprintf("msgsnd:  interrupted system call\n");
601 #endif
602 				return(EINTR);
603 			}
604 
605 			/*
606 			 * Make sure that the msq queue still exists
607 			 */
608 
609 			if (msqptr->msg_qbytes == 0) {
610 #ifdef MSG_DEBUG_OK
611 				kprintf("msqid deleted\n");
612 #endif
613 				return(EIDRM);
614 			}
615 
616 		} else {
617 #ifdef MSG_DEBUG_OK
618 			kprintf("got all the resources that we need\n");
619 #endif
620 			break;
621 		}
622 	}
623 
624 	/*
625 	 * We have the resources that we need.
626 	 * Make sure!
627 	 */
628 
629 	if (msqptr->msg_perm.mode & MSG_LOCKED)
630 		panic("msg_perm.mode & MSG_LOCKED");
631 	if (segs_needed > nfree_msgmaps)
632 		panic("segs_needed > nfree_msgmaps");
633 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
634 		panic("msgsz + msg_cbytes > msg_qbytes");
635 	if (free_msghdrs == NULL)
636 		panic("no more msghdrs");
637 
638 	/*
639 	 * Re-lock the msqid_ds in case we page-fault when copying in the
640 	 * message
641 	 */
642 
643 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
644 		panic("msqid_ds is already locked");
645 	msqptr->msg_perm.mode |= MSG_LOCKED;
646 
647 	/*
648 	 * Allocate a message header
649 	 */
650 
651 	msghdr = free_msghdrs;
652 	free_msghdrs = msghdr->msg_next;
653 	msghdr->msg_spot = -1;
654 	msghdr->msg_ts = msgsz;
655 
656 	/*
657 	 * Allocate space for the message
658 	 */
659 
660 	while (segs_needed > 0) {
661 		if (nfree_msgmaps <= 0)
662 			panic("not enough msgmaps");
663 		if (free_msgmaps == -1)
664 			panic("nil free_msgmaps");
665 		next = free_msgmaps;
666 		if (next <= -1)
667 			panic("next too low #1");
668 		if (next >= msginfo.msgseg)
669 			panic("next out of range #1");
670 #ifdef MSG_DEBUG_OK
671 		kprintf("allocating segment %d to message\n", next);
672 #endif
673 		free_msgmaps = msgmaps[next].next;
674 		nfree_msgmaps--;
675 		msgmaps[next].next = msghdr->msg_spot;
676 		msghdr->msg_spot = next;
677 		segs_needed--;
678 	}
679 
680 	/*
681 	 * Copy in the message type
682 	 */
683 
684 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
685 	    sizeof(msghdr->msg_type))) != 0) {
686 #ifdef MSG_DEBUG_OK
687 		kprintf("error %d copying the message type\n", eval);
688 #endif
689 		msg_freehdr(msghdr);
690 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
691 		wakeup((caddr_t)msqptr);
692 		return(eval);
693 	}
694 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
695 
696 	/*
697 	 * Validate the message type
698 	 */
699 
700 	if (msghdr->msg_type < 1) {
701 		msg_freehdr(msghdr);
702 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
703 		wakeup((caddr_t)msqptr);
704 #ifdef MSG_DEBUG_OK
705 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
706 #endif
707 		return(EINVAL);
708 	}
709 
710 	/*
711 	 * Copy in the message body
712 	 */
713 
714 	next = msghdr->msg_spot;
715 	while (msgsz > 0) {
716 		size_t tlen;
717 		if (msgsz > msginfo.msgssz)
718 			tlen = msginfo.msgssz;
719 		else
720 			tlen = msgsz;
721 		if (next <= -1)
722 			panic("next too low #2");
723 		if (next >= msginfo.msgseg)
724 			panic("next out of range #2");
725 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
726 		    tlen)) != 0) {
727 #ifdef MSG_DEBUG_OK
728 			kprintf("error %d copying in message segment\n", eval);
729 #endif
730 			msg_freehdr(msghdr);
731 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
732 			wakeup((caddr_t)msqptr);
733 			return(eval);
734 		}
735 		msgsz -= tlen;
736 		user_msgp = (char *)user_msgp + tlen;
737 		next = msgmaps[next].next;
738 	}
739 	if (next != -1)
740 		panic("didn't use all the msg segments");
741 
742 	/*
743 	 * We've got the message.  Unlock the msqid_ds.
744 	 */
745 
746 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
747 
748 	/*
749 	 * Make sure that the msqid_ds is still allocated.
750 	 */
751 
752 	if (msqptr->msg_qbytes == 0) {
753 		msg_freehdr(msghdr);
754 		wakeup((caddr_t)msqptr);
755 		return(EIDRM);
756 	}
757 
758 	/*
759 	 * Put the message into the queue
760 	 */
761 
762 	if (msqptr->msg_first == NULL) {
763 		msqptr->msg_first = msghdr;
764 		msqptr->msg_last = msghdr;
765 	} else {
766 		msqptr->msg_last->msg_next = msghdr;
767 		msqptr->msg_last = msghdr;
768 	}
769 	msqptr->msg_last->msg_next = NULL;
770 
771 	msqptr->msg_cbytes += msghdr->msg_ts;
772 	msqptr->msg_qnum++;
773 	msqptr->msg_lspid = p->p_pid;
774 	msqptr->msg_stime = time_second;
775 
776 	wakeup((caddr_t)msqptr);
777 	uap->sysmsg_result = 0;
778 	return(0);
779 }
780 
781 int
782 sys_msgrcv(struct msgrcv_args *uap)
783 {
784 	struct proc *p = curproc;
785 	int msqid = uap->msqid;
786 	void *user_msgp = uap->msgp;
787 	size_t msgsz = uap->msgsz;
788 	long msgtyp = uap->msgtyp;
789 	int msgflg = uap->msgflg;
790 	size_t len;
791 	struct msqid_ds *msqptr;
792 	struct msg *msghdr;
793 	int eval;
794 	short next;
795 
796 #ifdef MSG_DEBUG_OK
797 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
798 	    msgsz, msgtyp, msgflg);
799 #endif
800 
801 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
802 		return (ENOSYS);
803 
804 	msqid = IPCID_TO_IX(msqid);
805 
806 	if (msqid < 0 || msqid >= msginfo.msgmni) {
807 #ifdef MSG_DEBUG_OK
808 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
809 		    msginfo.msgmni);
810 #endif
811 		return(EINVAL);
812 	}
813 
814 	msqptr = &msqids[msqid];
815 	if (msqptr->msg_qbytes == 0) {
816 #ifdef MSG_DEBUG_OK
817 		kprintf("no such message queue id\n");
818 #endif
819 		return(EINVAL);
820 	}
821 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
822 #ifdef MSG_DEBUG_OK
823 		kprintf("wrong sequence number\n");
824 #endif
825 		return(EINVAL);
826 	}
827 
828 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
829 #ifdef MSG_DEBUG_OK
830 		kprintf("requester doesn't have read access\n");
831 #endif
832 		return(eval);
833 	}
834 
835 	msghdr = NULL;
836 	while (msghdr == NULL) {
837 		if (msgtyp == 0) {
838 			msghdr = msqptr->msg_first;
839 			if (msghdr != NULL) {
840 				if (msgsz < msghdr->msg_ts &&
841 				    (msgflg & MSG_NOERROR) == 0) {
842 #ifdef MSG_DEBUG_OK
843 					kprintf("first message on the queue is too big (want %d, got %d)\n",
844 					    msgsz, msghdr->msg_ts);
845 #endif
846 					return(E2BIG);
847 				}
848 				if (msqptr->msg_first == msqptr->msg_last) {
849 					msqptr->msg_first = NULL;
850 					msqptr->msg_last = NULL;
851 				} else {
852 					msqptr->msg_first = msghdr->msg_next;
853 					if (msqptr->msg_first == NULL)
854 						panic("msg_first/last screwed up #1");
855 				}
856 			}
857 		} else {
858 			struct msg *previous;
859 			struct msg **prev;
860 
861 			previous = NULL;
862 			prev = &(msqptr->msg_first);
863 			while ((msghdr = *prev) != NULL) {
864 				/*
865 				 * Is this message's type an exact match or is
866 				 * this message's type less than or equal to
867 				 * the absolute value of a negative msgtyp?
868 				 * Note that the second half of this test can
869 				 * NEVER be true if msgtyp is positive since
870 				 * msg_type is always positive!
871 				 */
872 
873 				if (msgtyp == msghdr->msg_type ||
874 				    msghdr->msg_type <= -msgtyp) {
875 #ifdef MSG_DEBUG_OK
876 					kprintf("found message type %d, requested %d\n",
877 					    msghdr->msg_type, msgtyp);
878 #endif
879 					if (msgsz < msghdr->msg_ts &&
880 					    (msgflg & MSG_NOERROR) == 0) {
881 #ifdef MSG_DEBUG_OK
882 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
883 						    msgsz, msghdr->msg_ts);
884 #endif
885 						return(E2BIG);
886 					}
887 					*prev = msghdr->msg_next;
888 					if (msghdr == msqptr->msg_last) {
889 						if (previous == NULL) {
890 							if (prev !=
891 							    &msqptr->msg_first)
892 								panic("msg_first/last screwed up #2");
893 							msqptr->msg_first =
894 							    NULL;
895 							msqptr->msg_last =
896 							    NULL;
897 						} else {
898 							if (prev ==
899 							    &msqptr->msg_first)
900 								panic("msg_first/last screwed up #3");
901 							msqptr->msg_last =
902 							    previous;
903 						}
904 					}
905 					break;
906 				}
907 				previous = msghdr;
908 				prev = &(msghdr->msg_next);
909 			}
910 		}
911 
912 		/*
913 		 * We've either extracted the msghdr for the appropriate
914 		 * message or there isn't one.
915 		 * If there is one then bail out of this loop.
916 		 */
917 
918 		if (msghdr != NULL)
919 			break;
920 
921 		/*
922 		 * Hmph!  No message found.  Does the user want to wait?
923 		 */
924 
925 		if ((msgflg & IPC_NOWAIT) != 0) {
926 #ifdef MSG_DEBUG_OK
927 			kprintf("no appropriate message found (msgtyp=%d)\n",
928 			    msgtyp);
929 #endif
930 			/* The SVID says to return ENOMSG. */
931 #ifdef ENOMSG
932 			return(ENOMSG);
933 #else
934 			/* Unfortunately, BSD doesn't define that code yet! */
935 			return(EAGAIN);
936 #endif
937 		}
938 
939 		/*
940 		 * Wait for something to happen
941 		 */
942 
943 #ifdef MSG_DEBUG_OK
944 		kprintf("msgrcv:  goodnight\n");
945 #endif
946 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
947 #ifdef MSG_DEBUG_OK
948 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
949 #endif
950 
951 		if (eval != 0) {
952 #ifdef MSG_DEBUG_OK
953 			kprintf("msgsnd:  interrupted system call\n");
954 #endif
955 			return(EINTR);
956 		}
957 
958 		/*
959 		 * Make sure that the msq queue still exists
960 		 */
961 
962 		if (msqptr->msg_qbytes == 0 ||
963 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
964 #ifdef MSG_DEBUG_OK
965 			kprintf("msqid deleted\n");
966 #endif
967 			return(EIDRM);
968 		}
969 	}
970 
971 	/*
972 	 * Return the message to the user.
973 	 *
974 	 * First, do the bookkeeping (before we risk being interrupted).
975 	 */
976 
977 	msqptr->msg_cbytes -= msghdr->msg_ts;
978 	msqptr->msg_qnum--;
979 	msqptr->msg_lrpid = p->p_pid;
980 	msqptr->msg_rtime = time_second;
981 
982 	/*
983 	 * Make msgsz the actual amount that we'll be returning.
984 	 * Note that this effectively truncates the message if it is too long
985 	 * (since msgsz is never increased).
986 	 */
987 
988 #ifdef MSG_DEBUG_OK
989 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
990 	    msghdr->msg_ts);
991 #endif
992 	if (msgsz > msghdr->msg_ts)
993 		msgsz = msghdr->msg_ts;
994 
995 	/*
996 	 * Return the type to the user.
997 	 */
998 
999 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1000 	    sizeof(msghdr->msg_type));
1001 	if (eval != 0) {
1002 #ifdef MSG_DEBUG_OK
1003 		kprintf("error (%d) copying out message type\n", eval);
1004 #endif
1005 		msg_freehdr(msghdr);
1006 		wakeup((caddr_t)msqptr);
1007 		return(eval);
1008 	}
1009 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1010 
1011 	/*
1012 	 * Return the segments to the user
1013 	 */
1014 
1015 	next = msghdr->msg_spot;
1016 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1017 		size_t tlen;
1018 
1019 		if (msgsz - len > msginfo.msgssz)
1020 			tlen = msginfo.msgssz;
1021 		else
1022 			tlen = msgsz - len;
1023 		if (next <= -1)
1024 			panic("next too low #3");
1025 		if (next >= msginfo.msgseg)
1026 			panic("next out of range #3");
1027 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1028 		    user_msgp, tlen);
1029 		if (eval != 0) {
1030 #ifdef MSG_DEBUG_OK
1031 			kprintf("error (%d) copying out message segment\n",
1032 			    eval);
1033 #endif
1034 			msg_freehdr(msghdr);
1035 			wakeup((caddr_t)msqptr);
1036 			return(eval);
1037 		}
1038 		user_msgp = (char *)user_msgp + tlen;
1039 		next = msgmaps[next].next;
1040 	}
1041 
1042 	/*
1043 	 * Done, return the actual number of bytes copied out.
1044 	 */
1045 
1046 	msg_freehdr(msghdr);
1047 	wakeup((caddr_t)msqptr);
1048 	uap->sysmsg_result = msgsz;
1049 	return(0);
1050 }
1051 
1052 static int
1053 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1054 {
1055 
1056 	return (SYSCTL_OUT(req, msqids,
1057 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1058 }
1059 
1060 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1061 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1062 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1063 
1064 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1065 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1066 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1067 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1068 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1069 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1070 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1071     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1072