xref: /dragonfly/sys/kern/sysv_msg.c (revision 2038fb68)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.18 2008/01/06 16:55:51 swildner Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/priv.h>
31 #include <sys/msg.h>
32 #include <sys/sysent.h>
33 #include <sys/sysctl.h>
34 #include <sys/malloc.h>
35 #include <sys/jail.h>
36 
37 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
38 
static void msginit (void *);

/*
 * Debug tracing switches.  The kprintf() traces in the system call paths
 * below are wrapped in #ifdef MSG_DEBUG_OK, so with MSG_DEBUG_OK undefined
 * here they are compiled out even though MSG_DEBUG itself is defined.
 */
#define MSG_DEBUG
#undef MSG_DEBUG_OK

static void msg_freehdr (struct msg *msghdr);

/*
 * Dispatch table used by sys_msgsys(), indexed by its "which" argument:
 * 0 = msgctl, 1 = msgget, 2 = msgsnd, 3 = msgrcv.
 *
 * XXX casting to (sy_call_t *) is bogus, as usual.
 */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
};
51 
/*
 * Header describing one message.  Headers live in the msghdrs[] array and
 * are linked through msg_next either onto a queue (msg_first .. msg_last of
 * a msqid_ds) or onto the free_msghdrs list.  The message body is stored in
 * msgpool as a chain of segments threaded through msgmaps[].
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain */
	long	msg_type;	/* type of this message */
    				/* >0 -> type of this message */
    				/* 0 -> free header */
	u_short	msg_ts;		/* size of this message (bytes of body) */
	short	msg_spot;	/* location of start of msg in buffer: */
				/* index of the first segment, or -1 when */
				/* the message owns no segments */
};
60 
61 
/*
 * Compile-time defaults for the message queue limits.  Each may be
 * overridden by defining the macro before this point (e.g. from the kernel
 * configuration); the values are used to initialise msginfo below.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must be less than 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* max chars in a message (whole pool) */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
78 
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Very small
 * or very large segment sizes make little sense, so msginit() in this file
 * checks that msgssz is a power of two between 8 and 1024 inclusive (and
 * panics if it isn't).
 *
 * Note that msginit() recomputes msgmax as msgseg * msgssz, so the MSGMAX
 * initialiser below only matters until msginit() has run.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
                		/* (must be small power of 2 greater than 4) */
                MSGSEG          /* number of message segments */
};
98 
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid packs the slot index into its low 16 bits and the slot's
 * msg_perm.seq into its high 16 bits.
 *
 *	MSQID(ix, ds)	build a msqid from slot index ix and msqid_ds ds
 *	MSQID_IX(id)	extract the slot index from a msqid
 *	MSQID_SEQ(id)	extract the sequence number from a msqid
 */
#define MSQID(ix,ds) \
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
106 
107 /*
108  * The rest of this file is specific to this particular implementation.
109  */
110 
/*
 * One entry per message segment.  Entries are chained through "next",
 * either as the segments of a single message (starting at msg_spot) or as
 * the free list (starting at free_msgmaps); -1 terminates a chain.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
    				/* -1 -> available */
    				/* 0..(MSGSEG-1) -> index of next segment */
};
116 
/*
 * Private flag kept in msqid_ds.msg_perm.mode (outside the 0777 permission
 * bits) while a msgsnd() holds the queue slot, so that msgget() does not
 * hand the slot out again in the meantime.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Global state of the message subsystem; all of it is set up by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
126 
127 static void
128 msginit(void *dummy)
129 {
130 	int i;
131 
132 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
133 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
134 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
135 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
136 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
137 
138 	/*
139 	 * msginfo.msgssz should be a power of two for efficiency reasons.
140 	 * It is also pretty silly if msginfo.msgssz is less than 8
141 	 * or greater than about 256 so ...
142 	 */
143 
144 	i = 8;
145 	while (i < 1024 && i != msginfo.msgssz)
146 		i <<= 1;
147     	if (i != msginfo.msgssz) {
148 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
149 		    msginfo.msgssz);
150 		panic("msginfo.msgssz not a small power of 2");
151 	}
152 
153 	if (msginfo.msgseg > 32767) {
154 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
155 		panic("msginfo.msgseg > 32767");
156 	}
157 
158 	if (msgmaps == NULL)
159 		panic("msgmaps is NULL");
160 
161 	for (i = 0; i < msginfo.msgseg; i++) {
162 		if (i > 0)
163 			msgmaps[i-1].next = i;
164 		msgmaps[i].next = -1;	/* implies entry is available */
165 	}
166 	free_msgmaps = 0;
167 	nfree_msgmaps = msginfo.msgseg;
168 
169 	if (msghdrs == NULL)
170 		panic("msghdrs is NULL");
171 
172 	for (i = 0; i < msginfo.msgtql; i++) {
173 		msghdrs[i].msg_type = 0;
174 		if (i > 0)
175 			msghdrs[i-1].msg_next = &msghdrs[i];
176 		msghdrs[i].msg_next = NULL;
177     	}
178 	free_msghdrs = &msghdrs[0];
179 
180 	if (msqids == NULL)
181 		panic("msqids is NULL");
182 
183 	for (i = 0; i < msginfo.msgmni; i++) {
184 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
185 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
186 		msqids[i].msg_perm.mode = 0;
187 	}
188 }
189 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
190 
191 /*
192  * Entry point for all MSG calls
193  *
194  * msgsys_args(int which, int a2, ...) (VARARGS)
195  */
196 int
197 sys_msgsys(struct msgsys_args *uap)
198 {
199 	struct proc *p = curproc;
200 	unsigned int which = (unsigned int)uap->which;
201 
202 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
203 		return (ENOSYS);
204 
205 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
206 		return (EINVAL);
207 	bcopy(&uap->a2, &uap->which,
208 	    sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
209 	return ((*msgcalls[which])(uap));
210 }
211 
212 static void
213 msg_freehdr(struct msg *msghdr)
214 {
215 	while (msghdr->msg_ts > 0) {
216 		short next;
217 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
218 			panic("msghdr->msg_spot out of range");
219 		next = msgmaps[msghdr->msg_spot].next;
220 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
221 		free_msgmaps = msghdr->msg_spot;
222 		nfree_msgmaps++;
223 		msghdr->msg_spot = next;
224 		if (msghdr->msg_ts >= msginfo.msgssz)
225 			msghdr->msg_ts -= msginfo.msgssz;
226 		else
227 			msghdr->msg_ts = 0;
228 	}
229 	if (msghdr->msg_spot != -1)
230 		panic("msghdr->msg_spot != -1");
231 	msghdr->msg_next = free_msghdrs;
232 	free_msghdrs = msghdr;
233 }
234 
/*
 * msgctl system call: control operations on a message queue.
 *
 * uap->msqid	queue identifier as returned by msgget
 * uap->cmd	IPC_RMID, IPC_SET or IPC_STAT
 * uap->buf	user msqid_ds, read for IPC_SET and written for IPC_STAT
 *
 * Returns 0 on success (uap->sysmsg_result is set to 0), otherwise an errno:
 * ENOSYS from a jail without SysV IPC permission, EINVAL for a bad or stale
 * identifier, an unknown command or an attempt to set msg_qbytes to 0, or
 * the error returned by ipcperm(), priv_check(), copyin() or copyout().
 */
int
sys_msgctl(struct msgctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int msqid = uap->msqid;
	int cmd = uap->cmd;
	struct msqid_ds *user_msqptr = uap->buf;
	int rval, eval;
	struct msqid_ds msqbuf;
	struct msqid_ds *msqptr;

#ifdef MSG_DEBUG_OK
	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
#endif

	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
		return (ENOSYS);

	/* From here on msqid is the index into msqids[]. */
	msqid = IPCID_TO_IX(msqid);

	if (msqid < 0 || msqid >= msginfo.msgmni) {
#ifdef MSG_DEBUG_OK
		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
		    msginfo.msgmni);
#endif
		return(EINVAL);
	}

	msqptr = &msqids[msqid];

	/* msg_qbytes == 0 marks a slot that is not allocated. */
	if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
		kprintf("no such msqid\n");
#endif
		return(EINVAL);
	}
	/* Reject identifiers left over from an earlier use of this slot. */
	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
		kprintf("wrong sequence number\n");
#endif
		return(EINVAL);
	}

	eval = 0;
	rval = 0;

	switch (cmd) {

	case IPC_RMID:
	{
		struct msg *msghdr;
		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
			return(eval);
		/* Free the message headers */
		msghdr = msqptr->msg_first;
		while (msghdr != NULL) {
			struct msg *msghdr_tmp;

			/* Free the segments of each message */
			msqptr->msg_cbytes -= msghdr->msg_ts;
			msqptr->msg_qnum--;
			/* Fetch the successor first: msg_freehdr reuses msg_next. */
			msghdr_tmp = msghdr;
			msghdr = msghdr->msg_next;
			msg_freehdr(msghdr_tmp);
		}

		if (msqptr->msg_cbytes != 0)
			panic("msg_cbytes is screwed up");
		if (msqptr->msg_qnum != 0)
			panic("msg_qnum is screwed up");

		msqptr->msg_qbytes = 0;	/* Mark it as free */

		/* Let sleeping senders/receivers notice that the queue is gone. */
		wakeup((caddr_t)msqptr);
	}

		break;

	case IPC_SET:
		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
			return(eval);
		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
			return(eval);
		/* Raising the queue's byte limit requires privilege. */
		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
			eval = priv_check(td, PRIV_ROOT);
			if (eval)
				return(eval);
		}
		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
#ifdef MSG_DEBUG_OK
			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
			    msginfo.msgmnb);
#endif
			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
		}
		/* 0 would make the slot look unallocated, so refuse it. */
		if (msqbuf.msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
			kprintf("can't reduce msg_qbytes to 0\n");
#endif
			return(EINVAL);		/* non-standard errno! */
		}
		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
		/* Only the 0777 permission bits come from the caller. */
		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
		    (msqbuf.msg_perm.mode & 0777);
		msqptr->msg_qbytes = msqbuf.msg_qbytes;
		msqptr->msg_ctime = time_second;
		break;

	case IPC_STAT:
		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
#ifdef MSG_DEBUG_OK
			kprintf("requester doesn't have read access\n");
#endif
			return(eval);
		}
		/* The in-kernel msqid_ds is copied out as it stands. */
		eval = copyout((caddr_t)msqptr, user_msqptr,
		    sizeof(struct msqid_ds));
		break;

	default:
#ifdef MSG_DEBUG_OK
		kprintf("invalid command %d\n", cmd);
#endif
		return(EINVAL);
	}

	if (eval == 0)
		uap->sysmsg_result = rval;
	return(eval);
}
367 
368 int
369 sys_msgget(struct msgget_args *uap)
370 {
371 	struct proc *p = curproc;
372 	int msqid, eval;
373 	int key = uap->key;
374 	int msgflg = uap->msgflg;
375 	struct ucred *cred = p->p_ucred;
376 	struct msqid_ds *msqptr = NULL;
377 
378 #ifdef MSG_DEBUG_OK
379 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
380 #endif
381 
382 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
383 		return (ENOSYS);
384 
385 	if (key != IPC_PRIVATE) {
386 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
387 			msqptr = &msqids[msqid];
388 			if (msqptr->msg_qbytes != 0 &&
389 			    msqptr->msg_perm.key == key)
390 				break;
391 		}
392 		if (msqid < msginfo.msgmni) {
393 #ifdef MSG_DEBUG_OK
394 			kprintf("found public key\n");
395 #endif
396 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
397 #ifdef MSG_DEBUG_OK
398 				kprintf("not exclusive\n");
399 #endif
400 				return(EEXIST);
401 			}
402 			if ((eval = ipcperm(p, &msqptr->msg_perm, msgflg & 0700 ))) {
403 #ifdef MSG_DEBUG_OK
404 				kprintf("requester doesn't have 0%o access\n",
405 				    msgflg & 0700);
406 #endif
407 				return(eval);
408 			}
409 			goto found;
410 		}
411 	}
412 
413 #ifdef MSG_DEBUG_OK
414 	kprintf("need to allocate the msqid_ds\n");
415 #endif
416 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
417 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
418 			/*
419 			 * Look for an unallocated and unlocked msqid_ds.
420 			 * msqid_ds's can be locked by msgsnd or msgrcv while
421 			 * they are copying the message in/out.  We can't
422 			 * re-use the entry until they release it.
423 			 */
424 			msqptr = &msqids[msqid];
425 			if (msqptr->msg_qbytes == 0 &&
426 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
427 				break;
428 		}
429 		if (msqid == msginfo.msgmni) {
430 #ifdef MSG_DEBUG_OK
431 			kprintf("no more msqid_ds's available\n");
432 #endif
433 			return(ENOSPC);
434 		}
435 #ifdef MSG_DEBUG_OK
436 		kprintf("msqid %d is available\n", msqid);
437 #endif
438 		msqptr->msg_perm.key = key;
439 		msqptr->msg_perm.cuid = cred->cr_uid;
440 		msqptr->msg_perm.uid = cred->cr_uid;
441 		msqptr->msg_perm.cgid = cred->cr_gid;
442 		msqptr->msg_perm.gid = cred->cr_gid;
443 		msqptr->msg_perm.mode = (msgflg & 0777);
444 		/* Make sure that the returned msqid is unique */
445 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
446 		msqptr->msg_first = NULL;
447 		msqptr->msg_last = NULL;
448 		msqptr->msg_cbytes = 0;
449 		msqptr->msg_qnum = 0;
450 		msqptr->msg_qbytes = msginfo.msgmnb;
451 		msqptr->msg_lspid = 0;
452 		msqptr->msg_lrpid = 0;
453 		msqptr->msg_stime = 0;
454 		msqptr->msg_rtime = 0;
455 		msqptr->msg_ctime = time_second;
456 	} else {
457 #ifdef MSG_DEBUG_OK
458 		kprintf("didn't find it and wasn't asked to create it\n");
459 #endif
460 		return(ENOENT);
461 	}
462 
463 found:
464 	/* Construct the unique msqid */
465 	uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
466 	return(0);
467 }
468 
469 int
470 sys_msgsnd(struct msgsnd_args *uap)
471 {
472 	struct proc *p = curproc;
473 	int msqid = uap->msqid;
474 	void *user_msgp = uap->msgp;
475 	size_t msgsz = uap->msgsz;
476 	int msgflg = uap->msgflg;
477 	int segs_needed, eval;
478 	struct msqid_ds *msqptr;
479 	struct msg *msghdr;
480 	short next;
481 
482 #ifdef MSG_DEBUG_OK
483 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
484 	    msgflg);
485 #endif
486 
487 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
488 		return (ENOSYS);
489 
490 	msqid = IPCID_TO_IX(msqid);
491 
492 	if (msqid < 0 || msqid >= msginfo.msgmni) {
493 #ifdef MSG_DEBUG_OK
494 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
495 		    msginfo.msgmni);
496 #endif
497 		return(EINVAL);
498 	}
499 
500 	msqptr = &msqids[msqid];
501 	if (msqptr->msg_qbytes == 0) {
502 #ifdef MSG_DEBUG_OK
503 		kprintf("no such message queue id\n");
504 #endif
505 		return(EINVAL);
506 	}
507 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
508 #ifdef MSG_DEBUG_OK
509 		kprintf("wrong sequence number\n");
510 #endif
511 		return(EINVAL);
512 	}
513 
514 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_W))) {
515 #ifdef MSG_DEBUG_OK
516 		kprintf("requester doesn't have write access\n");
517 #endif
518 		return(eval);
519 	}
520 
521 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
522 #ifdef MSG_DEBUG_OK
523 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
524 	    segs_needed);
525 #endif
526 	for (;;) {
527 		int need_more_resources = 0;
528 
529 		/*
530 		 * check msgsz
531 		 * (inside this loop in case msg_qbytes changes while we sleep)
532 		 */
533 
534 		if (msgsz > msqptr->msg_qbytes) {
535 #ifdef MSG_DEBUG_OK
536 			kprintf("msgsz > msqptr->msg_qbytes\n");
537 #endif
538 			return(EINVAL);
539 		}
540 
541 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
542 #ifdef MSG_DEBUG_OK
543 			kprintf("msqid is locked\n");
544 #endif
545 			need_more_resources = 1;
546 		}
547 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
548 #ifdef MSG_DEBUG_OK
549 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
550 #endif
551 			need_more_resources = 1;
552 		}
553 		if (segs_needed > nfree_msgmaps) {
554 #ifdef MSG_DEBUG_OK
555 			kprintf("segs_needed > nfree_msgmaps\n");
556 #endif
557 			need_more_resources = 1;
558 		}
559 		if (free_msghdrs == NULL) {
560 #ifdef MSG_DEBUG_OK
561 			kprintf("no more msghdrs\n");
562 #endif
563 			need_more_resources = 1;
564 		}
565 
566 		if (need_more_resources) {
567 			int we_own_it;
568 
569 			if ((msgflg & IPC_NOWAIT) != 0) {
570 #ifdef MSG_DEBUG_OK
571 				kprintf("need more resources but caller doesn't want to wait\n");
572 #endif
573 				return(EAGAIN);
574 			}
575 
576 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
577 #ifdef MSG_DEBUG_OK
578 				kprintf("we don't own the msqid_ds\n");
579 #endif
580 				we_own_it = 0;
581 			} else {
582 				/* Force later arrivals to wait for our
583 				   request */
584 #ifdef MSG_DEBUG_OK
585 				kprintf("we own the msqid_ds\n");
586 #endif
587 				msqptr->msg_perm.mode |= MSG_LOCKED;
588 				we_own_it = 1;
589 			}
590 #ifdef MSG_DEBUG_OK
591 			kprintf("goodnight\n");
592 #endif
593 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
594 #ifdef MSG_DEBUG_OK
595 			kprintf("good morning, eval=%d\n", eval);
596 #endif
597 			if (we_own_it)
598 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
599 			if (eval != 0) {
600 #ifdef MSG_DEBUG_OK
601 				kprintf("msgsnd:  interrupted system call\n");
602 #endif
603 				return(EINTR);
604 			}
605 
606 			/*
607 			 * Make sure that the msq queue still exists
608 			 */
609 
610 			if (msqptr->msg_qbytes == 0) {
611 #ifdef MSG_DEBUG_OK
612 				kprintf("msqid deleted\n");
613 #endif
614 				return(EIDRM);
615 			}
616 
617 		} else {
618 #ifdef MSG_DEBUG_OK
619 			kprintf("got all the resources that we need\n");
620 #endif
621 			break;
622 		}
623 	}
624 
625 	/*
626 	 * We have the resources that we need.
627 	 * Make sure!
628 	 */
629 
630 	if (msqptr->msg_perm.mode & MSG_LOCKED)
631 		panic("msg_perm.mode & MSG_LOCKED");
632 	if (segs_needed > nfree_msgmaps)
633 		panic("segs_needed > nfree_msgmaps");
634 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
635 		panic("msgsz + msg_cbytes > msg_qbytes");
636 	if (free_msghdrs == NULL)
637 		panic("no more msghdrs");
638 
639 	/*
640 	 * Re-lock the msqid_ds in case we page-fault when copying in the
641 	 * message
642 	 */
643 
644 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
645 		panic("msqid_ds is already locked");
646 	msqptr->msg_perm.mode |= MSG_LOCKED;
647 
648 	/*
649 	 * Allocate a message header
650 	 */
651 
652 	msghdr = free_msghdrs;
653 	free_msghdrs = msghdr->msg_next;
654 	msghdr->msg_spot = -1;
655 	msghdr->msg_ts = msgsz;
656 
657 	/*
658 	 * Allocate space for the message
659 	 */
660 
661 	while (segs_needed > 0) {
662 		if (nfree_msgmaps <= 0)
663 			panic("not enough msgmaps");
664 		if (free_msgmaps == -1)
665 			panic("nil free_msgmaps");
666 		next = free_msgmaps;
667 		if (next <= -1)
668 			panic("next too low #1");
669 		if (next >= msginfo.msgseg)
670 			panic("next out of range #1");
671 #ifdef MSG_DEBUG_OK
672 		kprintf("allocating segment %d to message\n", next);
673 #endif
674 		free_msgmaps = msgmaps[next].next;
675 		nfree_msgmaps--;
676 		msgmaps[next].next = msghdr->msg_spot;
677 		msghdr->msg_spot = next;
678 		segs_needed--;
679 	}
680 
681 	/*
682 	 * Copy in the message type
683 	 */
684 
685 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
686 	    sizeof(msghdr->msg_type))) != 0) {
687 #ifdef MSG_DEBUG_OK
688 		kprintf("error %d copying the message type\n", eval);
689 #endif
690 		msg_freehdr(msghdr);
691 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
692 		wakeup((caddr_t)msqptr);
693 		return(eval);
694 	}
695 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
696 
697 	/*
698 	 * Validate the message type
699 	 */
700 
701 	if (msghdr->msg_type < 1) {
702 		msg_freehdr(msghdr);
703 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
704 		wakeup((caddr_t)msqptr);
705 #ifdef MSG_DEBUG_OK
706 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
707 #endif
708 		return(EINVAL);
709 	}
710 
711 	/*
712 	 * Copy in the message body
713 	 */
714 
715 	next = msghdr->msg_spot;
716 	while (msgsz > 0) {
717 		size_t tlen;
718 		if (msgsz > msginfo.msgssz)
719 			tlen = msginfo.msgssz;
720 		else
721 			tlen = msgsz;
722 		if (next <= -1)
723 			panic("next too low #2");
724 		if (next >= msginfo.msgseg)
725 			panic("next out of range #2");
726 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
727 		    tlen)) != 0) {
728 #ifdef MSG_DEBUG_OK
729 			kprintf("error %d copying in message segment\n", eval);
730 #endif
731 			msg_freehdr(msghdr);
732 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
733 			wakeup((caddr_t)msqptr);
734 			return(eval);
735 		}
736 		msgsz -= tlen;
737 		user_msgp = (char *)user_msgp + tlen;
738 		next = msgmaps[next].next;
739 	}
740 	if (next != -1)
741 		panic("didn't use all the msg segments");
742 
743 	/*
744 	 * We've got the message.  Unlock the msqid_ds.
745 	 */
746 
747 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
748 
749 	/*
750 	 * Make sure that the msqid_ds is still allocated.
751 	 */
752 
753 	if (msqptr->msg_qbytes == 0) {
754 		msg_freehdr(msghdr);
755 		wakeup((caddr_t)msqptr);
756 		return(EIDRM);
757 	}
758 
759 	/*
760 	 * Put the message into the queue
761 	 */
762 
763 	if (msqptr->msg_first == NULL) {
764 		msqptr->msg_first = msghdr;
765 		msqptr->msg_last = msghdr;
766 	} else {
767 		msqptr->msg_last->msg_next = msghdr;
768 		msqptr->msg_last = msghdr;
769 	}
770 	msqptr->msg_last->msg_next = NULL;
771 
772 	msqptr->msg_cbytes += msghdr->msg_ts;
773 	msqptr->msg_qnum++;
774 	msqptr->msg_lspid = p->p_pid;
775 	msqptr->msg_stime = time_second;
776 
777 	wakeup((caddr_t)msqptr);
778 	uap->sysmsg_result = 0;
779 	return(0);
780 }
781 
782 int
783 sys_msgrcv(struct msgrcv_args *uap)
784 {
785 	struct proc *p = curproc;
786 	int msqid = uap->msqid;
787 	void *user_msgp = uap->msgp;
788 	size_t msgsz = uap->msgsz;
789 	long msgtyp = uap->msgtyp;
790 	int msgflg = uap->msgflg;
791 	size_t len;
792 	struct msqid_ds *msqptr;
793 	struct msg *msghdr;
794 	int eval;
795 	short next;
796 
797 #ifdef MSG_DEBUG_OK
798 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
799 	    msgsz, msgtyp, msgflg);
800 #endif
801 
802 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
803 		return (ENOSYS);
804 
805 	msqid = IPCID_TO_IX(msqid);
806 
807 	if (msqid < 0 || msqid >= msginfo.msgmni) {
808 #ifdef MSG_DEBUG_OK
809 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
810 		    msginfo.msgmni);
811 #endif
812 		return(EINVAL);
813 	}
814 
815 	msqptr = &msqids[msqid];
816 	if (msqptr->msg_qbytes == 0) {
817 #ifdef MSG_DEBUG_OK
818 		kprintf("no such message queue id\n");
819 #endif
820 		return(EINVAL);
821 	}
822 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
823 #ifdef MSG_DEBUG_OK
824 		kprintf("wrong sequence number\n");
825 #endif
826 		return(EINVAL);
827 	}
828 
829 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
830 #ifdef MSG_DEBUG_OK
831 		kprintf("requester doesn't have read access\n");
832 #endif
833 		return(eval);
834 	}
835 
836 	msghdr = NULL;
837 	while (msghdr == NULL) {
838 		if (msgtyp == 0) {
839 			msghdr = msqptr->msg_first;
840 			if (msghdr != NULL) {
841 				if (msgsz < msghdr->msg_ts &&
842 				    (msgflg & MSG_NOERROR) == 0) {
843 #ifdef MSG_DEBUG_OK
844 					kprintf("first message on the queue is too big (want %d, got %d)\n",
845 					    msgsz, msghdr->msg_ts);
846 #endif
847 					return(E2BIG);
848 				}
849 				if (msqptr->msg_first == msqptr->msg_last) {
850 					msqptr->msg_first = NULL;
851 					msqptr->msg_last = NULL;
852 				} else {
853 					msqptr->msg_first = msghdr->msg_next;
854 					if (msqptr->msg_first == NULL)
855 						panic("msg_first/last screwed up #1");
856 				}
857 			}
858 		} else {
859 			struct msg *previous;
860 			struct msg **prev;
861 
862 			previous = NULL;
863 			prev = &(msqptr->msg_first);
864 			while ((msghdr = *prev) != NULL) {
865 				/*
866 				 * Is this message's type an exact match or is
867 				 * this message's type less than or equal to
868 				 * the absolute value of a negative msgtyp?
869 				 * Note that the second half of this test can
870 				 * NEVER be true if msgtyp is positive since
871 				 * msg_type is always positive!
872 				 */
873 
874 				if (msgtyp == msghdr->msg_type ||
875 				    msghdr->msg_type <= -msgtyp) {
876 #ifdef MSG_DEBUG_OK
877 					kprintf("found message type %d, requested %d\n",
878 					    msghdr->msg_type, msgtyp);
879 #endif
880 					if (msgsz < msghdr->msg_ts &&
881 					    (msgflg & MSG_NOERROR) == 0) {
882 #ifdef MSG_DEBUG_OK
883 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
884 						    msgsz, msghdr->msg_ts);
885 #endif
886 						return(E2BIG);
887 					}
888 					*prev = msghdr->msg_next;
889 					if (msghdr == msqptr->msg_last) {
890 						if (previous == NULL) {
891 							if (prev !=
892 							    &msqptr->msg_first)
893 								panic("msg_first/last screwed up #2");
894 							msqptr->msg_first =
895 							    NULL;
896 							msqptr->msg_last =
897 							    NULL;
898 						} else {
899 							if (prev ==
900 							    &msqptr->msg_first)
901 								panic("msg_first/last screwed up #3");
902 							msqptr->msg_last =
903 							    previous;
904 						}
905 					}
906 					break;
907 				}
908 				previous = msghdr;
909 				prev = &(msghdr->msg_next);
910 			}
911 		}
912 
913 		/*
914 		 * We've either extracted the msghdr for the appropriate
915 		 * message or there isn't one.
916 		 * If there is one then bail out of this loop.
917 		 */
918 
919 		if (msghdr != NULL)
920 			break;
921 
922 		/*
923 		 * Hmph!  No message found.  Does the user want to wait?
924 		 */
925 
926 		if ((msgflg & IPC_NOWAIT) != 0) {
927 #ifdef MSG_DEBUG_OK
928 			kprintf("no appropriate message found (msgtyp=%d)\n",
929 			    msgtyp);
930 #endif
931 			/* The SVID says to return ENOMSG. */
932 #ifdef ENOMSG
933 			return(ENOMSG);
934 #else
935 			/* Unfortunately, BSD doesn't define that code yet! */
936 			return(EAGAIN);
937 #endif
938 		}
939 
940 		/*
941 		 * Wait for something to happen
942 		 */
943 
944 #ifdef MSG_DEBUG_OK
945 		kprintf("msgrcv:  goodnight\n");
946 #endif
947 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
948 #ifdef MSG_DEBUG_OK
949 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
950 #endif
951 
952 		if (eval != 0) {
953 #ifdef MSG_DEBUG_OK
954 			kprintf("msgsnd:  interrupted system call\n");
955 #endif
956 			return(EINTR);
957 		}
958 
959 		/*
960 		 * Make sure that the msq queue still exists
961 		 */
962 
963 		if (msqptr->msg_qbytes == 0 ||
964 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
965 #ifdef MSG_DEBUG_OK
966 			kprintf("msqid deleted\n");
967 #endif
968 			return(EIDRM);
969 		}
970 	}
971 
972 	/*
973 	 * Return the message to the user.
974 	 *
975 	 * First, do the bookkeeping (before we risk being interrupted).
976 	 */
977 
978 	msqptr->msg_cbytes -= msghdr->msg_ts;
979 	msqptr->msg_qnum--;
980 	msqptr->msg_lrpid = p->p_pid;
981 	msqptr->msg_rtime = time_second;
982 
983 	/*
984 	 * Make msgsz the actual amount that we'll be returning.
985 	 * Note that this effectively truncates the message if it is too long
986 	 * (since msgsz is never increased).
987 	 */
988 
989 #ifdef MSG_DEBUG_OK
990 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
991 	    msghdr->msg_ts);
992 #endif
993 	if (msgsz > msghdr->msg_ts)
994 		msgsz = msghdr->msg_ts;
995 
996 	/*
997 	 * Return the type to the user.
998 	 */
999 
1000 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1001 	    sizeof(msghdr->msg_type));
1002 	if (eval != 0) {
1003 #ifdef MSG_DEBUG_OK
1004 		kprintf("error (%d) copying out message type\n", eval);
1005 #endif
1006 		msg_freehdr(msghdr);
1007 		wakeup((caddr_t)msqptr);
1008 		return(eval);
1009 	}
1010 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1011 
1012 	/*
1013 	 * Return the segments to the user
1014 	 */
1015 
1016 	next = msghdr->msg_spot;
1017 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1018 		size_t tlen;
1019 
1020 		if (msgsz - len > msginfo.msgssz)
1021 			tlen = msginfo.msgssz;
1022 		else
1023 			tlen = msgsz - len;
1024 		if (next <= -1)
1025 			panic("next too low #3");
1026 		if (next >= msginfo.msgseg)
1027 			panic("next out of range #3");
1028 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1029 		    user_msgp, tlen);
1030 		if (eval != 0) {
1031 #ifdef MSG_DEBUG_OK
1032 			kprintf("error (%d) copying out message segment\n",
1033 			    eval);
1034 #endif
1035 			msg_freehdr(msghdr);
1036 			wakeup((caddr_t)msqptr);
1037 			return(eval);
1038 		}
1039 		user_msgp = (char *)user_msgp + tlen;
1040 		next = msgmaps[next].next;
1041 	}
1042 
1043 	/*
1044 	 * Done, return the actual number of bytes copied out.
1045 	 */
1046 
1047 	msg_freehdr(msghdr);
1048 	wakeup((caddr_t)msqptr);
1049 	uap->sysmsg_result = msgsz;
1050 	return(0);
1051 }
1052 
1053 static int
1054 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1055 {
1056 
1057 	return (SYSCTL_OUT(req, msqids,
1058 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1059 }
1060 
/*
 * Tunables for the three limits that size the allocations made in
 * msginit(): segment count, segment size and number of queue identifiers.
 */
TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);

/*
 * Read-only (CTLFLAG_RD) sysctl views of every msginfo field, plus the raw
 * queue table exported through sysctl_msqids().
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1073