xref: /dragonfly/sys/kern/sysv_msg.c (revision 0ac6bf9d)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.15 2006/09/05 00:55:45 dillon Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
37 
38 static void msginit (void *);
39 
40 #define MSG_DEBUG
41 #undef MSG_DEBUG_OK
42 
43 static void msg_freehdr (struct msg *msghdr);
44 
45 /* XXX casting to (sy_call_t *) is bogus, as usual. */
46 static sy_call_t *msgcalls[] = {
47 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
48 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
49 };
50 
/*
 * Header describing one message.  A header is either linked on a queue
 * (through msqid_ds msg_first/msg_last) or on the free_msghdrs list; both
 * lists are chained through msg_next.  The message body lives in msgpool
 * segments chained through msgmaps[], starting at msg_spot.
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain (queue or free list) */
	long	msg_type;	/* type of this message */
    				/* >0 -> type of this message */
    				/* 0 -> free header */
	u_short	msg_ts;		/* size of this message in bytes */
	short	msg_spot;	/* index of the first segment of the body, */
				/* -1 when no segment is attached */
};
59 
60 
/*
 * Compile-time defaults for the msginfo parameters.  Every value except
 * MSGMAX sits behind #ifndef, so a definition made earlier takes
 * precedence (presumably via opt_sysvipc.h -- TODO confirm).
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* segment size; must be 2^N (msginit accepts 8..1024) */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* # of segments; msginit panics above 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* max chars in a message */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
77 
78 /*
79  * Based on the configuration parameters described in an SVR2 (yes, two)
80  * config(1m) man page.
81  *
82  * Each message is broken up and stored in segments that are msgssz bytes
83  * long.  For efficiency reasons, this should be a power of two.  Also,
84  * it doesn't make sense if it is less than 8 or greater than about 256.
85  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
86  * two between 8 and 1024 inclusive (and panic's if it isn't).
87  */
88 struct msginfo msginfo = {
89                 MSGMAX,         /* max chars in a message */
90                 MSGMNI,         /* # of message queue identifiers */
91                 MSGMNB,         /* max chars in a queue */
92                 MSGTQL,         /* max messages in system */
93                 MSGSSZ,         /* size of a message segment */
94                 		/* (must be small power of 2 greater than 4) */
95                 MSGSEG          /* number of message segments */
96 };
97 
/*
 * Conversions between an msqid and its two 16-bit halves
 * (specific to this implementation):
 *   low 16 bits  - table index
 *   next 16 bits - msg_perm.seq of the msqid_ds
 */
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
105 
106 /*
107  * The rest of this file is specific to this particular implementation.
108  */
109 
/*
 * One entry per msgpool segment.  Entries form singly linked chains by
 * index: either the free list headed by free_msgmaps or the body of one
 * message headed by that message's msg_spot.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
    				/* -1 -> end of chain */
    				/* 0..(MSGSEG-1) -> index of next segment */
};
115 
/*
 * Flag kept in msqid_ds msg_perm.mode, above the 0777 permission bits.
 * msgsnd sets it while it sleeps for resources or copies a message in;
 * msgget will not reuse a slot that has it set.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* All of the following are set up once by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
125 
126 static void
127 msginit(dummy)
128 	void *dummy;
129 {
130 	int i;
131 
132 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
133 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
134 	if (msgpool == NULL)
135 		panic("msgpool is NULL");
136 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
137 	if (msgmaps == NULL)
138 		panic("msgmaps is NULL");
139 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
140 	if (msghdrs == NULL)
141 		panic("msghdrs is NULL");
142 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
143 	if (msqids == NULL)
144 		panic("msqids is NULL");
145 
146 	/*
147 	 * msginfo.msgssz should be a power of two for efficiency reasons.
148 	 * It is also pretty silly if msginfo.msgssz is less than 8
149 	 * or greater than about 256 so ...
150 	 */
151 
152 	i = 8;
153 	while (i < 1024 && i != msginfo.msgssz)
154 		i <<= 1;
155     	if (i != msginfo.msgssz) {
156 		printf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
157 		    msginfo.msgssz);
158 		panic("msginfo.msgssz not a small power of 2");
159 	}
160 
161 	if (msginfo.msgseg > 32767) {
162 		printf("msginfo.msgseg=%d\n", msginfo.msgseg);
163 		panic("msginfo.msgseg > 32767");
164 	}
165 
166 	if (msgmaps == NULL)
167 		panic("msgmaps is NULL");
168 
169 	for (i = 0; i < msginfo.msgseg; i++) {
170 		if (i > 0)
171 			msgmaps[i-1].next = i;
172 		msgmaps[i].next = -1;	/* implies entry is available */
173 	}
174 	free_msgmaps = 0;
175 	nfree_msgmaps = msginfo.msgseg;
176 
177 	if (msghdrs == NULL)
178 		panic("msghdrs is NULL");
179 
180 	for (i = 0; i < msginfo.msgtql; i++) {
181 		msghdrs[i].msg_type = 0;
182 		if (i > 0)
183 			msghdrs[i-1].msg_next = &msghdrs[i];
184 		msghdrs[i].msg_next = NULL;
185     	}
186 	free_msghdrs = &msghdrs[0];
187 
188 	if (msqids == NULL)
189 		panic("msqids is NULL");
190 
191 	for (i = 0; i < msginfo.msgmni; i++) {
192 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
193 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
194 		msqids[i].msg_perm.mode = 0;
195 	}
196 }
197 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
198 
199 /*
200  * Entry point for all MSG calls
201  *
202  * msgsys_args(int which, int a2, ...) (VARARGS)
203  */
204 int
205 sys_msgsys(struct msgsys_args *uap)
206 {
207 	struct proc *p = curproc;
208 	unsigned int which = (unsigned int)uap->which;
209 
210 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
211 		return (ENOSYS);
212 
213 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
214 		return (EINVAL);
215 	bcopy(&uap->a2, &uap->which,
216 	    sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
217 	return ((*msgcalls[which])(uap));
218 }
219 
220 static void
221 msg_freehdr(struct msg *msghdr)
222 {
223 	while (msghdr->msg_ts > 0) {
224 		short next;
225 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
226 			panic("msghdr->msg_spot out of range");
227 		next = msgmaps[msghdr->msg_spot].next;
228 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
229 		free_msgmaps = msghdr->msg_spot;
230 		nfree_msgmaps++;
231 		msghdr->msg_spot = next;
232 		if (msghdr->msg_ts >= msginfo.msgssz)
233 			msghdr->msg_ts -= msginfo.msgssz;
234 		else
235 			msghdr->msg_ts = 0;
236 	}
237 	if (msghdr->msg_spot != -1)
238 		panic("msghdr->msg_spot != -1");
239 	msghdr->msg_next = free_msghdrs;
240 	free_msghdrs = msghdr;
241 }
242 
243 int
244 sys_msgctl(struct msgctl_args *uap)
245 {
246 	struct thread *td = curthread;
247 	struct proc *p = td->td_proc;
248 	int msqid = uap->msqid;
249 	int cmd = uap->cmd;
250 	struct msqid_ds *user_msqptr = uap->buf;
251 	int rval, eval;
252 	struct msqid_ds msqbuf;
253 	struct msqid_ds *msqptr;
254 
255 #ifdef MSG_DEBUG_OK
256 	printf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
257 #endif
258 
259 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
260 		return (ENOSYS);
261 
262 	msqid = IPCID_TO_IX(msqid);
263 
264 	if (msqid < 0 || msqid >= msginfo.msgmni) {
265 #ifdef MSG_DEBUG_OK
266 		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
267 		    msginfo.msgmni);
268 #endif
269 		return(EINVAL);
270 	}
271 
272 	msqptr = &msqids[msqid];
273 
274 	if (msqptr->msg_qbytes == 0) {
275 #ifdef MSG_DEBUG_OK
276 		printf("no such msqid\n");
277 #endif
278 		return(EINVAL);
279 	}
280 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
281 #ifdef MSG_DEBUG_OK
282 		printf("wrong sequence number\n");
283 #endif
284 		return(EINVAL);
285 	}
286 
287 	eval = 0;
288 	rval = 0;
289 
290 	switch (cmd) {
291 
292 	case IPC_RMID:
293 	{
294 		struct msg *msghdr;
295 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
296 			return(eval);
297 		/* Free the message headers */
298 		msghdr = msqptr->msg_first;
299 		while (msghdr != NULL) {
300 			struct msg *msghdr_tmp;
301 
302 			/* Free the segments of each message */
303 			msqptr->msg_cbytes -= msghdr->msg_ts;
304 			msqptr->msg_qnum--;
305 			msghdr_tmp = msghdr;
306 			msghdr = msghdr->msg_next;
307 			msg_freehdr(msghdr_tmp);
308 		}
309 
310 		if (msqptr->msg_cbytes != 0)
311 			panic("msg_cbytes is screwed up");
312 		if (msqptr->msg_qnum != 0)
313 			panic("msg_qnum is screwed up");
314 
315 		msqptr->msg_qbytes = 0;	/* Mark it as free */
316 
317 		wakeup((caddr_t)msqptr);
318 	}
319 
320 		break;
321 
322 	case IPC_SET:
323 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
324 			return(eval);
325 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
326 			return(eval);
327 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
328 			eval = suser(td);
329 			if (eval)
330 				return(eval);
331 		}
332 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
333 #ifdef MSG_DEBUG_OK
334 			printf("can't increase msg_qbytes beyond %d (truncating)\n",
335 			    msginfo.msgmnb);
336 #endif
337 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
338 		}
339 		if (msqbuf.msg_qbytes == 0) {
340 #ifdef MSG_DEBUG_OK
341 			printf("can't reduce msg_qbytes to 0\n");
342 #endif
343 			return(EINVAL);		/* non-standard errno! */
344 		}
345 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
346 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
347 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
348 		    (msqbuf.msg_perm.mode & 0777);
349 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
350 		msqptr->msg_ctime = time_second;
351 		break;
352 
353 	case IPC_STAT:
354 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
355 #ifdef MSG_DEBUG_OK
356 			printf("requester doesn't have read access\n");
357 #endif
358 			return(eval);
359 		}
360 		eval = copyout((caddr_t)msqptr, user_msqptr,
361 		    sizeof(struct msqid_ds));
362 		break;
363 
364 	default:
365 #ifdef MSG_DEBUG_OK
366 		printf("invalid command %d\n", cmd);
367 #endif
368 		return(EINVAL);
369 	}
370 
371 	if (eval == 0)
372 		uap->sysmsg_result = rval;
373 	return(eval);
374 }
375 
376 int
377 sys_msgget(struct msgget_args *uap)
378 {
379 	struct proc *p = curproc;
380 	int msqid, eval;
381 	int key = uap->key;
382 	int msgflg = uap->msgflg;
383 	struct ucred *cred = p->p_ucred;
384 	struct msqid_ds *msqptr = NULL;
385 
386 #ifdef MSG_DEBUG_OK
387 	printf("msgget(0x%x, 0%o)\n", key, msgflg);
388 #endif
389 
390 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
391 		return (ENOSYS);
392 
393 	if (key != IPC_PRIVATE) {
394 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
395 			msqptr = &msqids[msqid];
396 			if (msqptr->msg_qbytes != 0 &&
397 			    msqptr->msg_perm.key == key)
398 				break;
399 		}
400 		if (msqid < msginfo.msgmni) {
401 #ifdef MSG_DEBUG_OK
402 			printf("found public key\n");
403 #endif
404 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
405 #ifdef MSG_DEBUG_OK
406 				printf("not exclusive\n");
407 #endif
408 				return(EEXIST);
409 			}
410 			if ((eval = ipcperm(p, &msqptr->msg_perm, msgflg & 0700 ))) {
411 #ifdef MSG_DEBUG_OK
412 				printf("requester doesn't have 0%o access\n",
413 				    msgflg & 0700);
414 #endif
415 				return(eval);
416 			}
417 			goto found;
418 		}
419 	}
420 
421 #ifdef MSG_DEBUG_OK
422 	printf("need to allocate the msqid_ds\n");
423 #endif
424 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
425 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
426 			/*
427 			 * Look for an unallocated and unlocked msqid_ds.
428 			 * msqid_ds's can be locked by msgsnd or msgrcv while
429 			 * they are copying the message in/out.  We can't
430 			 * re-use the entry until they release it.
431 			 */
432 			msqptr = &msqids[msqid];
433 			if (msqptr->msg_qbytes == 0 &&
434 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
435 				break;
436 		}
437 		if (msqid == msginfo.msgmni) {
438 #ifdef MSG_DEBUG_OK
439 			printf("no more msqid_ds's available\n");
440 #endif
441 			return(ENOSPC);
442 		}
443 #ifdef MSG_DEBUG_OK
444 		printf("msqid %d is available\n", msqid);
445 #endif
446 		msqptr->msg_perm.key = key;
447 		msqptr->msg_perm.cuid = cred->cr_uid;
448 		msqptr->msg_perm.uid = cred->cr_uid;
449 		msqptr->msg_perm.cgid = cred->cr_gid;
450 		msqptr->msg_perm.gid = cred->cr_gid;
451 		msqptr->msg_perm.mode = (msgflg & 0777);
452 		/* Make sure that the returned msqid is unique */
453 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
454 		msqptr->msg_first = NULL;
455 		msqptr->msg_last = NULL;
456 		msqptr->msg_cbytes = 0;
457 		msqptr->msg_qnum = 0;
458 		msqptr->msg_qbytes = msginfo.msgmnb;
459 		msqptr->msg_lspid = 0;
460 		msqptr->msg_lrpid = 0;
461 		msqptr->msg_stime = 0;
462 		msqptr->msg_rtime = 0;
463 		msqptr->msg_ctime = time_second;
464 	} else {
465 #ifdef MSG_DEBUG_OK
466 		printf("didn't find it and wasn't asked to create it\n");
467 #endif
468 		return(ENOENT);
469 	}
470 
471 found:
472 	/* Construct the unique msqid */
473 	uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
474 	return(0);
475 }
476 
/*
 * msgsnd system call: copy one message in from user space and append it
 * to the queue named by uap->msqid.
 *
 * The user buffer at uap->msgp starts with a long message type, followed
 * by uap->msgsz bytes of body.  The body is stored in msgssz-byte segments
 * taken from the free segment list.
 *
 * If the queue is locked, too full, or the system is out of segments or
 * message headers, the call sleeps on the queue until woken, unless
 * IPC_NOWAIT is set in uap->msgflg.
 *
 * Returns:
 *   ENOSYS  caller is jailed and jail_sysvipc_allowed is clear
 *   EINVAL  bad index, unallocated slot, stale sequence number,
 *           msgsz larger than the queue's msg_qbytes, or message type < 1
 *   EAGAIN  resources unavailable and IPC_NOWAIT was given
 *   EINTR   tsleep() returned non-zero
 *   EIDRM   the queue slot was freed while we slept or copied
 *   other   error from ipcperm() or copyin()
 * On success uap->sysmsg_result is set to 0 and 0 is returned.
 */
int
sys_msgsnd(struct msgsnd_args *uap)
{
	struct proc *p = curproc;
	int msqid = uap->msqid;
	void *user_msgp = uap->msgp;
	size_t msgsz = uap->msgsz;
	int msgflg = uap->msgflg;
	int segs_needed, eval;
	struct msqid_ds *msqptr;
	struct msg *msghdr;
	short next;

#ifdef MSG_DEBUG_OK
	printf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
	    msgflg);
#endif

	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
		return (ENOSYS);

	/* Reduce the id to a table index; the sequence part is checked below. */
	msqid = IPCID_TO_IX(msqid);

	if (msqid < 0 || msqid >= msginfo.msgmni) {
#ifdef MSG_DEBUG_OK
		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
		    msginfo.msgmni);
#endif
		return(EINVAL);
	}

	msqptr = &msqids[msqid];
	/* msg_qbytes == 0 marks a slot that is not allocated. */
	if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
		printf("no such message queue id\n");
#endif
		return(EINVAL);
	}
	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
		printf("wrong sequence number\n");
#endif
		return(EINVAL);
	}

	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_W))) {
#ifdef MSG_DEBUG_OK
		printf("requester doesn't have write access\n");
#endif
		return(eval);
	}

	/* Number of msgssz-byte segments needed for msgsz bytes, rounded up. */
	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
#ifdef MSG_DEBUG_OK
	printf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
	    segs_needed);
#endif
	/*
	 * Wait until the queue is unlocked and has room, and enough free
	 * segments and a free header exist.  Every condition is re-evaluated
	 * after each sleep.
	 */
	for (;;) {
		int need_more_resources = 0;

		/*
		 * check msgsz
		 * (inside this loop in case msg_qbytes changes while we sleep)
		 */

		if (msgsz > msqptr->msg_qbytes) {
#ifdef MSG_DEBUG_OK
			printf("msgsz > msqptr->msg_qbytes\n");
#endif
			return(EINVAL);
		}

		if (msqptr->msg_perm.mode & MSG_LOCKED) {
#ifdef MSG_DEBUG_OK
			printf("msqid is locked\n");
#endif
			need_more_resources = 1;
		}
		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
#ifdef MSG_DEBUG_OK
			printf("msgsz + msg_cbytes > msg_qbytes\n");
#endif
			need_more_resources = 1;
		}
		if (segs_needed > nfree_msgmaps) {
#ifdef MSG_DEBUG_OK
			printf("segs_needed > nfree_msgmaps\n");
#endif
			need_more_resources = 1;
		}
		if (free_msghdrs == NULL) {
#ifdef MSG_DEBUG_OK
			printf("no more msghdrs\n");
#endif
			need_more_resources = 1;
		}

		if (need_more_resources) {
			int we_own_it;

			if ((msgflg & IPC_NOWAIT) != 0) {
#ifdef MSG_DEBUG_OK
				printf("need more resources but caller doesn't want to wait\n");
#endif
				return(EAGAIN);
			}

			/*
			 * If nobody holds the lock, take it for the duration
			 * of the sleep; only the thread that set it clears
			 * it again afterwards.
			 */
			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
#ifdef MSG_DEBUG_OK
				printf("we don't own the msqid_ds\n");
#endif
				we_own_it = 0;
			} else {
				/* Force later arrivals to wait for our
				   request */
#ifdef MSG_DEBUG_OK
				printf("we own the msqid_ds\n");
#endif
				msqptr->msg_perm.mode |= MSG_LOCKED;
				we_own_it = 1;
			}
#ifdef MSG_DEBUG_OK
			printf("goodnight\n");
#endif
			/*
			 * Sleep on the queue address, the channel used by
			 * the wakeup() calls in msgsnd, msgrcv and
			 * msgctl(IPC_RMID).
			 */
			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
#ifdef MSG_DEBUG_OK
			printf("good morning, eval=%d\n", eval);
#endif
			if (we_own_it)
				msqptr->msg_perm.mode &= ~MSG_LOCKED;
			/* Any non-zero tsleep() result is reported as EINTR. */
			if (eval != 0) {
#ifdef MSG_DEBUG_OK
				printf("msgsnd:  interrupted system call\n");
#endif
				return(EINTR);
			}

			/*
			 * Make sure that the msq queue still exists
			 */

			if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
				printf("msqid deleted\n");
#endif
				return(EIDRM);
			}

		} else {
#ifdef MSG_DEBUG_OK
			printf("got all the resources that we need\n");
#endif
			break;
		}
	}

	/*
	 * We have the resources that we need.
	 * Make sure!
	 */

	if (msqptr->msg_perm.mode & MSG_LOCKED)
		panic("msg_perm.mode & MSG_LOCKED");
	if (segs_needed > nfree_msgmaps)
		panic("segs_needed > nfree_msgmaps");
	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
		panic("msgsz + msg_cbytes > msg_qbytes");
	if (free_msghdrs == NULL)
		panic("no more msghdrs");

	/*
	 * Re-lock the msqid_ds in case we page-fault when copying in the
	 * message
	 */

	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
		panic("msqid_ds is already locked");
	msqptr->msg_perm.mode |= MSG_LOCKED;

	/*
	 * Allocate a message header
	 */

	msghdr = free_msghdrs;
	free_msghdrs = msghdr->msg_next;
	msghdr->msg_spot = -1;		/* no segments attached yet */
	msghdr->msg_ts = msgsz;

	/*
	 * Allocate space for the message: pop segs_needed entries off the
	 * free segment list and push each onto the front of this message's
	 * chain.
	 */

	while (segs_needed > 0) {
		if (nfree_msgmaps <= 0)
			panic("not enough msgmaps");
		if (free_msgmaps == -1)
			panic("nil free_msgmaps");
		next = free_msgmaps;
		if (next <= -1)
			panic("next too low #1");
		if (next >= msginfo.msgseg)
			panic("next out of range #1");
#ifdef MSG_DEBUG_OK
		printf("allocating segment %d to message\n", next);
#endif
		free_msgmaps = msgmaps[next].next;
		nfree_msgmaps--;
		msgmaps[next].next = msghdr->msg_spot;
		msghdr->msg_spot = next;
		segs_needed--;
	}

	/*
	 * Copy in the message type
	 */

	if ((eval = copyin(user_msgp, &msghdr->msg_type,
	    sizeof(msghdr->msg_type))) != 0) {
#ifdef MSG_DEBUG_OK
		printf("error %d copying the message type\n", eval);
#endif
		/* Undo the allocation, drop the lock and wake any waiters. */
		msg_freehdr(msghdr);
		msqptr->msg_perm.mode &= ~MSG_LOCKED;
		wakeup((caddr_t)msqptr);
		return(eval);
	}
	/* The body follows the type field in the user buffer. */
	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);

	/*
	 * Validate the message type
	 */

	if (msghdr->msg_type < 1) {
		msg_freehdr(msghdr);
		msqptr->msg_perm.mode &= ~MSG_LOCKED;
		wakeup((caddr_t)msqptr);
#ifdef MSG_DEBUG_OK
		/*
		 * The header was only relinked onto the free list above, so
		 * msg_type is still readable here.
		 */
		printf("mtype (%d) < 1\n", msghdr->msg_type);
#endif
		return(EINVAL);
	}

	/*
	 * Copy in the message body, one segment at a time
	 */

	next = msghdr->msg_spot;
	while (msgsz > 0) {
		size_t tlen;
		/* The last segment may be only partly filled. */
		if (msgsz > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz;
		if (next <= -1)
			panic("next too low #2");
		if (next >= msginfo.msgseg)
			panic("next out of range #2");
		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
		    tlen)) != 0) {
#ifdef MSG_DEBUG_OK
			printf("error %d copying in message segment\n", eval);
#endif
			msg_freehdr(msghdr);
			msqptr->msg_perm.mode &= ~MSG_LOCKED;
			wakeup((caddr_t)msqptr);
			return(eval);
		}
		msgsz -= tlen;
		user_msgp = (char *)user_msgp + tlen;
		next = msgmaps[next].next;
	}
	/* The chain must end exactly where the data did. */
	if (next != -1)
		panic("didn't use all the msg segments");

	/*
	 * We've got the message.  Unlock the msqid_ds.
	 */

	msqptr->msg_perm.mode &= ~MSG_LOCKED;

	/*
	 * Make sure that the msqid_ds is still allocated.
	 */

	if (msqptr->msg_qbytes == 0) {
		msg_freehdr(msghdr);
		wakeup((caddr_t)msqptr);
		return(EIDRM);
	}

	/*
	 * Put the message into the queue (appended at the tail)
	 */

	if (msqptr->msg_first == NULL) {
		msqptr->msg_first = msghdr;
		msqptr->msg_last = msghdr;
	} else {
		msqptr->msg_last->msg_next = msghdr;
		msqptr->msg_last = msghdr;
	}
	msqptr->msg_last->msg_next = NULL;

	/* Update the queue statistics. */
	msqptr->msg_cbytes += msghdr->msg_ts;
	msqptr->msg_qnum++;
	msqptr->msg_lspid = p->p_pid;
	msqptr->msg_stime = time_second;

	/* Wake everything sleeping on this queue. */
	wakeup((caddr_t)msqptr);
	uap->sysmsg_result = 0;
	return(0);
}
789 
/*
 * msgrcv system call: remove one message from the queue named by
 * uap->msqid and copy it out to user space.
 *
 * Message selection by uap->msgtyp:
 *    0  the first message on the queue
 *   >0  the first message whose type equals msgtyp
 *   <0  the first message, in queue order, whose type is <= -msgtyp
 *
 * The user buffer at uap->msgp receives the long message type followed by
 * at most uap->msgsz bytes of body.  A longer message is truncated when
 * MSG_NOERROR is set in uap->msgflg and rejected with E2BIG otherwise.
 * When no message matches, the call sleeps on the queue unless IPC_NOWAIT
 * is set.
 *
 * Returns:
 *   ENOSYS  caller is jailed and jail_sysvipc_allowed is clear
 *   EINVAL  bad index, unallocated slot or stale sequence number
 *   E2BIG   selected message is longer than msgsz, without MSG_NOERROR
 *   ENOMSG  nothing suitable and IPC_NOWAIT given (EAGAIN if ENOMSG is
 *           not defined)
 *   EINTR   tsleep() returned non-zero
 *   EIDRM   the queue was freed or re-created while we slept
 *   other   error from ipcperm() or copyout()
 * On success uap->sysmsg_result is the number of body bytes copied out and
 * 0 is returned.
 */
int
sys_msgrcv(struct msgrcv_args *uap)
{
	struct proc *p = curproc;
	int msqid = uap->msqid;
	void *user_msgp = uap->msgp;
	size_t msgsz = uap->msgsz;
	long msgtyp = uap->msgtyp;
	int msgflg = uap->msgflg;
	size_t len;
	struct msqid_ds *msqptr;
	struct msg *msghdr;
	int eval;
	short next;

#ifdef MSG_DEBUG_OK
	printf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
	    msgsz, msgtyp, msgflg);
#endif

	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
		return (ENOSYS);

	/* Reduce the id to a table index; the sequence part is checked below. */
	msqid = IPCID_TO_IX(msqid);

	if (msqid < 0 || msqid >= msginfo.msgmni) {
#ifdef MSG_DEBUG_OK
		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
		    msginfo.msgmni);
#endif
		return(EINVAL);
	}

	msqptr = &msqids[msqid];
	/* msg_qbytes == 0 marks a slot that is not allocated. */
	if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
		printf("no such message queue id\n");
#endif
		return(EINVAL);
	}
	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
		printf("wrong sequence number\n");
#endif
		return(EINVAL);
	}

	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
#ifdef MSG_DEBUG_OK
		printf("requester doesn't have read access\n");
#endif
		return(eval);
	}

	/*
	 * Search for a suitable message and unlink it from the queue,
	 * sleeping and retrying while there is none.
	 */
	msghdr = NULL;
	while (msghdr == NULL) {
		if (msgtyp == 0) {
			/* Any type will do: take the head of the queue. */
			msghdr = msqptr->msg_first;
			if (msghdr != NULL) {
				if (msgsz < msghdr->msg_ts &&
				    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
					printf("first message on the queue is too big (want %d, got %d)\n",
					    msgsz, msghdr->msg_ts);
#endif
					return(E2BIG);
				}
				if (msqptr->msg_first == msqptr->msg_last) {
					/* It was the only message. */
					msqptr->msg_first = NULL;
					msqptr->msg_last = NULL;
				} else {
					msqptr->msg_first = msghdr->msg_next;
					if (msqptr->msg_first == NULL)
						panic("msg_first/last screwed up #1");
				}
			}
		} else {
			struct msg *previous;
			struct msg **prev;

			/*
			 * Walk the queue.  prev is the address of the link
			 * that points at the current message; previous is
			 * the message before it (NULL at the head).
			 */
			previous = NULL;
			prev = &(msqptr->msg_first);
			while ((msghdr = *prev) != NULL) {
				/*
				 * Is this message's type an exact match or is
				 * this message's type less than or equal to
				 * the absolute value of a negative msgtyp?
				 * Note that the second half of this test can
				 * NEVER be true if msgtyp is positive since
				 * msg_type is always positive!
				 *
				 * NOTE(review): for a negative msgtyp this
				 * accepts the first qualifying message in
				 * queue order; it does not look for the
				 * lowest qualifying type.
				 */

				if (msgtyp == msghdr->msg_type ||
				    msghdr->msg_type <= -msgtyp) {
#ifdef MSG_DEBUG_OK
					printf("found message type %d, requested %d\n",
					    msghdr->msg_type, msgtyp);
#endif
					if (msgsz < msghdr->msg_ts &&
					    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
						printf("requested message on the queue is too big (want %d, got %d)\n",
						    msgsz, msghdr->msg_ts);
#endif
						return(E2BIG);
					}
					/* Unlink, then fix up msg_last if we took the tail. */
					*prev = msghdr->msg_next;
					if (msghdr == msqptr->msg_last) {
						if (previous == NULL) {
							if (prev !=
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #2");
							msqptr->msg_first =
							    NULL;
							msqptr->msg_last =
							    NULL;
						} else {
							if (prev ==
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #3");
							msqptr->msg_last =
							    previous;
						}
					}
					break;
				}
				previous = msghdr;
				prev = &(msghdr->msg_next);
			}
		}

		/*
		 * We've either extracted the msghdr for the appropriate
		 * message or there isn't one.
		 * If there is one then bail out of this loop.
		 */

		if (msghdr != NULL)
			break;

		/*
		 * Hmph!  No message found.  Does the user want to wait?
		 */

		if ((msgflg & IPC_NOWAIT) != 0) {
#ifdef MSG_DEBUG_OK
			printf("no appropriate message found (msgtyp=%d)\n",
			    msgtyp);
#endif
			/* The SVID says to return ENOMSG. */
#ifdef ENOMSG
			return(ENOMSG);
#else
			/* Unfortunately, BSD doesn't define that code yet! */
			return(EAGAIN);
#endif
		}

		/*
		 * Wait for something to happen
		 */

#ifdef MSG_DEBUG_OK
		printf("msgrcv:  goodnight\n");
#endif
		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
#ifdef MSG_DEBUG_OK
		printf("msgrcv:  good morning (eval=%d)\n", eval);
#endif

		/* Any non-zero tsleep() result is reported as EINTR. */
		if (eval != 0) {
#ifdef MSG_DEBUG_OK
			/* NOTE(review): this trace says "msgsnd" but we are in msgrcv. */
			printf("msgsnd:  interrupted system call\n");
#endif
			return(EINTR);
		}

		/*
		 * Make sure that the msq queue still exists (and is still
		 * the same incarnation that the caller named)
		 */

		if (msqptr->msg_qbytes == 0 ||
		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
			printf("msqid deleted\n");
#endif
			return(EIDRM);
		}
	}

	/*
	 * Return the message to the user.
	 *
	 * First, do the bookkeeping (before we risk being interrupted).
	 */

	msqptr->msg_cbytes -= msghdr->msg_ts;
	msqptr->msg_qnum--;
	msqptr->msg_lrpid = p->p_pid;
	msqptr->msg_rtime = time_second;

	/*
	 * Make msgsz the actual amount that we'll be returning.
	 * Note that this effectively truncates the message if it is too long
	 * (since msgsz is never increased).
	 */

#ifdef MSG_DEBUG_OK
	printf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
	    msghdr->msg_ts);
#endif
	if (msgsz > msghdr->msg_ts)
		msgsz = msghdr->msg_ts;

	/*
	 * Return the type to the user.
	 */

	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
	    sizeof(msghdr->msg_type));
	if (eval != 0) {
#ifdef MSG_DEBUG_OK
		printf("error (%d) copying out message type\n", eval);
#endif
		/*
		 * The message is already off the queue; it is freed here,
		 * not put back.
		 */
		msg_freehdr(msghdr);
		wakeup((caddr_t)msqptr);
		return(eval);
	}
	/* The body goes right after the type field in the user buffer. */
	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);

	/*
	 * Return the segments to the user
	 */

	next = msghdr->msg_spot;
	for (len = 0; len < msgsz; len += msginfo.msgssz) {
		size_t tlen;

		/* The last piece may be shorter than one segment. */
		if (msgsz - len > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz - len;
		if (next <= -1)
			panic("next too low #3");
		if (next >= msginfo.msgseg)
			panic("next out of range #3");
		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
		    user_msgp, tlen);
		if (eval != 0) {
#ifdef MSG_DEBUG_OK
			printf("error (%d) copying out message segment\n",
			    eval);
#endif
			/* As above: the dequeued message is freed, not requeued. */
			msg_freehdr(msghdr);
			wakeup((caddr_t)msqptr);
			return(eval);
		}
		user_msgp = (char *)user_msgp + tlen;
		next = msgmaps[next].next;
	}

	/*
	 * Done, return the actual number of bytes copied out.
	 */

	msg_freehdr(msghdr);
	/* Wake everything sleeping on this queue. */
	wakeup((caddr_t)msqptr);
	uap->sysmsg_result = msgsz;
	return(0);
}
1060 
1061 static int
1062 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1063 {
1064 
1065 	return (SYSCTL_OUT(req, msqids,
1066 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1067 }
1068 
1069 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1070 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1071 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1072 
1073 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1074 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1075 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1076 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1077 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1078 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1079 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1080     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1081