xref: /dragonfly/sys/kern/sysv_msg.c (revision 678e8cc6)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/priv.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 #include <sys/mplock2.h>
37 
38 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
39 
40 static void msginit (void *);
41 
42 #define MSG_DEBUG
43 #undef MSG_DEBUG_OK
44 
45 static void msg_freehdr (struct msg *msghdr);
46 
47 /* XXX casting to (sy_call_t *) is bogus, as usual. */
48 static sy_call_t *msgcalls[] = {
49 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
50 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
51 };
52 
53 struct msg {
54 	struct	msg *msg_next;	/* next msg in the chain */
55 	long	msg_type;	/* type of this message */
56     				/* >0 -> type of this message */
57     				/* 0 -> free header */
58 	u_short	msg_ts;		/* size of this message */
59 	short	msg_spot;	/* location of start of msg in buffer */
60 };
61 
62 
/*
 * Compile-time defaults for the message subsystem.  Each one may be
 * overridden by defining the macro before this point; MSGMAX is always
 * derived from the segment size and segment count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* bytes per segment; must be 2^N */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* segment count; must be less than 32767 */
#endif

#define MSGMAX	(MSGSSZ*MSGSEG)	/* total bytes in the segment pool */

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40		/* number of queue identifiers */
#endif

#if !defined(MSGTQL)
#define MSGTQL	40		/* number of message headers system-wide */
#endif
79 
80 /*
81  * Based on the configuration parameters described in an SVR2 (yes, two)
82  * config(1m) man page.
83  *
84  * Each message is broken up and stored in segments that are msgssz bytes
85  * long.  For efficiency reasons, this should be a power of two.  Also,
86  * it doesn't make sense if it is less than 8 or greater than about 256.
87  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
88  * two between 8 and 1024 inclusive (and panic's if it isn't).
89  */
90 struct msginfo msginfo = {
91                 MSGMAX,         /* max chars in a message */
92                 MSGMNI,         /* # of message queue identifiers */
93                 MSGMNB,         /* max chars in a queue */
94                 MSGTQL,         /* max messages in system */
95                 MSGSSZ,         /* size of a message segment */
96                 		/* (must be small power of 2 greater than 4) */
97                 MSGSEG          /* number of message segments */
98 };
99 
/*
 * Conversions between a queue slot / msqid_ds and a user-visible msqid
 * (specific to this implementation): the low 16 bits carry the slot
 * index, the high 16 bits carry the slot's sequence number.
 */
#define MSQID(ix,ds) \
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)		/* slot index of an id */
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)	/* sequence number of an id */
107 
/*
 * The rest of this file is specific to this particular implementation.
 */

/*
 * One entry per message segment.  Entries are chained through 'next',
 * either as part of a message's text or on the free_msgmaps list.
 */
struct msgmap {
	short	next;	/* -1: available / end of chain */
			/* 0..(MSGSEG-1): index of next segment */
};
117 
/* Bit kept in msg_perm.mode while a msqid_ds is locked. */
#define MSG_LOCKED	01000

/* Segment bookkeeping. */
static char *msgpool;			/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;		/* MSGSEG msgmap structures */
static short free_msgmaps;		/* head of the free map entry list */
static int nfree_msgmaps;		/* # of free map entries */

/* Message header bookkeeping. */
static struct msg *msghdrs;		/* MSGTQL msg headers */
static struct msg *free_msghdrs;	/* list of free msg headers */

/* Queue table. */
static struct msqid_ds *msqids;		/* MSGMNI msqid_ds struct's */
126 static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
127 
128 static void
129 msginit(void *dummy)
130 {
131 	int i;
132 
133 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
134 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
135 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
136 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
137 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
138 
139 	/*
140 	 * msginfo.msgssz should be a power of two for efficiency reasons.
141 	 * It is also pretty silly if msginfo.msgssz is less than 8
142 	 * or greater than about 256 so ...
143 	 */
144 
145 	i = 8;
146 	while (i < 1024 && i != msginfo.msgssz)
147 		i <<= 1;
148     	if (i != msginfo.msgssz) {
149 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
150 		    msginfo.msgssz);
151 		panic("msginfo.msgssz not a small power of 2");
152 	}
153 
154 	if (msginfo.msgseg > 32767) {
155 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
156 		panic("msginfo.msgseg > 32767");
157 	}
158 
159 	for (i = 0; i < msginfo.msgseg; i++) {
160 		if (i > 0)
161 			msgmaps[i-1].next = i;
162 		msgmaps[i].next = -1;	/* implies entry is available */
163 	}
164 	free_msgmaps = 0;
165 	nfree_msgmaps = msginfo.msgseg;
166 
167 	for (i = 0; i < msginfo.msgtql; i++) {
168 		msghdrs[i].msg_type = 0;
169 		if (i > 0)
170 			msghdrs[i-1].msg_next = &msghdrs[i];
171 		msghdrs[i].msg_next = NULL;
172     	}
173 	free_msghdrs = &msghdrs[0];
174 
175 	for (i = 0; i < msginfo.msgmni; i++) {
176 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
177 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
178 		msqids[i].msg_perm.mode = 0;
179 	}
180 }
181 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
182 
183 /*
184  * Entry point for all MSG calls
185  *
186  * msgsys_args(int which, int a2, ...) (VARARGS)
187  *
188  * MPALMOSTSAFE
189  */
190 int
191 sys_msgsys(struct msgsys_args *uap)
192 {
193 	struct thread *td = curthread;
194 	unsigned int which = (unsigned int)uap->which;
195 	int error;
196 
197 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
198 		return (ENOSYS);
199 
200 	if (which >= NELEM(msgcalls))
201 		return (EINVAL);
202 	bcopy(&uap->a2, &uap->which,
203 	      sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
204 	get_mplock();
205 	error = (*msgcalls[which])(uap);
206 	rel_mplock();
207 	return (error);
208 }
209 
210 static void
211 msg_freehdr(struct msg *msghdr)
212 {
213 	while (msghdr->msg_ts > 0) {
214 		short next;
215 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
216 			panic("msghdr->msg_spot out of range");
217 		next = msgmaps[msghdr->msg_spot].next;
218 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
219 		free_msgmaps = msghdr->msg_spot;
220 		nfree_msgmaps++;
221 		msghdr->msg_spot = next;
222 		if (msghdr->msg_ts >= msginfo.msgssz)
223 			msghdr->msg_ts -= msginfo.msgssz;
224 		else
225 			msghdr->msg_ts = 0;
226 	}
227 	if (msghdr->msg_spot != -1)
228 		panic("msghdr->msg_spot != -1");
229 	msghdr->msg_next = free_msghdrs;
230 	free_msghdrs = msghdr;
231 }
232 
233 /*
234  * MPALMOSTSAFE
235  */
236 int
237 sys_msgctl(struct msgctl_args *uap)
238 {
239 	struct thread *td = curthread;
240 	struct proc *p = td->td_proc;
241 	int msqid = uap->msqid;
242 	int cmd = uap->cmd;
243 	struct msqid_ds *user_msqptr = uap->buf;
244 	int rval, eval;
245 	struct msqid_ds msqbuf;
246 	struct msqid_ds *msqptr;
247 
248 #ifdef MSG_DEBUG_OK
249 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
250 #endif
251 
252 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
253 		return (ENOSYS);
254 
255 	get_mplock();
256 	msqid = IPCID_TO_IX(msqid);
257 
258 	if (msqid < 0 || msqid >= msginfo.msgmni) {
259 #ifdef MSG_DEBUG_OK
260 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
261 		    msginfo.msgmni);
262 #endif
263 		eval = EINVAL;
264 		goto done;
265 	}
266 
267 	msqptr = &msqids[msqid];
268 
269 	if (msqptr->msg_qbytes == 0) {
270 #ifdef MSG_DEBUG_OK
271 		kprintf("no such msqid\n");
272 #endif
273 		eval = EINVAL;
274 		goto done;
275 	}
276 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
277 #ifdef MSG_DEBUG_OK
278 		kprintf("wrong sequence number\n");
279 #endif
280 		eval = EINVAL;
281 		goto done;
282 	}
283 
284 	rval = 0;
285 
286 	switch (cmd) {
287 	case IPC_RMID:
288 	{
289 		struct msg *msghdr;
290 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
291 			break;
292 		/* Free the message headers */
293 		msghdr = msqptr->msg_first;
294 		while (msghdr != NULL) {
295 			struct msg *msghdr_tmp;
296 
297 			/* Free the segments of each message */
298 			msqptr->msg_cbytes -= msghdr->msg_ts;
299 			msqptr->msg_qnum--;
300 			msghdr_tmp = msghdr;
301 			msghdr = msghdr->msg_next;
302 			msg_freehdr(msghdr_tmp);
303 		}
304 
305 		if (msqptr->msg_cbytes != 0)
306 			panic("msg_cbytes is screwed up");
307 		if (msqptr->msg_qnum != 0)
308 			panic("msg_qnum is screwed up");
309 
310 		msqptr->msg_qbytes = 0;	/* Mark it as free */
311 
312 		wakeup((caddr_t)msqptr);
313 	}
314 
315 		break;
316 
317 	case IPC_SET:
318 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
319 			break;
320 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
321 			break;
322 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
323 			eval = priv_check(td, PRIV_ROOT);
324 			if (eval)
325 				break;
326 		}
327 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
328 #ifdef MSG_DEBUG_OK
329 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
330 			    msginfo.msgmnb);
331 #endif
332 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
333 		}
334 		if (msqbuf.msg_qbytes == 0) {
335 #ifdef MSG_DEBUG_OK
336 			kprintf("can't reduce msg_qbytes to 0\n");
337 #endif
338 			eval = EINVAL;		/* non-standard errno! */
339 			break;
340 		}
341 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
342 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
343 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
344 					(msqbuf.msg_perm.mode & 0777);
345 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
346 		msqptr->msg_ctime = time_second;
347 		break;
348 
349 	case IPC_STAT:
350 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
351 #ifdef MSG_DEBUG_OK
352 			kprintf("requester doesn't have read access\n");
353 #endif
354 			eval = EINVAL;
355 			break;
356 		}
357 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
358 		break;
359 
360 	default:
361 #ifdef MSG_DEBUG_OK
362 		kprintf("invalid command %d\n", cmd);
363 #endif
364 		eval = EINVAL;
365 		break;
366 	}
367 done:
368 	rel_mplock();
369 	if (eval == 0)
370 		uap->sysmsg_result = rval;
371 	return(eval);
372 }
373 
374 /*
375  * MPALMOSTSAFE
376  */
377 int
378 sys_msgget(struct msgget_args *uap)
379 {
380 	struct thread *td = curthread;
381 	int msqid, eval;
382 	int key = uap->key;
383 	int msgflg = uap->msgflg;
384 	struct ucred *cred = td->td_ucred;
385 	struct msqid_ds *msqptr = NULL;
386 
387 #ifdef MSG_DEBUG_OK
388 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
389 #endif
390 	if (!jail_sysvipc_allowed && cred->cr_prison != NULL)
391 		return (ENOSYS);
392 
393 	eval = 0;
394 	get_mplock();
395 
396 	if (key != IPC_PRIVATE) {
397 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
398 			msqptr = &msqids[msqid];
399 			if (msqptr->msg_qbytes != 0 &&
400 			    msqptr->msg_perm.key == key)
401 				break;
402 		}
403 		if (msqid < msginfo.msgmni) {
404 #ifdef MSG_DEBUG_OK
405 			kprintf("found public key\n");
406 #endif
407 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
408 #ifdef MSG_DEBUG_OK
409 				kprintf("not exclusive\n");
410 #endif
411 				eval = EEXIST;
412 				goto done;
413 			}
414 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
415 #ifdef MSG_DEBUG_OK
416 				kprintf("requester doesn't have 0%o access\n",
417 				    msgflg & 0700);
418 #endif
419 				goto done;
420 			}
421 			goto done;
422 		}
423 	}
424 
425 #ifdef MSG_DEBUG_OK
426 	kprintf("need to allocate the msqid_ds\n");
427 #endif
428 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
429 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
430 			/*
431 			 * Look for an unallocated and unlocked msqid_ds.
432 			 * msqid_ds's can be locked by msgsnd or msgrcv while
433 			 * they are copying the message in/out.  We can't
434 			 * re-use the entry until they release it.
435 			 */
436 			msqptr = &msqids[msqid];
437 			if (msqptr->msg_qbytes == 0 &&
438 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
439 				break;
440 		}
441 		if (msqid == msginfo.msgmni) {
442 #ifdef MSG_DEBUG_OK
443 			kprintf("no more msqid_ds's available\n");
444 #endif
445 			eval = ENOSPC;
446 			goto done;
447 		}
448 #ifdef MSG_DEBUG_OK
449 		kprintf("msqid %d is available\n", msqid);
450 #endif
451 		msqptr->msg_perm.key = key;
452 		msqptr->msg_perm.cuid = cred->cr_uid;
453 		msqptr->msg_perm.uid = cred->cr_uid;
454 		msqptr->msg_perm.cgid = cred->cr_gid;
455 		msqptr->msg_perm.gid = cred->cr_gid;
456 		msqptr->msg_perm.mode = (msgflg & 0777);
457 		/* Make sure that the returned msqid is unique */
458 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
459 		msqptr->msg_first = NULL;
460 		msqptr->msg_last = NULL;
461 		msqptr->msg_cbytes = 0;
462 		msqptr->msg_qnum = 0;
463 		msqptr->msg_qbytes = msginfo.msgmnb;
464 		msqptr->msg_lspid = 0;
465 		msqptr->msg_lrpid = 0;
466 		msqptr->msg_stime = 0;
467 		msqptr->msg_rtime = 0;
468 		msqptr->msg_ctime = time_second;
469 	} else {
470 #ifdef MSG_DEBUG_OK
471 		kprintf("didn't find it and wasn't asked to create it\n");
472 #endif
473 		eval = ENOENT;
474 	}
475 
476 done:
477 	rel_mplock();
478 	/* Construct the unique msqid */
479 	if (eval == 0)
480 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
481 	return(eval);
482 }
483 
484 /*
485  * MPALMOSTSAFE
486  */
487 int
488 sys_msgsnd(struct msgsnd_args *uap)
489 {
490 	struct thread *td = curthread;
491 	int msqid = uap->msqid;
492 	void *user_msgp = uap->msgp;
493 	size_t msgsz = uap->msgsz;
494 	int msgflg = uap->msgflg;
495 	int segs_needed, eval;
496 	struct msqid_ds *msqptr;
497 	struct msg *msghdr;
498 	short next;
499 
500 #ifdef MSG_DEBUG_OK
501 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
502 	    msgflg);
503 #endif
504 
505 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
506 		return (ENOSYS);
507 
508 	get_mplock();
509 	msqid = IPCID_TO_IX(msqid);
510 
511 	if (msqid < 0 || msqid >= msginfo.msgmni) {
512 #ifdef MSG_DEBUG_OK
513 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
514 		    msginfo.msgmni);
515 #endif
516 		eval = EINVAL;
517 		goto done;
518 	}
519 
520 	msqptr = &msqids[msqid];
521 	if (msqptr->msg_qbytes == 0) {
522 #ifdef MSG_DEBUG_OK
523 		kprintf("no such message queue id\n");
524 #endif
525 		eval = EINVAL;
526 		goto done;
527 	}
528 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
529 #ifdef MSG_DEBUG_OK
530 		kprintf("wrong sequence number\n");
531 #endif
532 		eval = EINVAL;
533 		goto done;
534 	}
535 
536 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
537 #ifdef MSG_DEBUG_OK
538 		kprintf("requester doesn't have write access\n");
539 #endif
540 		eval = EINVAL;
541 		goto done;
542 	}
543 
544 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
545 #ifdef MSG_DEBUG_OK
546 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
547 	    segs_needed);
548 #endif
549 	for (;;) {
550 		int need_more_resources = 0;
551 
552 		/*
553 		 * check msgsz
554 		 * (inside this loop in case msg_qbytes changes while we sleep)
555 		 */
556 
557 		if (msgsz > msqptr->msg_qbytes) {
558 #ifdef MSG_DEBUG_OK
559 			kprintf("msgsz > msqptr->msg_qbytes\n");
560 #endif
561 			eval = EINVAL;
562 			goto done;
563 		}
564 
565 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
566 #ifdef MSG_DEBUG_OK
567 			kprintf("msqid is locked\n");
568 #endif
569 			need_more_resources = 1;
570 		}
571 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
572 #ifdef MSG_DEBUG_OK
573 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
574 #endif
575 			need_more_resources = 1;
576 		}
577 		if (segs_needed > nfree_msgmaps) {
578 #ifdef MSG_DEBUG_OK
579 			kprintf("segs_needed > nfree_msgmaps\n");
580 #endif
581 			need_more_resources = 1;
582 		}
583 		if (free_msghdrs == NULL) {
584 #ifdef MSG_DEBUG_OK
585 			kprintf("no more msghdrs\n");
586 #endif
587 			need_more_resources = 1;
588 		}
589 
590 		if (need_more_resources) {
591 			int we_own_it;
592 
593 			if ((msgflg & IPC_NOWAIT) != 0) {
594 #ifdef MSG_DEBUG_OK
595 				kprintf("need more resources but caller doesn't want to wait\n");
596 #endif
597 				eval = EAGAIN;
598 				goto done;
599 			}
600 
601 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
602 #ifdef MSG_DEBUG_OK
603 				kprintf("we don't own the msqid_ds\n");
604 #endif
605 				we_own_it = 0;
606 			} else {
607 				/* Force later arrivals to wait for our
608 				   request */
609 #ifdef MSG_DEBUG_OK
610 				kprintf("we own the msqid_ds\n");
611 #endif
612 				msqptr->msg_perm.mode |= MSG_LOCKED;
613 				we_own_it = 1;
614 			}
615 #ifdef MSG_DEBUG_OK
616 			kprintf("goodnight\n");
617 #endif
618 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
619 #ifdef MSG_DEBUG_OK
620 			kprintf("good morning, eval=%d\n", eval);
621 #endif
622 			if (we_own_it)
623 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
624 			if (eval != 0) {
625 #ifdef MSG_DEBUG_OK
626 				kprintf("msgsnd:  interrupted system call\n");
627 #endif
628 				eval = EINTR;
629 				goto done;
630 			}
631 
632 			/*
633 			 * Make sure that the msq queue still exists
634 			 */
635 
636 			if (msqptr->msg_qbytes == 0) {
637 #ifdef MSG_DEBUG_OK
638 				kprintf("msqid deleted\n");
639 #endif
640 				eval = EIDRM;
641 				goto done;
642 			}
643 
644 		} else {
645 #ifdef MSG_DEBUG_OK
646 			kprintf("got all the resources that we need\n");
647 #endif
648 			break;
649 		}
650 	}
651 
652 	/*
653 	 * We have the resources that we need.
654 	 * Make sure!
655 	 */
656 
657 	if (msqptr->msg_perm.mode & MSG_LOCKED)
658 		panic("msg_perm.mode & MSG_LOCKED");
659 	if (segs_needed > nfree_msgmaps)
660 		panic("segs_needed > nfree_msgmaps");
661 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
662 		panic("msgsz + msg_cbytes > msg_qbytes");
663 	if (free_msghdrs == NULL)
664 		panic("no more msghdrs");
665 
666 	/*
667 	 * Re-lock the msqid_ds in case we page-fault when copying in the
668 	 * message
669 	 */
670 
671 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
672 		panic("msqid_ds is already locked");
673 	msqptr->msg_perm.mode |= MSG_LOCKED;
674 
675 	/*
676 	 * Allocate a message header
677 	 */
678 
679 	msghdr = free_msghdrs;
680 	free_msghdrs = msghdr->msg_next;
681 	msghdr->msg_spot = -1;
682 	msghdr->msg_ts = msgsz;
683 
684 	/*
685 	 * Allocate space for the message
686 	 */
687 
688 	while (segs_needed > 0) {
689 		if (nfree_msgmaps <= 0)
690 			panic("not enough msgmaps");
691 		if (free_msgmaps == -1)
692 			panic("nil free_msgmaps");
693 		next = free_msgmaps;
694 		if (next <= -1)
695 			panic("next too low #1");
696 		if (next >= msginfo.msgseg)
697 			panic("next out of range #1");
698 #ifdef MSG_DEBUG_OK
699 		kprintf("allocating segment %d to message\n", next);
700 #endif
701 		free_msgmaps = msgmaps[next].next;
702 		nfree_msgmaps--;
703 		msgmaps[next].next = msghdr->msg_spot;
704 		msghdr->msg_spot = next;
705 		segs_needed--;
706 	}
707 
708 	/*
709 	 * Copy in the message type
710 	 */
711 
712 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
713 	    sizeof(msghdr->msg_type))) != 0) {
714 #ifdef MSG_DEBUG_OK
715 		kprintf("error %d copying the message type\n", eval);
716 #endif
717 		msg_freehdr(msghdr);
718 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
719 		wakeup((caddr_t)msqptr);
720 		goto done;
721 	}
722 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
723 
724 	/*
725 	 * Validate the message type
726 	 */
727 
728 	if (msghdr->msg_type < 1) {
729 		msg_freehdr(msghdr);
730 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
731 		wakeup((caddr_t)msqptr);
732 #ifdef MSG_DEBUG_OK
733 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
734 #endif
735 		eval = EINVAL;
736 		goto done;
737 	}
738 
739 	/*
740 	 * Copy in the message body
741 	 */
742 
743 	next = msghdr->msg_spot;
744 	while (msgsz > 0) {
745 		size_t tlen;
746 		if (msgsz > msginfo.msgssz)
747 			tlen = msginfo.msgssz;
748 		else
749 			tlen = msgsz;
750 		if (next <= -1)
751 			panic("next too low #2");
752 		if (next >= msginfo.msgseg)
753 			panic("next out of range #2");
754 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
755 		    tlen)) != 0) {
756 #ifdef MSG_DEBUG_OK
757 			kprintf("error %d copying in message segment\n", eval);
758 #endif
759 			msg_freehdr(msghdr);
760 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
761 			wakeup((caddr_t)msqptr);
762 			goto done;
763 		}
764 		msgsz -= tlen;
765 		user_msgp = (char *)user_msgp + tlen;
766 		next = msgmaps[next].next;
767 	}
768 	if (next != -1)
769 		panic("didn't use all the msg segments");
770 
771 	/*
772 	 * We've got the message.  Unlock the msqid_ds.
773 	 */
774 
775 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
776 
777 	/*
778 	 * Make sure that the msqid_ds is still allocated.
779 	 */
780 
781 	if (msqptr->msg_qbytes == 0) {
782 		msg_freehdr(msghdr);
783 		wakeup((caddr_t)msqptr);
784 		eval = EIDRM;
785 		goto done;
786 	}
787 
788 	/*
789 	 * Put the message into the queue
790 	 */
791 
792 	if (msqptr->msg_first == NULL) {
793 		msqptr->msg_first = msghdr;
794 		msqptr->msg_last = msghdr;
795 	} else {
796 		msqptr->msg_last->msg_next = msghdr;
797 		msqptr->msg_last = msghdr;
798 	}
799 	msqptr->msg_last->msg_next = NULL;
800 
801 	msqptr->msg_cbytes += msghdr->msg_ts;
802 	msqptr->msg_qnum++;
803 	msqptr->msg_lspid = td->td_proc->p_pid;
804 	msqptr->msg_stime = time_second;
805 
806 	wakeup((caddr_t)msqptr);
807 	eval = 0;
808 done:
809 	rel_mplock();
810 	if (eval == 0)
811 		uap->sysmsg_result = 0;
812 	return (eval);
813 }
814 
815 /*
816  * MPALMOSTSAFE
817  */
818 int
819 sys_msgrcv(struct msgrcv_args *uap)
820 {
821 	struct thread *td = curthread;
822 	int msqid = uap->msqid;
823 	void *user_msgp = uap->msgp;
824 	size_t msgsz = uap->msgsz;
825 	long msgtyp = uap->msgtyp;
826 	int msgflg = uap->msgflg;
827 	size_t len;
828 	struct msqid_ds *msqptr;
829 	struct msg *msghdr;
830 	int eval;
831 	short next;
832 
833 #ifdef MSG_DEBUG_OK
834 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
835 	    msgsz, msgtyp, msgflg);
836 #endif
837 
838 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
839 		return (ENOSYS);
840 
841 	get_mplock();
842 	msqid = IPCID_TO_IX(msqid);
843 
844 	if (msqid < 0 || msqid >= msginfo.msgmni) {
845 #ifdef MSG_DEBUG_OK
846 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
847 		    msginfo.msgmni);
848 #endif
849 		eval = EINVAL;
850 		goto done;
851 	}
852 
853 	msqptr = &msqids[msqid];
854 	if (msqptr->msg_qbytes == 0) {
855 #ifdef MSG_DEBUG_OK
856 		kprintf("no such message queue id\n");
857 #endif
858 		eval = EINVAL;
859 		goto done;
860 	}
861 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
862 #ifdef MSG_DEBUG_OK
863 		kprintf("wrong sequence number\n");
864 #endif
865 		eval = EINVAL;
866 		goto done;
867 	}
868 
869 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
870 #ifdef MSG_DEBUG_OK
871 		kprintf("requester doesn't have read access\n");
872 #endif
873 		goto done;
874 	}
875 
876 	msghdr = NULL;
877 	while (msghdr == NULL) {
878 		if (msgtyp == 0) {
879 			msghdr = msqptr->msg_first;
880 			if (msghdr != NULL) {
881 				if (msgsz < msghdr->msg_ts &&
882 				    (msgflg & MSG_NOERROR) == 0) {
883 #ifdef MSG_DEBUG_OK
884 					kprintf("first message on the queue is too big (want %d, got %d)\n",
885 					    msgsz, msghdr->msg_ts);
886 #endif
887 					eval = E2BIG;
888 					goto done;
889 				}
890 				if (msqptr->msg_first == msqptr->msg_last) {
891 					msqptr->msg_first = NULL;
892 					msqptr->msg_last = NULL;
893 				} else {
894 					msqptr->msg_first = msghdr->msg_next;
895 					if (msqptr->msg_first == NULL)
896 						panic("msg_first/last screwed up #1");
897 				}
898 			}
899 		} else {
900 			struct msg *previous;
901 			struct msg **prev;
902 
903 			previous = NULL;
904 			prev = &(msqptr->msg_first);
905 			while ((msghdr = *prev) != NULL) {
906 				/*
907 				 * Is this message's type an exact match or is
908 				 * this message's type less than or equal to
909 				 * the absolute value of a negative msgtyp?
910 				 * Note that the second half of this test can
911 				 * NEVER be true if msgtyp is positive since
912 				 * msg_type is always positive!
913 				 */
914 
915 				if (msgtyp == msghdr->msg_type ||
916 				    msghdr->msg_type <= -msgtyp) {
917 #ifdef MSG_DEBUG_OK
918 					kprintf("found message type %d, requested %d\n",
919 					    msghdr->msg_type, msgtyp);
920 #endif
921 					if (msgsz < msghdr->msg_ts &&
922 					    (msgflg & MSG_NOERROR) == 0) {
923 #ifdef MSG_DEBUG_OK
924 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
925 						    msgsz, msghdr->msg_ts);
926 #endif
927 						eval = E2BIG;
928 						goto done;
929 					}
930 					*prev = msghdr->msg_next;
931 					if (msghdr == msqptr->msg_last) {
932 						if (previous == NULL) {
933 							if (prev !=
934 							    &msqptr->msg_first)
935 								panic("msg_first/last screwed up #2");
936 							msqptr->msg_first =
937 							    NULL;
938 							msqptr->msg_last =
939 							    NULL;
940 						} else {
941 							if (prev ==
942 							    &msqptr->msg_first)
943 								panic("msg_first/last screwed up #3");
944 							msqptr->msg_last =
945 							    previous;
946 						}
947 					}
948 					break;
949 				}
950 				previous = msghdr;
951 				prev = &(msghdr->msg_next);
952 			}
953 		}
954 
955 		/*
956 		 * We've either extracted the msghdr for the appropriate
957 		 * message or there isn't one.
958 		 * If there is one then bail out of this loop.
959 		 */
960 
961 		if (msghdr != NULL)
962 			break;
963 
964 		/*
965 		 * Hmph!  No message found.  Does the user want to wait?
966 		 */
967 
968 		if ((msgflg & IPC_NOWAIT) != 0) {
969 #ifdef MSG_DEBUG_OK
970 			kprintf("no appropriate message found (msgtyp=%d)\n",
971 			    msgtyp);
972 #endif
973 			/* The SVID says to return ENOMSG. */
974 #ifdef ENOMSG
975 			eval = ENOMSG;
976 #else
977 			/* Unfortunately, BSD doesn't define that code yet! */
978 			eval = EAGAIN;
979 #endif
980 			goto done;
981 		}
982 
983 		/*
984 		 * Wait for something to happen
985 		 */
986 
987 #ifdef MSG_DEBUG_OK
988 		kprintf("msgrcv:  goodnight\n");
989 #endif
990 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
991 #ifdef MSG_DEBUG_OK
992 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
993 #endif
994 
995 		if (eval != 0) {
996 #ifdef MSG_DEBUG_OK
997 			kprintf("msgsnd:  interrupted system call\n");
998 #endif
999 			eval = EINTR;
1000 			goto done;
1001 		}
1002 
1003 		/*
1004 		 * Make sure that the msq queue still exists
1005 		 */
1006 
1007 		if (msqptr->msg_qbytes == 0 ||
1008 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1009 #ifdef MSG_DEBUG_OK
1010 			kprintf("msqid deleted\n");
1011 #endif
1012 			eval = EIDRM;
1013 			goto done;
1014 		}
1015 	}
1016 
1017 	/*
1018 	 * Return the message to the user.
1019 	 *
1020 	 * First, do the bookkeeping (before we risk being interrupted).
1021 	 */
1022 
1023 	msqptr->msg_cbytes -= msghdr->msg_ts;
1024 	msqptr->msg_qnum--;
1025 	msqptr->msg_lrpid = td->td_proc->p_pid;
1026 	msqptr->msg_rtime = time_second;
1027 
1028 	/*
1029 	 * Make msgsz the actual amount that we'll be returning.
1030 	 * Note that this effectively truncates the message if it is too long
1031 	 * (since msgsz is never increased).
1032 	 */
1033 
1034 #ifdef MSG_DEBUG_OK
1035 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1036 	    msghdr->msg_ts);
1037 #endif
1038 	if (msgsz > msghdr->msg_ts)
1039 		msgsz = msghdr->msg_ts;
1040 
1041 	/*
1042 	 * Return the type to the user.
1043 	 */
1044 
1045 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1046 	    sizeof(msghdr->msg_type));
1047 	if (eval != 0) {
1048 #ifdef MSG_DEBUG_OK
1049 		kprintf("error (%d) copying out message type\n", eval);
1050 #endif
1051 		msg_freehdr(msghdr);
1052 		wakeup((caddr_t)msqptr);
1053 		goto done;
1054 	}
1055 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1056 
1057 	/*
1058 	 * Return the segments to the user
1059 	 */
1060 
1061 	next = msghdr->msg_spot;
1062 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1063 		size_t tlen;
1064 
1065 		if (msgsz - len > msginfo.msgssz)
1066 			tlen = msginfo.msgssz;
1067 		else
1068 			tlen = msgsz - len;
1069 		if (next <= -1)
1070 			panic("next too low #3");
1071 		if (next >= msginfo.msgseg)
1072 			panic("next out of range #3");
1073 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1074 		    user_msgp, tlen);
1075 		if (eval != 0) {
1076 #ifdef MSG_DEBUG_OK
1077 			kprintf("error (%d) copying out message segment\n",
1078 			    eval);
1079 #endif
1080 			msg_freehdr(msghdr);
1081 			wakeup((caddr_t)msqptr);
1082 			goto done;
1083 		}
1084 		user_msgp = (char *)user_msgp + tlen;
1085 		next = msgmaps[next].next;
1086 	}
1087 
1088 	/*
1089 	 * Done, return the actual number of bytes copied out.
1090 	 */
1091 
1092 	msg_freehdr(msghdr);
1093 	wakeup((caddr_t)msqptr);
1094 	eval = 0;
1095 done:
1096 	rel_mplock();
1097 	if (eval == 0)
1098 		uap->sysmsg_result = msgsz;
1099 	return(eval);
1100 }
1101 
1102 static int
1103 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1104 {
1105 
1106 	return (SYSCTL_OUT(req, msqids,
1107 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1108 }
1109 
1110 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1111 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1112 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1113 
1114 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1115     "Max characters in message");
1116 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0,
1117     "Max message queue identifiers");
1118 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0,
1119     "Max characters in message queue");
1120 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0,
1121     "Max SVID messages in system");
1122 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0,
1123     "Power-of-two size of a message segment");
1124 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0,
1125     "Number of message segments");
1126 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1127     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1128