xref: /dragonfly/sys/kern/sysv_msg.c (revision ad9f8794)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/priv.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 #include <sys/mplock2.h>
37 
38 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
39 
40 static void msginit (void *);
41 
42 #define MSG_DEBUG
43 #undef MSG_DEBUG_OK
44 
45 static void msg_freehdr (struct msg *msghdr);
46 
47 /* XXX casting to (sy_call_t *) is bogus, as usual. */
48 static sy_call_t *msgcalls[] = {
49 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
50 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
51 };
52 
53 struct msg {
54 	struct	msg *msg_next;	/* next msg in the chain */
55 	long	msg_type;	/* type of this message */
56     				/* >0 -> type of this message */
57     				/* 0 -> free header */
58 	u_short	msg_ts;		/* size of this message */
59 	short	msg_spot;	/* location of start of msg in buffer */
60 };
61 
62 
/*
 * Compile-time defaults for the message subsystem.  Every value except
 * MSGMAX can be supplied from outside (e.g. the kernel configuration);
 * the fallback below is used only when it was not.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* bytes per segment; must be 2^N */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* segment count; msginit() rejects > 32767 */
#endif

/* Largest message: every segment of the pool used by one message. */
#define MSGMAX	(MSGSSZ * MSGSEG)

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
79 
80 /*
81  * Based on the configuration parameters described in an SVR2 (yes, two)
82  * config(1m) man page.
83  *
84  * Each message is broken up and stored in segments that are msgssz bytes
85  * long.  For efficiency reasons, this should be a power of two.  Also,
86  * it doesn't make sense if it is less than 8 or greater than about 256.
87  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
88  * two between 8 and 1024 inclusive (and panic's if it isn't).
89  */
90 struct msginfo msginfo = {
91                 MSGMAX,         /* max chars in a message */
92                 MSGMNI,         /* # of message queue identifiers */
93                 MSGMNB,         /* max chars in a queue */
94                 MSGTQL,         /* max messages in system */
95                 MSGSSZ,         /* size of a message segment */
96                 		/* (must be small power of 2 greater than 4) */
97                 MSGSEG          /* number of message segments */
98 };
99 
/*
 * Helpers converting between a table index / msqid_ds pair and the
 * user-visible msqid (specific to this implementation).  The low 16 bits
 * of an id hold the table index, the high 16 bits the sequence number.
 */
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
107 
108 /*
109  * The rest of this file is specific to this particular implementation.
110  */
111 
/*
 * One entry per segment of msgpool.  Entries are chained through `next',
 * either into the segment list of a message or into the free list headed
 * by free_msgmaps.
 */
struct msgmap {
	short	next;	/* -1: end of chain, 0..(MSGSEG-1): next segment */
};
117 
/* Flag kept in msg_perm.mode while a msqid_ds is locked. */
#define MSG_LOCKED	01000

/* Tables allocated once by msginit(). */
static char *msgpool;			/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;		/* MSGSEG msgmap structures */
static struct msg *msghdrs;		/* MSGTQL msg headers */
static struct msqid_ds *msqids;		/* MSGMNI msqid_ds struct's */

/* Free lists threaded through the tables above. */
static short free_msgmaps;		/* head of free msgmap entries */
static int nfree_msgmaps;		/* # of free msgmap entries */
static struct msg *free_msghdrs;	/* head of free msg headers */
127 
128 static void
129 msginit(void *dummy)
130 {
131 	int i;
132 
133 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
134 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
135 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
136 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
137 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
138 
139 	/*
140 	 * msginfo.msgssz should be a power of two for efficiency reasons.
141 	 * It is also pretty silly if msginfo.msgssz is less than 8
142 	 * or greater than about 256 so ...
143 	 */
144 
145 	i = 8;
146 	while (i < 1024 && i != msginfo.msgssz)
147 		i <<= 1;
148     	if (i != msginfo.msgssz) {
149 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
150 		    msginfo.msgssz);
151 		panic("msginfo.msgssz not a small power of 2");
152 	}
153 
154 	if (msginfo.msgseg > 32767) {
155 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
156 		panic("msginfo.msgseg > 32767");
157 	}
158 
159 	if (msgmaps == NULL)
160 		panic("msgmaps is NULL");
161 
162 	for (i = 0; i < msginfo.msgseg; i++) {
163 		if (i > 0)
164 			msgmaps[i-1].next = i;
165 		msgmaps[i].next = -1;	/* implies entry is available */
166 	}
167 	free_msgmaps = 0;
168 	nfree_msgmaps = msginfo.msgseg;
169 
170 	if (msghdrs == NULL)
171 		panic("msghdrs is NULL");
172 
173 	for (i = 0; i < msginfo.msgtql; i++) {
174 		msghdrs[i].msg_type = 0;
175 		if (i > 0)
176 			msghdrs[i-1].msg_next = &msghdrs[i];
177 		msghdrs[i].msg_next = NULL;
178     	}
179 	free_msghdrs = &msghdrs[0];
180 
181 	if (msqids == NULL)
182 		panic("msqids is NULL");
183 
184 	for (i = 0; i < msginfo.msgmni; i++) {
185 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
186 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
187 		msqids[i].msg_perm.mode = 0;
188 	}
189 }
190 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
191 
192 /*
193  * Entry point for all MSG calls
194  *
195  * msgsys_args(int which, int a2, ...) (VARARGS)
196  *
197  * MPALMOSTSAFE
198  */
199 int
200 sys_msgsys(struct msgsys_args *uap)
201 {
202 	struct thread *td = curthread;
203 	unsigned int which = (unsigned int)uap->which;
204 	int error;
205 
206 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
207 		return (ENOSYS);
208 
209 	if (which >= NELEM(msgcalls))
210 		return (EINVAL);
211 	bcopy(&uap->a2, &uap->which,
212 	      sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
213 	get_mplock();
214 	error = (*msgcalls[which])(uap);
215 	rel_mplock();
216 	return (error);
217 }
218 
219 static void
220 msg_freehdr(struct msg *msghdr)
221 {
222 	while (msghdr->msg_ts > 0) {
223 		short next;
224 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
225 			panic("msghdr->msg_spot out of range");
226 		next = msgmaps[msghdr->msg_spot].next;
227 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
228 		free_msgmaps = msghdr->msg_spot;
229 		nfree_msgmaps++;
230 		msghdr->msg_spot = next;
231 		if (msghdr->msg_ts >= msginfo.msgssz)
232 			msghdr->msg_ts -= msginfo.msgssz;
233 		else
234 			msghdr->msg_ts = 0;
235 	}
236 	if (msghdr->msg_spot != -1)
237 		panic("msghdr->msg_spot != -1");
238 	msghdr->msg_next = free_msghdrs;
239 	free_msghdrs = msghdr;
240 }
241 
242 /*
243  * MPALMOSTSAFE
244  */
245 int
246 sys_msgctl(struct msgctl_args *uap)
247 {
248 	struct thread *td = curthread;
249 	struct proc *p = td->td_proc;
250 	int msqid = uap->msqid;
251 	int cmd = uap->cmd;
252 	struct msqid_ds *user_msqptr = uap->buf;
253 	int rval, eval;
254 	struct msqid_ds msqbuf;
255 	struct msqid_ds *msqptr;
256 
257 #ifdef MSG_DEBUG_OK
258 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
259 #endif
260 
261 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
262 		return (ENOSYS);
263 
264 	get_mplock();
265 	msqid = IPCID_TO_IX(msqid);
266 
267 	if (msqid < 0 || msqid >= msginfo.msgmni) {
268 #ifdef MSG_DEBUG_OK
269 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
270 		    msginfo.msgmni);
271 #endif
272 		eval = EINVAL;
273 		goto done;
274 	}
275 
276 	msqptr = &msqids[msqid];
277 
278 	if (msqptr->msg_qbytes == 0) {
279 #ifdef MSG_DEBUG_OK
280 		kprintf("no such msqid\n");
281 #endif
282 		eval = EINVAL;
283 		goto done;
284 	}
285 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
286 #ifdef MSG_DEBUG_OK
287 		kprintf("wrong sequence number\n");
288 #endif
289 		eval = EINVAL;
290 		goto done;
291 	}
292 
293 	rval = 0;
294 
295 	switch (cmd) {
296 	case IPC_RMID:
297 	{
298 		struct msg *msghdr;
299 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
300 			break;
301 		/* Free the message headers */
302 		msghdr = msqptr->msg_first;
303 		while (msghdr != NULL) {
304 			struct msg *msghdr_tmp;
305 
306 			/* Free the segments of each message */
307 			msqptr->msg_cbytes -= msghdr->msg_ts;
308 			msqptr->msg_qnum--;
309 			msghdr_tmp = msghdr;
310 			msghdr = msghdr->msg_next;
311 			msg_freehdr(msghdr_tmp);
312 		}
313 
314 		if (msqptr->msg_cbytes != 0)
315 			panic("msg_cbytes is screwed up");
316 		if (msqptr->msg_qnum != 0)
317 			panic("msg_qnum is screwed up");
318 
319 		msqptr->msg_qbytes = 0;	/* Mark it as free */
320 
321 		wakeup((caddr_t)msqptr);
322 	}
323 
324 		break;
325 
326 	case IPC_SET:
327 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
328 			break;
329 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
330 			break;
331 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
332 			eval = priv_check(td, PRIV_ROOT);
333 			if (eval)
334 				break;
335 		}
336 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
337 #ifdef MSG_DEBUG_OK
338 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
339 			    msginfo.msgmnb);
340 #endif
341 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
342 		}
343 		if (msqbuf.msg_qbytes == 0) {
344 #ifdef MSG_DEBUG_OK
345 			kprintf("can't reduce msg_qbytes to 0\n");
346 #endif
347 			eval = EINVAL;		/* non-standard errno! */
348 			break;
349 		}
350 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
351 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
352 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
353 					(msqbuf.msg_perm.mode & 0777);
354 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
355 		msqptr->msg_ctime = time_second;
356 		break;
357 
358 	case IPC_STAT:
359 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
360 #ifdef MSG_DEBUG_OK
361 			kprintf("requester doesn't have read access\n");
362 #endif
363 			eval = EINVAL;
364 			break;
365 		}
366 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
367 		break;
368 
369 	default:
370 #ifdef MSG_DEBUG_OK
371 		kprintf("invalid command %d\n", cmd);
372 #endif
373 		eval = EINVAL;
374 		break;
375 	}
376 done:
377 	rel_mplock();
378 	if (eval == 0)
379 		uap->sysmsg_result = rval;
380 	return(eval);
381 }
382 
383 /*
384  * MPALMOSTSAFE
385  */
386 int
387 sys_msgget(struct msgget_args *uap)
388 {
389 	struct thread *td = curthread;
390 	int msqid, eval;
391 	int key = uap->key;
392 	int msgflg = uap->msgflg;
393 	struct ucred *cred = td->td_ucred;
394 	struct msqid_ds *msqptr = NULL;
395 
396 #ifdef MSG_DEBUG_OK
397 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
398 #endif
399 	if (!jail_sysvipc_allowed && cred->cr_prison != NULL)
400 		return (ENOSYS);
401 
402 	eval = 0;
403 	get_mplock();
404 
405 	if (key != IPC_PRIVATE) {
406 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
407 			msqptr = &msqids[msqid];
408 			if (msqptr->msg_qbytes != 0 &&
409 			    msqptr->msg_perm.key == key)
410 				break;
411 		}
412 		if (msqid < msginfo.msgmni) {
413 #ifdef MSG_DEBUG_OK
414 			kprintf("found public key\n");
415 #endif
416 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
417 #ifdef MSG_DEBUG_OK
418 				kprintf("not exclusive\n");
419 #endif
420 				eval = EEXIST;
421 				goto done;
422 			}
423 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
424 #ifdef MSG_DEBUG_OK
425 				kprintf("requester doesn't have 0%o access\n",
426 				    msgflg & 0700);
427 #endif
428 				goto done;
429 			}
430 			goto done;
431 		}
432 	}
433 
434 #ifdef MSG_DEBUG_OK
435 	kprintf("need to allocate the msqid_ds\n");
436 #endif
437 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
438 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
439 			/*
440 			 * Look for an unallocated and unlocked msqid_ds.
441 			 * msqid_ds's can be locked by msgsnd or msgrcv while
442 			 * they are copying the message in/out.  We can't
443 			 * re-use the entry until they release it.
444 			 */
445 			msqptr = &msqids[msqid];
446 			if (msqptr->msg_qbytes == 0 &&
447 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
448 				break;
449 		}
450 		if (msqid == msginfo.msgmni) {
451 #ifdef MSG_DEBUG_OK
452 			kprintf("no more msqid_ds's available\n");
453 #endif
454 			eval = ENOSPC;
455 			goto done;
456 		}
457 #ifdef MSG_DEBUG_OK
458 		kprintf("msqid %d is available\n", msqid);
459 #endif
460 		msqptr->msg_perm.key = key;
461 		msqptr->msg_perm.cuid = cred->cr_uid;
462 		msqptr->msg_perm.uid = cred->cr_uid;
463 		msqptr->msg_perm.cgid = cred->cr_gid;
464 		msqptr->msg_perm.gid = cred->cr_gid;
465 		msqptr->msg_perm.mode = (msgflg & 0777);
466 		/* Make sure that the returned msqid is unique */
467 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
468 		msqptr->msg_first = NULL;
469 		msqptr->msg_last = NULL;
470 		msqptr->msg_cbytes = 0;
471 		msqptr->msg_qnum = 0;
472 		msqptr->msg_qbytes = msginfo.msgmnb;
473 		msqptr->msg_lspid = 0;
474 		msqptr->msg_lrpid = 0;
475 		msqptr->msg_stime = 0;
476 		msqptr->msg_rtime = 0;
477 		msqptr->msg_ctime = time_second;
478 	} else {
479 #ifdef MSG_DEBUG_OK
480 		kprintf("didn't find it and wasn't asked to create it\n");
481 #endif
482 		eval = ENOENT;
483 	}
484 
485 done:
486 	rel_mplock();
487 	/* Construct the unique msqid */
488 	if (eval == 0)
489 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
490 	return(eval);
491 }
492 
493 /*
494  * MPALMOSTSAFE
495  */
496 int
497 sys_msgsnd(struct msgsnd_args *uap)
498 {
499 	struct thread *td = curthread;
500 	int msqid = uap->msqid;
501 	void *user_msgp = uap->msgp;
502 	size_t msgsz = uap->msgsz;
503 	int msgflg = uap->msgflg;
504 	int segs_needed, eval;
505 	struct msqid_ds *msqptr;
506 	struct msg *msghdr;
507 	short next;
508 
509 #ifdef MSG_DEBUG_OK
510 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
511 	    msgflg);
512 #endif
513 
514 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
515 		return (ENOSYS);
516 
517 	get_mplock();
518 	msqid = IPCID_TO_IX(msqid);
519 
520 	if (msqid < 0 || msqid >= msginfo.msgmni) {
521 #ifdef MSG_DEBUG_OK
522 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
523 		    msginfo.msgmni);
524 #endif
525 		eval = EINVAL;
526 		goto done;
527 	}
528 
529 	msqptr = &msqids[msqid];
530 	if (msqptr->msg_qbytes == 0) {
531 #ifdef MSG_DEBUG_OK
532 		kprintf("no such message queue id\n");
533 #endif
534 		eval = EINVAL;
535 		goto done;
536 	}
537 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
538 #ifdef MSG_DEBUG_OK
539 		kprintf("wrong sequence number\n");
540 #endif
541 		eval = EINVAL;
542 		goto done;
543 	}
544 
545 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
546 #ifdef MSG_DEBUG_OK
547 		kprintf("requester doesn't have write access\n");
548 #endif
549 		eval = EINVAL;
550 		goto done;
551 	}
552 
553 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
554 #ifdef MSG_DEBUG_OK
555 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
556 	    segs_needed);
557 #endif
558 	for (;;) {
559 		int need_more_resources = 0;
560 
561 		/*
562 		 * check msgsz
563 		 * (inside this loop in case msg_qbytes changes while we sleep)
564 		 */
565 
566 		if (msgsz > msqptr->msg_qbytes) {
567 #ifdef MSG_DEBUG_OK
568 			kprintf("msgsz > msqptr->msg_qbytes\n");
569 #endif
570 			eval = EINVAL;
571 			goto done;
572 		}
573 
574 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
575 #ifdef MSG_DEBUG_OK
576 			kprintf("msqid is locked\n");
577 #endif
578 			need_more_resources = 1;
579 		}
580 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
581 #ifdef MSG_DEBUG_OK
582 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
583 #endif
584 			need_more_resources = 1;
585 		}
586 		if (segs_needed > nfree_msgmaps) {
587 #ifdef MSG_DEBUG_OK
588 			kprintf("segs_needed > nfree_msgmaps\n");
589 #endif
590 			need_more_resources = 1;
591 		}
592 		if (free_msghdrs == NULL) {
593 #ifdef MSG_DEBUG_OK
594 			kprintf("no more msghdrs\n");
595 #endif
596 			need_more_resources = 1;
597 		}
598 
599 		if (need_more_resources) {
600 			int we_own_it;
601 
602 			if ((msgflg & IPC_NOWAIT) != 0) {
603 #ifdef MSG_DEBUG_OK
604 				kprintf("need more resources but caller doesn't want to wait\n");
605 #endif
606 				eval = EAGAIN;
607 				goto done;
608 			}
609 
610 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
611 #ifdef MSG_DEBUG_OK
612 				kprintf("we don't own the msqid_ds\n");
613 #endif
614 				we_own_it = 0;
615 			} else {
616 				/* Force later arrivals to wait for our
617 				   request */
618 #ifdef MSG_DEBUG_OK
619 				kprintf("we own the msqid_ds\n");
620 #endif
621 				msqptr->msg_perm.mode |= MSG_LOCKED;
622 				we_own_it = 1;
623 			}
624 #ifdef MSG_DEBUG_OK
625 			kprintf("goodnight\n");
626 #endif
627 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
628 #ifdef MSG_DEBUG_OK
629 			kprintf("good morning, eval=%d\n", eval);
630 #endif
631 			if (we_own_it)
632 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
633 			if (eval != 0) {
634 #ifdef MSG_DEBUG_OK
635 				kprintf("msgsnd:  interrupted system call\n");
636 #endif
637 				eval = EINTR;
638 				goto done;
639 			}
640 
641 			/*
642 			 * Make sure that the msq queue still exists
643 			 */
644 
645 			if (msqptr->msg_qbytes == 0) {
646 #ifdef MSG_DEBUG_OK
647 				kprintf("msqid deleted\n");
648 #endif
649 				eval = EIDRM;
650 				goto done;
651 			}
652 
653 		} else {
654 #ifdef MSG_DEBUG_OK
655 			kprintf("got all the resources that we need\n");
656 #endif
657 			break;
658 		}
659 	}
660 
661 	/*
662 	 * We have the resources that we need.
663 	 * Make sure!
664 	 */
665 
666 	if (msqptr->msg_perm.mode & MSG_LOCKED)
667 		panic("msg_perm.mode & MSG_LOCKED");
668 	if (segs_needed > nfree_msgmaps)
669 		panic("segs_needed > nfree_msgmaps");
670 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
671 		panic("msgsz + msg_cbytes > msg_qbytes");
672 	if (free_msghdrs == NULL)
673 		panic("no more msghdrs");
674 
675 	/*
676 	 * Re-lock the msqid_ds in case we page-fault when copying in the
677 	 * message
678 	 */
679 
680 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
681 		panic("msqid_ds is already locked");
682 	msqptr->msg_perm.mode |= MSG_LOCKED;
683 
684 	/*
685 	 * Allocate a message header
686 	 */
687 
688 	msghdr = free_msghdrs;
689 	free_msghdrs = msghdr->msg_next;
690 	msghdr->msg_spot = -1;
691 	msghdr->msg_ts = msgsz;
692 
693 	/*
694 	 * Allocate space for the message
695 	 */
696 
697 	while (segs_needed > 0) {
698 		if (nfree_msgmaps <= 0)
699 			panic("not enough msgmaps");
700 		if (free_msgmaps == -1)
701 			panic("nil free_msgmaps");
702 		next = free_msgmaps;
703 		if (next <= -1)
704 			panic("next too low #1");
705 		if (next >= msginfo.msgseg)
706 			panic("next out of range #1");
707 #ifdef MSG_DEBUG_OK
708 		kprintf("allocating segment %d to message\n", next);
709 #endif
710 		free_msgmaps = msgmaps[next].next;
711 		nfree_msgmaps--;
712 		msgmaps[next].next = msghdr->msg_spot;
713 		msghdr->msg_spot = next;
714 		segs_needed--;
715 	}
716 
717 	/*
718 	 * Copy in the message type
719 	 */
720 
721 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
722 	    sizeof(msghdr->msg_type))) != 0) {
723 #ifdef MSG_DEBUG_OK
724 		kprintf("error %d copying the message type\n", eval);
725 #endif
726 		msg_freehdr(msghdr);
727 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
728 		wakeup((caddr_t)msqptr);
729 		goto done;
730 	}
731 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
732 
733 	/*
734 	 * Validate the message type
735 	 */
736 
737 	if (msghdr->msg_type < 1) {
738 		msg_freehdr(msghdr);
739 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
740 		wakeup((caddr_t)msqptr);
741 #ifdef MSG_DEBUG_OK
742 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
743 #endif
744 		eval = EINVAL;
745 		goto done;
746 	}
747 
748 	/*
749 	 * Copy in the message body
750 	 */
751 
752 	next = msghdr->msg_spot;
753 	while (msgsz > 0) {
754 		size_t tlen;
755 		if (msgsz > msginfo.msgssz)
756 			tlen = msginfo.msgssz;
757 		else
758 			tlen = msgsz;
759 		if (next <= -1)
760 			panic("next too low #2");
761 		if (next >= msginfo.msgseg)
762 			panic("next out of range #2");
763 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
764 		    tlen)) != 0) {
765 #ifdef MSG_DEBUG_OK
766 			kprintf("error %d copying in message segment\n", eval);
767 #endif
768 			msg_freehdr(msghdr);
769 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
770 			wakeup((caddr_t)msqptr);
771 			goto done;
772 		}
773 		msgsz -= tlen;
774 		user_msgp = (char *)user_msgp + tlen;
775 		next = msgmaps[next].next;
776 	}
777 	if (next != -1)
778 		panic("didn't use all the msg segments");
779 
780 	/*
781 	 * We've got the message.  Unlock the msqid_ds.
782 	 */
783 
784 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
785 
786 	/*
787 	 * Make sure that the msqid_ds is still allocated.
788 	 */
789 
790 	if (msqptr->msg_qbytes == 0) {
791 		msg_freehdr(msghdr);
792 		wakeup((caddr_t)msqptr);
793 		eval = EIDRM;
794 		goto done;
795 	}
796 
797 	/*
798 	 * Put the message into the queue
799 	 */
800 
801 	if (msqptr->msg_first == NULL) {
802 		msqptr->msg_first = msghdr;
803 		msqptr->msg_last = msghdr;
804 	} else {
805 		msqptr->msg_last->msg_next = msghdr;
806 		msqptr->msg_last = msghdr;
807 	}
808 	msqptr->msg_last->msg_next = NULL;
809 
810 	msqptr->msg_cbytes += msghdr->msg_ts;
811 	msqptr->msg_qnum++;
812 	msqptr->msg_lspid = td->td_proc->p_pid;
813 	msqptr->msg_stime = time_second;
814 
815 	wakeup((caddr_t)msqptr);
816 	eval = 0;
817 done:
818 	rel_mplock();
819 	if (eval == 0)
820 		uap->sysmsg_result = 0;
821 	return (eval);
822 }
823 
824 /*
825  * MPALMOSTSAFE
826  */
827 int
828 sys_msgrcv(struct msgrcv_args *uap)
829 {
830 	struct thread *td = curthread;
831 	int msqid = uap->msqid;
832 	void *user_msgp = uap->msgp;
833 	size_t msgsz = uap->msgsz;
834 	long msgtyp = uap->msgtyp;
835 	int msgflg = uap->msgflg;
836 	size_t len;
837 	struct msqid_ds *msqptr;
838 	struct msg *msghdr;
839 	int eval;
840 	short next;
841 
842 #ifdef MSG_DEBUG_OK
843 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
844 	    msgsz, msgtyp, msgflg);
845 #endif
846 
847 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
848 		return (ENOSYS);
849 
850 	get_mplock();
851 	msqid = IPCID_TO_IX(msqid);
852 
853 	if (msqid < 0 || msqid >= msginfo.msgmni) {
854 #ifdef MSG_DEBUG_OK
855 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
856 		    msginfo.msgmni);
857 #endif
858 		eval = EINVAL;
859 		goto done;
860 	}
861 
862 	msqptr = &msqids[msqid];
863 	if (msqptr->msg_qbytes == 0) {
864 #ifdef MSG_DEBUG_OK
865 		kprintf("no such message queue id\n");
866 #endif
867 		eval = EINVAL;
868 		goto done;
869 	}
870 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
871 #ifdef MSG_DEBUG_OK
872 		kprintf("wrong sequence number\n");
873 #endif
874 		eval = EINVAL;
875 		goto done;
876 	}
877 
878 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
879 #ifdef MSG_DEBUG_OK
880 		kprintf("requester doesn't have read access\n");
881 #endif
882 		goto done;
883 	}
884 
885 	msghdr = NULL;
886 	while (msghdr == NULL) {
887 		if (msgtyp == 0) {
888 			msghdr = msqptr->msg_first;
889 			if (msghdr != NULL) {
890 				if (msgsz < msghdr->msg_ts &&
891 				    (msgflg & MSG_NOERROR) == 0) {
892 #ifdef MSG_DEBUG_OK
893 					kprintf("first message on the queue is too big (want %d, got %d)\n",
894 					    msgsz, msghdr->msg_ts);
895 #endif
896 					eval = E2BIG;
897 					goto done;
898 				}
899 				if (msqptr->msg_first == msqptr->msg_last) {
900 					msqptr->msg_first = NULL;
901 					msqptr->msg_last = NULL;
902 				} else {
903 					msqptr->msg_first = msghdr->msg_next;
904 					if (msqptr->msg_first == NULL)
905 						panic("msg_first/last screwed up #1");
906 				}
907 			}
908 		} else {
909 			struct msg *previous;
910 			struct msg **prev;
911 
912 			previous = NULL;
913 			prev = &(msqptr->msg_first);
914 			while ((msghdr = *prev) != NULL) {
915 				/*
916 				 * Is this message's type an exact match or is
917 				 * this message's type less than or equal to
918 				 * the absolute value of a negative msgtyp?
919 				 * Note that the second half of this test can
920 				 * NEVER be true if msgtyp is positive since
921 				 * msg_type is always positive!
922 				 */
923 
924 				if (msgtyp == msghdr->msg_type ||
925 				    msghdr->msg_type <= -msgtyp) {
926 #ifdef MSG_DEBUG_OK
927 					kprintf("found message type %d, requested %d\n",
928 					    msghdr->msg_type, msgtyp);
929 #endif
930 					if (msgsz < msghdr->msg_ts &&
931 					    (msgflg & MSG_NOERROR) == 0) {
932 #ifdef MSG_DEBUG_OK
933 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
934 						    msgsz, msghdr->msg_ts);
935 #endif
936 						eval = E2BIG;
937 						goto done;
938 					}
939 					*prev = msghdr->msg_next;
940 					if (msghdr == msqptr->msg_last) {
941 						if (previous == NULL) {
942 							if (prev !=
943 							    &msqptr->msg_first)
944 								panic("msg_first/last screwed up #2");
945 							msqptr->msg_first =
946 							    NULL;
947 							msqptr->msg_last =
948 							    NULL;
949 						} else {
950 							if (prev ==
951 							    &msqptr->msg_first)
952 								panic("msg_first/last screwed up #3");
953 							msqptr->msg_last =
954 							    previous;
955 						}
956 					}
957 					break;
958 				}
959 				previous = msghdr;
960 				prev = &(msghdr->msg_next);
961 			}
962 		}
963 
964 		/*
965 		 * We've either extracted the msghdr for the appropriate
966 		 * message or there isn't one.
967 		 * If there is one then bail out of this loop.
968 		 */
969 
970 		if (msghdr != NULL)
971 			break;
972 
973 		/*
974 		 * Hmph!  No message found.  Does the user want to wait?
975 		 */
976 
977 		if ((msgflg & IPC_NOWAIT) != 0) {
978 #ifdef MSG_DEBUG_OK
979 			kprintf("no appropriate message found (msgtyp=%d)\n",
980 			    msgtyp);
981 #endif
982 			/* The SVID says to return ENOMSG. */
983 #ifdef ENOMSG
984 			eval = ENOMSG;
985 #else
986 			/* Unfortunately, BSD doesn't define that code yet! */
987 			eval = EAGAIN;
988 #endif
989 			goto done;
990 		}
991 
992 		/*
993 		 * Wait for something to happen
994 		 */
995 
996 #ifdef MSG_DEBUG_OK
997 		kprintf("msgrcv:  goodnight\n");
998 #endif
999 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
1000 #ifdef MSG_DEBUG_OK
1001 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
1002 #endif
1003 
1004 		if (eval != 0) {
1005 #ifdef MSG_DEBUG_OK
1006 			kprintf("msgsnd:  interrupted system call\n");
1007 #endif
1008 			eval = EINTR;
1009 			goto done;
1010 		}
1011 
1012 		/*
1013 		 * Make sure that the msq queue still exists
1014 		 */
1015 
1016 		if (msqptr->msg_qbytes == 0 ||
1017 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1018 #ifdef MSG_DEBUG_OK
1019 			kprintf("msqid deleted\n");
1020 #endif
1021 			eval = EIDRM;
1022 			goto done;
1023 		}
1024 	}
1025 
1026 	/*
1027 	 * Return the message to the user.
1028 	 *
1029 	 * First, do the bookkeeping (before we risk being interrupted).
1030 	 */
1031 
1032 	msqptr->msg_cbytes -= msghdr->msg_ts;
1033 	msqptr->msg_qnum--;
1034 	msqptr->msg_lrpid = td->td_proc->p_pid;
1035 	msqptr->msg_rtime = time_second;
1036 
1037 	/*
1038 	 * Make msgsz the actual amount that we'll be returning.
1039 	 * Note that this effectively truncates the message if it is too long
1040 	 * (since msgsz is never increased).
1041 	 */
1042 
1043 #ifdef MSG_DEBUG_OK
1044 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1045 	    msghdr->msg_ts);
1046 #endif
1047 	if (msgsz > msghdr->msg_ts)
1048 		msgsz = msghdr->msg_ts;
1049 
1050 	/*
1051 	 * Return the type to the user.
1052 	 */
1053 
1054 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1055 	    sizeof(msghdr->msg_type));
1056 	if (eval != 0) {
1057 #ifdef MSG_DEBUG_OK
1058 		kprintf("error (%d) copying out message type\n", eval);
1059 #endif
1060 		msg_freehdr(msghdr);
1061 		wakeup((caddr_t)msqptr);
1062 		goto done;
1063 	}
1064 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1065 
1066 	/*
1067 	 * Return the segments to the user
1068 	 */
1069 
1070 	next = msghdr->msg_spot;
1071 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1072 		size_t tlen;
1073 
1074 		if (msgsz - len > msginfo.msgssz)
1075 			tlen = msginfo.msgssz;
1076 		else
1077 			tlen = msgsz - len;
1078 		if (next <= -1)
1079 			panic("next too low #3");
1080 		if (next >= msginfo.msgseg)
1081 			panic("next out of range #3");
1082 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1083 		    user_msgp, tlen);
1084 		if (eval != 0) {
1085 #ifdef MSG_DEBUG_OK
1086 			kprintf("error (%d) copying out message segment\n",
1087 			    eval);
1088 #endif
1089 			msg_freehdr(msghdr);
1090 			wakeup((caddr_t)msqptr);
1091 			goto done;
1092 		}
1093 		user_msgp = (char *)user_msgp + tlen;
1094 		next = msgmaps[next].next;
1095 	}
1096 
1097 	/*
1098 	 * Done, return the actual number of bytes copied out.
1099 	 */
1100 
1101 	msg_freehdr(msghdr);
1102 	wakeup((caddr_t)msqptr);
1103 	eval = 0;
1104 done:
1105 	rel_mplock();
1106 	if (eval == 0)
1107 		uap->sysmsg_result = msgsz;
1108 	return(eval);
1109 }
1110 
1111 static int
1112 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1113 {
1114 
1115 	return (SYSCTL_OUT(req, msqids,
1116 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1117 }
1118 
1119 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1120 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1121 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1122 
1123 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1124     "Max characters in message");
1125 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0,
1126     "Max message queue identifiers");
1127 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0,
1128     "Max characters in message queue");
1129 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0,
1130     "Max SVID messages in system");
1131 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0,
1132     "Power-of-two size of a message segment");
1133 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0,
1134     "Number of message segments");
1135 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1136     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1137