xref: /dragonfly/sys/kern/sysv_msg.c (revision 59b0b316)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/priv.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
/* kmalloc type used for the allocations made by msginit() in this file. */
static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

/* Boot-time setup routine; registered through SYSINIT after its definition. */
static void msginit (void *);

/*
 * The diagnostic kprintf() calls in this file are wrapped in
 * #ifdef MSG_DEBUG_OK, so with MSG_DEBUG_OK undefined they are compiled out.
 */
#define MSG_DEBUG
#undef MSG_DEBUG_OK

/* Returns a message header and the segments it owns to the free lists. */
static void msg_freehdr (struct msg *msghdr);
44 
/*
 * Header describing one message.  Headers live in the msghdrs[] array and
 * are linked through msg_next either into a queue (msg_first..msg_last of a
 * msqid_ds) or into the free_msghdrs list.
 */
struct msg {
	struct	msg *msg_next;	/* next msg in the chain */
	long	msg_type;	/* type of this message */
    				/* >0 -> type of this message */
    				/* 0 -> free header */
	u_short	msg_ts;		/* size of this message, in bytes */
	short	msg_spot;	/* index of the first segment in msgmaps[] */
				/* -1 -> no segment attached */
};
53 
54 
/*
 * Compile-time defaults for the limits stored in msginfo.  Each one except
 * MSGMAX may be overridden by defining it before this point.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* segment indices are shorts; msginit() */
				/* panics if the value exceeds 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* total bytes in the segment pool */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
71 
72 /*
73  * Based on the configuration parameters described in an SVR2 (yes, two)
74  * config(1m) man page.
75  *
76  * Each message is broken up and stored in segments that are msgssz bytes
77  * long.  For efficiency reasons, this should be a power of two.  Also,
78  * it doesn't make sense if it is less than 8 or greater than about 256.
79  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
80  * two between 8 and 1024 inclusive (and panic's if it isn't).
81  */
82 struct msginfo msginfo = {
83                 MSGMAX,         /* max chars in a message */
84                 MSGMNI,         /* # of message queue identifiers */
85                 MSGMNB,         /* max chars in a queue */
86                 MSGTQL,         /* max messages in system */
87                 MSGSSZ,         /* size of a message segment */
88                 		/* (must be small power of 2 greater than 4) */
89                 MSGSEG          /* number of message segments */
90 };
91 
/*
 * Helpers that pack and unpack a msqid (specific to this implementation).
 * The low 16 bits carry the index of the msqid_ds, the high 16 bits carry
 * the sequence number taken from msg_perm.seq.
 */
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
#define MSQID(ix,ds)	\
	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
99 
100 /*
101  * The rest of this file is specific to this particular implementation.
102  */
103 
/*
 * One entry per message segment.  Entries are chained through 'next' to
 * form either the segment list of a message (starting at msg_spot) or the
 * free list (starting at free_msgmaps).
 */
struct msgmap {
	short	next;		/* next segment in buffer */
    				/* -1 -> available */
    				/* 0..(MSGSEG-1) -> index of next segment */
};
109 
/*
 * Flag kept in msg_perm.mode while a sender is waiting on or copying into
 * a queue; msgget will not reuse a slot that has it set.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Subsystem state; the system calls below access it under msg_token. */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
static struct msg *msghdrs;	/* MSGTQL msg headers */
static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
static struct lwkt_token msg_token = LWKT_TOKEN_INITIALIZER(msg_token);
120 
121 static void
122 msginit(void *dummy)
123 {
124 	int i;
125 
126 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
127 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
128 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
129 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
130 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
131 
132 	/*
133 	 * msginfo.msgssz should be a power of two for efficiency reasons.
134 	 * It is also pretty silly if msginfo.msgssz is less than 8
135 	 * or greater than about 256 so ...
136 	 */
137 
138 	i = 8;
139 	while (i < 1024 && i != msginfo.msgssz)
140 		i <<= 1;
141     	if (i != msginfo.msgssz) {
142 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
143 		    msginfo.msgssz);
144 		panic("msginfo.msgssz not a small power of 2");
145 	}
146 
147 	if (msginfo.msgseg > 32767) {
148 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
149 		panic("msginfo.msgseg > 32767");
150 	}
151 
152 	for (i = 0; i < msginfo.msgseg; i++) {
153 		if (i > 0)
154 			msgmaps[i-1].next = i;
155 		msgmaps[i].next = -1;	/* implies entry is available */
156 	}
157 	free_msgmaps = 0;
158 	nfree_msgmaps = msginfo.msgseg;
159 
160 	for (i = 0; i < msginfo.msgtql; i++) {
161 		msghdrs[i].msg_type = 0;
162 		if (i > 0)
163 			msghdrs[i-1].msg_next = &msghdrs[i];
164 		msghdrs[i].msg_next = NULL;
165     	}
166 	free_msghdrs = &msghdrs[0];
167 
168 	for (i = 0; i < msginfo.msgmni; i++) {
169 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
170 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
171 		msqids[i].msg_perm.mode = 0;
172 	}
173 }
174 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL);
175 
176 static void
177 msg_freehdr(struct msg *msghdr)
178 {
179 	while (msghdr->msg_ts > 0) {
180 		short next;
181 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
182 			panic("msghdr->msg_spot out of range");
183 		next = msgmaps[msghdr->msg_spot].next;
184 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
185 		free_msgmaps = msghdr->msg_spot;
186 		nfree_msgmaps++;
187 		msghdr->msg_spot = next;
188 		if (msghdr->msg_ts >= msginfo.msgssz)
189 			msghdr->msg_ts -= msginfo.msgssz;
190 		else
191 			msghdr->msg_ts = 0;
192 	}
193 	if (msghdr->msg_spot != -1)
194 		panic("msghdr->msg_spot != -1");
195 	msghdr->msg_next = free_msghdrs;
196 	free_msghdrs = msghdr;
197 }
198 
199 /*
200  * MPALMOSTSAFE
201  */
202 int
203 sys_msgctl(struct msgctl_args *uap)
204 {
205 	struct thread *td = curthread;
206 	struct proc *p = td->td_proc;
207 	int msqid = uap->msqid;
208 	int cmd = uap->cmd;
209 	struct msqid_ds *user_msqptr = uap->buf;
210 	int rval, eval;
211 	struct msqid_ds msqbuf;
212 	struct msqid_ds *msqptr;
213 
214 #ifdef MSG_DEBUG_OK
215 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
216 #endif
217 
218 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
219 		return (ENOSYS);
220 
221 	lwkt_gettoken(&msg_token);
222 	msqid = IPCID_TO_IX(msqid);
223 
224 	if (msqid < 0 || msqid >= msginfo.msgmni) {
225 #ifdef MSG_DEBUG_OK
226 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
227 		    msginfo.msgmni);
228 #endif
229 		eval = EINVAL;
230 		goto done;
231 	}
232 
233 	msqptr = &msqids[msqid];
234 
235 	if (msqptr->msg_qbytes == 0) {
236 #ifdef MSG_DEBUG_OK
237 		kprintf("no such msqid\n");
238 #endif
239 		eval = EINVAL;
240 		goto done;
241 	}
242 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
243 #ifdef MSG_DEBUG_OK
244 		kprintf("wrong sequence number\n");
245 #endif
246 		eval = EINVAL;
247 		goto done;
248 	}
249 
250 	rval = 0;
251 
252 	switch (cmd) {
253 	case IPC_RMID:
254 	{
255 		struct msg *msghdr;
256 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
257 			break;
258 		/* Free the message headers */
259 		msghdr = msqptr->msg_first;
260 		while (msghdr != NULL) {
261 			struct msg *msghdr_tmp;
262 
263 			/* Free the segments of each message */
264 			msqptr->msg_cbytes -= msghdr->msg_ts;
265 			msqptr->msg_qnum--;
266 			msghdr_tmp = msghdr;
267 			msghdr = msghdr->msg_next;
268 			msg_freehdr(msghdr_tmp);
269 		}
270 
271 		if (msqptr->msg_cbytes != 0)
272 			panic("msg_cbytes is screwed up");
273 		if (msqptr->msg_qnum != 0)
274 			panic("msg_qnum is screwed up");
275 
276 		msqptr->msg_qbytes = 0;	/* Mark it as free */
277 
278 		wakeup((caddr_t)msqptr);
279 	}
280 
281 		break;
282 
283 	case IPC_SET:
284 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
285 			break;
286 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
287 			break;
288 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
289 			eval = priv_check(td, PRIV_ROOT);
290 			if (eval)
291 				break;
292 		}
293 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
294 #ifdef MSG_DEBUG_OK
295 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
296 			    msginfo.msgmnb);
297 #endif
298 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
299 		}
300 		if (msqbuf.msg_qbytes == 0) {
301 #ifdef MSG_DEBUG_OK
302 			kprintf("can't reduce msg_qbytes to 0\n");
303 #endif
304 			eval = EINVAL;		/* non-standard errno! */
305 			break;
306 		}
307 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
308 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
309 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
310 					(msqbuf.msg_perm.mode & 0777);
311 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
312 		msqptr->msg_ctime = time_second;
313 		break;
314 
315 	case IPC_STAT:
316 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
317 #ifdef MSG_DEBUG_OK
318 			kprintf("requester doesn't have read access\n");
319 #endif
320 			eval = EINVAL;
321 			break;
322 		}
323 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
324 		break;
325 
326 	default:
327 #ifdef MSG_DEBUG_OK
328 		kprintf("invalid command %d\n", cmd);
329 #endif
330 		eval = EINVAL;
331 		break;
332 	}
333 done:
334 	lwkt_reltoken(&msg_token);
335 	if (eval == 0)
336 		uap->sysmsg_result = rval;
337 	return(eval);
338 }
339 
340 /*
341  * MPALMOSTSAFE
342  */
343 int
344 sys_msgget(struct msgget_args *uap)
345 {
346 	struct thread *td = curthread;
347 	int msqid, eval;
348 	int key = uap->key;
349 	int msgflg = uap->msgflg;
350 	struct ucred *cred = td->td_ucred;
351 	struct msqid_ds *msqptr = NULL;
352 
353 #ifdef MSG_DEBUG_OK
354 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
355 #endif
356 	if (!jail_sysvipc_allowed && cred->cr_prison != NULL)
357 		return (ENOSYS);
358 
359 	eval = 0;
360 	lwkt_gettoken(&msg_token);
361 
362 	if (key != IPC_PRIVATE) {
363 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
364 			msqptr = &msqids[msqid];
365 			if (msqptr->msg_qbytes != 0 &&
366 			    msqptr->msg_perm.key == key)
367 				break;
368 		}
369 		if (msqid < msginfo.msgmni) {
370 #ifdef MSG_DEBUG_OK
371 			kprintf("found public key\n");
372 #endif
373 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
374 #ifdef MSG_DEBUG_OK
375 				kprintf("not exclusive\n");
376 #endif
377 				eval = EEXIST;
378 				goto done;
379 			}
380 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
381 #ifdef MSG_DEBUG_OK
382 				kprintf("requester doesn't have 0%o access\n",
383 				    msgflg & 0700);
384 #endif
385 				goto done;
386 			}
387 			goto done;
388 		}
389 	}
390 
391 #ifdef MSG_DEBUG_OK
392 	kprintf("need to allocate the msqid_ds\n");
393 #endif
394 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
395 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
396 			/*
397 			 * Look for an unallocated and unlocked msqid_ds.
398 			 * msqid_ds's can be locked by msgsnd or msgrcv while
399 			 * they are copying the message in/out.  We can't
400 			 * re-use the entry until they release it.
401 			 */
402 			msqptr = &msqids[msqid];
403 			if (msqptr->msg_qbytes == 0 &&
404 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
405 				break;
406 		}
407 		if (msqid == msginfo.msgmni) {
408 #ifdef MSG_DEBUG_OK
409 			kprintf("no more msqid_ds's available\n");
410 #endif
411 			eval = ENOSPC;
412 			goto done;
413 		}
414 #ifdef MSG_DEBUG_OK
415 		kprintf("msqid %d is available\n", msqid);
416 #endif
417 		msqptr->msg_perm.key = key;
418 		msqptr->msg_perm.cuid = cred->cr_uid;
419 		msqptr->msg_perm.uid = cred->cr_uid;
420 		msqptr->msg_perm.cgid = cred->cr_gid;
421 		msqptr->msg_perm.gid = cred->cr_gid;
422 		msqptr->msg_perm.mode = (msgflg & 0777);
423 		/* Make sure that the returned msqid is unique */
424 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
425 		msqptr->msg_first = NULL;
426 		msqptr->msg_last = NULL;
427 		msqptr->msg_cbytes = 0;
428 		msqptr->msg_qnum = 0;
429 		msqptr->msg_qbytes = msginfo.msgmnb;
430 		msqptr->msg_lspid = 0;
431 		msqptr->msg_lrpid = 0;
432 		msqptr->msg_stime = 0;
433 		msqptr->msg_rtime = 0;
434 		msqptr->msg_ctime = time_second;
435 	} else {
436 #ifdef MSG_DEBUG_OK
437 		kprintf("didn't find it and wasn't asked to create it\n");
438 #endif
439 		eval = ENOENT;
440 	}
441 
442 done:
443 	lwkt_reltoken(&msg_token);
444 	/* Construct the unique msqid */
445 	if (eval == 0)
446 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
447 	return(eval);
448 }
449 
450 /*
451  * MPALMOSTSAFE
452  */
453 int
454 sys_msgsnd(struct msgsnd_args *uap)
455 {
456 	struct thread *td = curthread;
457 	int msqid = uap->msqid;
458 	const void *user_msgp = uap->msgp;
459 	size_t msgsz = uap->msgsz;
460 	int msgflg = uap->msgflg;
461 	int segs_needed, eval;
462 	struct msqid_ds *msqptr;
463 	struct msg *msghdr;
464 	short next;
465 
466 #ifdef MSG_DEBUG_OK
467 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
468 	    msgflg);
469 #endif
470 
471 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
472 		return (ENOSYS);
473 
474 	lwkt_gettoken(&msg_token);
475 	msqid = IPCID_TO_IX(msqid);
476 
477 	if (msqid < 0 || msqid >= msginfo.msgmni) {
478 #ifdef MSG_DEBUG_OK
479 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
480 		    msginfo.msgmni);
481 #endif
482 		eval = EINVAL;
483 		goto done;
484 	}
485 
486 	msqptr = &msqids[msqid];
487 	if (msqptr->msg_qbytes == 0) {
488 #ifdef MSG_DEBUG_OK
489 		kprintf("no such message queue id\n");
490 #endif
491 		eval = EINVAL;
492 		goto done;
493 	}
494 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
495 #ifdef MSG_DEBUG_OK
496 		kprintf("wrong sequence number\n");
497 #endif
498 		eval = EINVAL;
499 		goto done;
500 	}
501 
502 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
503 #ifdef MSG_DEBUG_OK
504 		kprintf("requester doesn't have write access\n");
505 #endif
506 		eval = EINVAL;
507 		goto done;
508 	}
509 
510 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
511 #ifdef MSG_DEBUG_OK
512 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
513 	    segs_needed);
514 #endif
515 	for (;;) {
516 		int need_more_resources = 0;
517 
518 		/*
519 		 * check msgsz
520 		 * (inside this loop in case msg_qbytes changes while we sleep)
521 		 */
522 
523 		if (msgsz > msqptr->msg_qbytes) {
524 #ifdef MSG_DEBUG_OK
525 			kprintf("msgsz > msqptr->msg_qbytes\n");
526 #endif
527 			eval = EINVAL;
528 			goto done;
529 		}
530 
531 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
532 #ifdef MSG_DEBUG_OK
533 			kprintf("msqid is locked\n");
534 #endif
535 			need_more_resources = 1;
536 		}
537 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
538 #ifdef MSG_DEBUG_OK
539 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
540 #endif
541 			need_more_resources = 1;
542 		}
543 		if (segs_needed > nfree_msgmaps) {
544 #ifdef MSG_DEBUG_OK
545 			kprintf("segs_needed > nfree_msgmaps\n");
546 #endif
547 			need_more_resources = 1;
548 		}
549 		if (free_msghdrs == NULL) {
550 #ifdef MSG_DEBUG_OK
551 			kprintf("no more msghdrs\n");
552 #endif
553 			need_more_resources = 1;
554 		}
555 
556 		if (need_more_resources) {
557 			int we_own_it;
558 
559 			if ((msgflg & IPC_NOWAIT) != 0) {
560 #ifdef MSG_DEBUG_OK
561 				kprintf("need more resources but caller doesn't want to wait\n");
562 #endif
563 				eval = EAGAIN;
564 				goto done;
565 			}
566 
567 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
568 #ifdef MSG_DEBUG_OK
569 				kprintf("we don't own the msqid_ds\n");
570 #endif
571 				we_own_it = 0;
572 			} else {
573 				/* Force later arrivals to wait for our
574 				   request */
575 #ifdef MSG_DEBUG_OK
576 				kprintf("we own the msqid_ds\n");
577 #endif
578 				msqptr->msg_perm.mode |= MSG_LOCKED;
579 				we_own_it = 1;
580 			}
581 #ifdef MSG_DEBUG_OK
582 			kprintf("goodnight\n");
583 #endif
584 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
585 #ifdef MSG_DEBUG_OK
586 			kprintf("good morning, eval=%d\n", eval);
587 #endif
588 			if (we_own_it)
589 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
590 			if (eval != 0) {
591 #ifdef MSG_DEBUG_OK
592 				kprintf("msgsnd:  interrupted system call\n");
593 #endif
594 				eval = EINTR;
595 				goto done;
596 			}
597 
598 			/*
599 			 * Make sure that the msq queue still exists
600 			 */
601 
602 			if (msqptr->msg_qbytes == 0) {
603 #ifdef MSG_DEBUG_OK
604 				kprintf("msqid deleted\n");
605 #endif
606 				eval = EIDRM;
607 				goto done;
608 			}
609 
610 		} else {
611 #ifdef MSG_DEBUG_OK
612 			kprintf("got all the resources that we need\n");
613 #endif
614 			break;
615 		}
616 	}
617 
618 	/*
619 	 * We have the resources that we need.
620 	 * Make sure!
621 	 */
622 
623 	if (msqptr->msg_perm.mode & MSG_LOCKED)
624 		panic("msg_perm.mode & MSG_LOCKED");
625 	if (segs_needed > nfree_msgmaps)
626 		panic("segs_needed > nfree_msgmaps");
627 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
628 		panic("msgsz + msg_cbytes > msg_qbytes");
629 	if (free_msghdrs == NULL)
630 		panic("no more msghdrs");
631 
632 	/*
633 	 * Re-lock the msqid_ds in case we page-fault when copying in the
634 	 * message
635 	 */
636 
637 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
638 		panic("msqid_ds is already locked");
639 	msqptr->msg_perm.mode |= MSG_LOCKED;
640 
641 	/*
642 	 * Allocate a message header
643 	 */
644 
645 	msghdr = free_msghdrs;
646 	free_msghdrs = msghdr->msg_next;
647 	msghdr->msg_spot = -1;
648 	msghdr->msg_ts = msgsz;
649 
650 	/*
651 	 * Allocate space for the message
652 	 */
653 
654 	while (segs_needed > 0) {
655 		if (nfree_msgmaps <= 0)
656 			panic("not enough msgmaps");
657 		if (free_msgmaps == -1)
658 			panic("nil free_msgmaps");
659 		next = free_msgmaps;
660 		if (next <= -1)
661 			panic("next too low #1");
662 		if (next >= msginfo.msgseg)
663 			panic("next out of range #1");
664 #ifdef MSG_DEBUG_OK
665 		kprintf("allocating segment %d to message\n", next);
666 #endif
667 		free_msgmaps = msgmaps[next].next;
668 		nfree_msgmaps--;
669 		msgmaps[next].next = msghdr->msg_spot;
670 		msghdr->msg_spot = next;
671 		segs_needed--;
672 	}
673 
674 	/*
675 	 * Copy in the message type
676 	 */
677 
678 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
679 	    sizeof(msghdr->msg_type))) != 0) {
680 #ifdef MSG_DEBUG_OK
681 		kprintf("error %d copying the message type\n", eval);
682 #endif
683 		msg_freehdr(msghdr);
684 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
685 		wakeup((caddr_t)msqptr);
686 		goto done;
687 	}
688 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
689 
690 	/*
691 	 * Validate the message type
692 	 */
693 
694 	if (msghdr->msg_type < 1) {
695 		msg_freehdr(msghdr);
696 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
697 		wakeup((caddr_t)msqptr);
698 #ifdef MSG_DEBUG_OK
699 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
700 #endif
701 		eval = EINVAL;
702 		goto done;
703 	}
704 
705 	/*
706 	 * Copy in the message body
707 	 */
708 
709 	next = msghdr->msg_spot;
710 	while (msgsz > 0) {
711 		size_t tlen;
712 		if (msgsz > msginfo.msgssz)
713 			tlen = msginfo.msgssz;
714 		else
715 			tlen = msgsz;
716 		if (next <= -1)
717 			panic("next too low #2");
718 		if (next >= msginfo.msgseg)
719 			panic("next out of range #2");
720 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
721 		    tlen)) != 0) {
722 #ifdef MSG_DEBUG_OK
723 			kprintf("error %d copying in message segment\n", eval);
724 #endif
725 			msg_freehdr(msghdr);
726 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
727 			wakeup((caddr_t)msqptr);
728 			goto done;
729 		}
730 		msgsz -= tlen;
731 		user_msgp = (const char *)user_msgp + tlen;
732 		next = msgmaps[next].next;
733 	}
734 	if (next != -1)
735 		panic("didn't use all the msg segments");
736 
737 	/*
738 	 * We've got the message.  Unlock the msqid_ds.
739 	 */
740 
741 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
742 
743 	/*
744 	 * Make sure that the msqid_ds is still allocated.
745 	 */
746 
747 	if (msqptr->msg_qbytes == 0) {
748 		msg_freehdr(msghdr);
749 		wakeup((caddr_t)msqptr);
750 		eval = EIDRM;
751 		goto done;
752 	}
753 
754 	/*
755 	 * Put the message into the queue
756 	 */
757 
758 	if (msqptr->msg_first == NULL) {
759 		msqptr->msg_first = msghdr;
760 		msqptr->msg_last = msghdr;
761 	} else {
762 		msqptr->msg_last->msg_next = msghdr;
763 		msqptr->msg_last = msghdr;
764 	}
765 	msqptr->msg_last->msg_next = NULL;
766 
767 	msqptr->msg_cbytes += msghdr->msg_ts;
768 	msqptr->msg_qnum++;
769 	msqptr->msg_lspid = td->td_proc->p_pid;
770 	msqptr->msg_stime = time_second;
771 
772 	wakeup((caddr_t)msqptr);
773 	eval = 0;
774 done:
775 	lwkt_reltoken(&msg_token);
776 	if (eval == 0)
777 		uap->sysmsg_result = 0;
778 	return (eval);
779 }
780 
/*
 * msgrcv system call: remove one message from the queue named by
 * uap->msqid and copy its type and up to uap->msgsz bytes of body to
 * uap->msgp.
 *
 * Selection by uap->msgtyp:
 *   == 0  the first message on the queue
 *   != 0  the first message, scanning from the head, whose type equals
 *         msgtyp or is <= -msgtyp (the latter can only match when msgtyp
 *         is negative)
 *
 * A message longer than msgsz yields E2BIG unless MSG_NOERROR is set, in
 * which case it is truncated.  When nothing matches the caller sleeps, or
 * gets ENOMSG (EAGAIN if ENOMSG is not defined) with IPC_NOWAIT.
 *
 * On success returns 0 and stores the number of body bytes copied in
 * uap->sysmsg_result; otherwise returns an errno value (ENOSYS when SysV
 * IPC is not allowed for a jailed caller, EINVAL for a bad id or stale
 * sequence number, EINTR, EIDRM, or whatever ipcperm()/copyout() report).
 *
 * MPALMOSTSAFE
 */
int
sys_msgrcv(struct msgrcv_args *uap)
{
	struct thread *td = curthread;
	int msqid = uap->msqid;
	void *user_msgp = uap->msgp;
	size_t msgsz = uap->msgsz;
	long msgtyp = uap->msgtyp;
	int msgflg = uap->msgflg;
	size_t len;
	struct msqid_ds *msqptr;
	struct msg *msghdr;
	int eval;
	short next;

#ifdef MSG_DEBUG_OK
	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
	    msgsz, msgtyp, msgflg);
#endif

	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
		return (ENOSYS);

	lwkt_gettoken(&msg_token);
	msqid = IPCID_TO_IX(msqid);

	if (msqid < 0 || msqid >= msginfo.msgmni) {
#ifdef MSG_DEBUG_OK
		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
		    msginfo.msgmni);
#endif
		eval = EINVAL;
		goto done;
	}

	msqptr = &msqids[msqid];
	/* msg_qbytes == 0 marks an unused slot. */
	if (msqptr->msg_qbytes == 0) {
#ifdef MSG_DEBUG_OK
		kprintf("no such message queue id\n");
#endif
		eval = EINVAL;
		goto done;
	}
	/* The slot may have been reused since the caller obtained its id. */
	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
		kprintf("wrong sequence number\n");
#endif
		eval = EINVAL;
		goto done;
	}

	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
#ifdef MSG_DEBUG_OK
		kprintf("requester doesn't have read access\n");
#endif
		goto done;
	}

	/* Loop until a suitable message has been unlinked from the queue. */
	msghdr = NULL;
	while (msghdr == NULL) {
		if (msgtyp == 0) {
			/* Any type: take the head of the queue. */
			msghdr = msqptr->msg_first;
			if (msghdr != NULL) {
				if (msgsz < msghdr->msg_ts &&
				    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
					kprintf("first message on the queue is too big (want %d, got %d)\n",
					    msgsz, msghdr->msg_ts);
#endif
					eval = E2BIG;
					goto done;
				}
				if (msqptr->msg_first == msqptr->msg_last) {
					/* It was the only message queued. */
					msqptr->msg_first = NULL;
					msqptr->msg_last = NULL;
				} else {
					msqptr->msg_first = msghdr->msg_next;
					if (msqptr->msg_first == NULL)
						panic("msg_first/last screwed up #1");
				}
			}
		} else {
			/*
			 * 'prev' points at the link that leads to the header
			 * under inspection; 'previous' is the header before
			 * it (NULL while at the head of the queue).
			 */
			struct msg *previous;
			struct msg **prev;

			previous = NULL;
			prev = &(msqptr->msg_first);
			while ((msghdr = *prev) != NULL) {
				/*
				 * Is this message's type an exact match or is
				 * this message's type less than or equal to
				 * the absolute value of a negative msgtyp?
				 * Note that the second half of this test can
				 * NEVER be true if msgtyp is positive since
				 * msg_type is always positive!
				 */

				if (msgtyp == msghdr->msg_type ||
				    msghdr->msg_type <= -msgtyp) {
#ifdef MSG_DEBUG_OK
					kprintf("found message type %d, requested %d\n",
					    msghdr->msg_type, msgtyp);
#endif
					if (msgsz < msghdr->msg_ts &&
					    (msgflg & MSG_NOERROR) == 0) {
#ifdef MSG_DEBUG_OK
						kprintf("requested message on the queue is too big (want %d, got %d)\n",
						    msgsz, msghdr->msg_ts);
#endif
						eval = E2BIG;
						goto done;
					}
					/* Unlink it from the queue. */
					*prev = msghdr->msg_next;
					if (msghdr == msqptr->msg_last) {
						/* Removed the tail: fix msg_last. */
						if (previous == NULL) {
							if (prev !=
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #2");
							msqptr->msg_first =
							    NULL;
							msqptr->msg_last =
							    NULL;
						} else {
							if (prev ==
							    &msqptr->msg_first)
								panic("msg_first/last screwed up #3");
							msqptr->msg_last =
							    previous;
						}
					}
					break;
				}
				previous = msghdr;
				prev = &(msghdr->msg_next);
			}
		}

		/*
		 * We've either extracted the msghdr for the appropriate
		 * message or there isn't one.
		 * If there is one then bail out of this loop.
		 */

		if (msghdr != NULL)
			break;

		/*
		 * Hmph!  No message found.  Does the user want to wait?
		 */

		if ((msgflg & IPC_NOWAIT) != 0) {
#ifdef MSG_DEBUG_OK
			kprintf("no appropriate message found (msgtyp=%d)\n",
			    msgtyp);
#endif
			/* The SVID says to return ENOMSG. */
#ifdef ENOMSG
			eval = ENOMSG;
#else
			/* Unfortunately, BSD doesn't define that code yet! */
			eval = EAGAIN;
#endif
			goto done;
		}

		/*
		 * Wait for something to happen
		 */

#ifdef MSG_DEBUG_OK
		kprintf("msgrcv:  goodnight\n");
#endif
		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
#ifdef MSG_DEBUG_OK
		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
#endif

		/* Any non-zero tsleep() result is reported as EINTR. */
		if (eval != 0) {
#ifdef MSG_DEBUG_OK
			kprintf("msgsnd:  interrupted system call\n");
#endif
			eval = EINTR;
			goto done;
		}

		/*
		 * Make sure that the msq queue still exists
		 */

		if (msqptr->msg_qbytes == 0 ||
		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
#ifdef MSG_DEBUG_OK
			kprintf("msqid deleted\n");
#endif
			eval = EIDRM;
			goto done;
		}
	}

	/*
	 * Return the message to the user.
	 *
	 * First, do the bookkeeping (before we risk being interrupted).
	 */

	msqptr->msg_cbytes -= msghdr->msg_ts;
	msqptr->msg_qnum--;
	msqptr->msg_lrpid = td->td_proc->p_pid;
	msqptr->msg_rtime = time_second;

	/*
	 * Make msgsz the actual amount that we'll be returning.
	 * Note that this effectively truncates the message if it is too long
	 * (since msgsz is never increased).
	 */

#ifdef MSG_DEBUG_OK
	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
	    msghdr->msg_ts);
#endif
	if (msgsz > msghdr->msg_ts)
		msgsz = msghdr->msg_ts;

	/*
	 * Return the type to the user.
	 * The message is already off the queue, so on failure it is freed
	 * rather than put back.
	 */

	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
	    sizeof(msghdr->msg_type));
	if (eval != 0) {
#ifdef MSG_DEBUG_OK
		kprintf("error (%d) copying out message type\n", eval);
#endif
		msg_freehdr(msghdr);
		wakeup((caddr_t)msqptr);
		goto done;
	}
	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);

	/*
	 * Return the segments to the user
	 */

	next = msghdr->msg_spot;
	for (len = 0; len < msgsz; len += msginfo.msgssz) {
		size_t tlen;

		/* Full segment, or whatever remains for the last one. */
		if (msgsz - len > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz - len;
		if (next <= -1)
			panic("next too low #3");
		if (next >= msginfo.msgseg)
			panic("next out of range #3");
		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
		    user_msgp, tlen);
		if (eval != 0) {
#ifdef MSG_DEBUG_OK
			kprintf("error (%d) copying out message segment\n",
			    eval);
#endif
			msg_freehdr(msghdr);
			wakeup((caddr_t)msqptr);
			goto done;
		}
		user_msgp = (char *)user_msgp + tlen;
		next = msgmaps[next].next;
	}

	/*
	 * Done, return the actual number of bytes copied out.
	 * Freeing the header releases segments and queue space, so wake
	 * anyone sleeping on this queue.
	 */

	msg_freehdr(msghdr);
	wakeup((caddr_t)msqptr);
	eval = 0;
done:
	lwkt_reltoken(&msg_token);
	if (eval == 0)
		uap->sysmsg_result = msgsz;
	return(eval);
}
1067 
1068 static int
1069 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1070 {
1071 	return (SYSCTL_OUT(req, msqids,
1072 		sizeof(struct msqid_ds) * msginfo.msgmni));
1073 }
1074 
/*
 * Tunables: only the segment count, segment size and number of queue
 * identifiers are registered here.  msginit() validates msgssz and msgseg
 * and recomputes msgmax from them.
 */
TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);

/* Every msginfo limit is exported under kern.ipc with CTLFLAG_RD. */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
    "Max characters in message");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0,
    "Max message queue identifiers");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0,
    "Max characters in message queue");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0,
    "Max SVID messages in system");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0,
    "Power-of-two size of a message segment");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0,
    "Number of message segments");
/* Raw dump of the msqids array, produced by sysctl_msqids(). */
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1093