xref: /dragonfly/sys/kern/sysv_msg.c (revision f02303f9)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 /* $DragonFly: src/sys/kern/sysv_msg.c,v 1.17 2006/12/23 23:47:54 swildner Exp $ */
3 
4 /*
5  * Implementation of SVID messages
6  *
7  * Author:  Daniel Boulet
8  *
9  * Copyright 1993 Daniel Boulet and RTMX Inc.
10  *
11  * This system call was implemented by Daniel Boulet under contract from RTMX.
12  *
13  * Redistribution and use in source forms, with and without modification,
14  * are permitted provided that this entire comment appears intact.
15  *
16  * Redistribution in binary form may occur without any restrictions.
17  * Obviously, it would be nice if you gave credit where credit is due
18  * but requiring it would be too onerous.
19  *
20  * This software is provided ``AS IS'' without any warranties of any kind.
21  */
22 
23 #include "opt_sysvipc.h"
24 
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/sysproto.h>
28 #include <sys/kernel.h>
29 #include <sys/proc.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
37 
38 static void msginit (void *);
39 
40 #define MSG_DEBUG
41 #undef MSG_DEBUG_OK
42 
43 static void msg_freehdr (struct msg *msghdr);
44 
45 /* XXX casting to (sy_call_t *) is bogus, as usual. */
46 static sy_call_t *msgcalls[] = {
47 	(sy_call_t *)sys_msgctl, (sy_call_t *)sys_msgget,
48 	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
49 };
50 
51 struct msg {
52 	struct	msg *msg_next;	/* next msg in the chain */
53 	long	msg_type;	/* type of this message */
54     				/* >0 -> type of this message */
55     				/* 0 -> free header */
56 	u_short	msg_ts;		/* size of this message */
57 	short	msg_spot;	/* location of start of msg in buffer */
58 };
59 
60 
/*
 * Default limits.  Every value except MSGMAX can be overridden by
 * defining the macro before this point; MSGMAX is always derived.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* segment size; each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* segment count; msginit panics above 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* bytes in the whole segment pool */
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
77 
78 /*
79  * Based on the configuration parameters described in an SVR2 (yes, two)
80  * config(1m) man page.
81  *
82  * Each message is broken up and stored in segments that are msgssz bytes
83  * long.  For efficiency reasons, this should be a power of two.  Also,
84  * it doesn't make sense if it is less than 8 or greater than about 256.
85  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
86  * two between 8 and 1024 inclusive (and panic's if it isn't).
87  */
88 struct msginfo msginfo = {
89                 MSGMAX,         /* max chars in a message */
90                 MSGMNI,         /* # of message queue identifiers */
91                 MSGMNB,         /* max chars in a queue */
92                 MSGTQL,         /* max messages in system */
93                 MSGSSZ,         /* size of a message segment */
94                 		/* (must be small power of 2 greater than 4) */
95                 MSGSEG          /* number of message segments */
96 };
97 
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid built by MSQID() carries the table index in its low 16 bits and
 * the entry's msg_perm.seq in the next 16 bits; MSQID_IX() and MSQID_SEQ()
 * extract those two halves again.  The index term of MSQID() is
 * parenthesised explicitly so the result does not rely on `&' binding
 * tighter than `|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
105 
106 /*
107  * The rest of this file is specific to this particular implementation.
108  */
109 
/*
 * One entry per segment of msgpool.  Entries are strung together through
 * `next', both on the free list (free_msgmaps) and on the chain owned by
 * a message (starting at msg_spot).
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> available (no further segment) */
				/* 0..(MSGSEG-1) -> index of next segment */
};
115 
/*
 * Flag kept in msg_perm.mode, outside the 0777 permission bits:
 * is this msqid_ds locked?
 */
#define MSG_LOCKED	01000

/* Segment bookkeeping. */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static short free_msgmaps;	/* head of linked list of free map entries */
static int nfree_msgmaps;	/* # of free map entries */

/* Message header bookkeeping. */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msg *free_msghdrs;/* list of free msg headers */

/* The queue table itself. */
static struct msqid_ds *msqids;	/* msginfo.msgmni msqid_ds struct's */
125 
126 static void
127 msginit(void *dummy)
128 {
129 	int i;
130 
131 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
132 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
133 	if (msgpool == NULL)
134 		panic("msgpool is NULL");
135 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
136 	if (msgmaps == NULL)
137 		panic("msgmaps is NULL");
138 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
139 	if (msghdrs == NULL)
140 		panic("msghdrs is NULL");
141 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
142 	if (msqids == NULL)
143 		panic("msqids is NULL");
144 
145 	/*
146 	 * msginfo.msgssz should be a power of two for efficiency reasons.
147 	 * It is also pretty silly if msginfo.msgssz is less than 8
148 	 * or greater than about 256 so ...
149 	 */
150 
151 	i = 8;
152 	while (i < 1024 && i != msginfo.msgssz)
153 		i <<= 1;
154     	if (i != msginfo.msgssz) {
155 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
156 		    msginfo.msgssz);
157 		panic("msginfo.msgssz not a small power of 2");
158 	}
159 
160 	if (msginfo.msgseg > 32767) {
161 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
162 		panic("msginfo.msgseg > 32767");
163 	}
164 
165 	if (msgmaps == NULL)
166 		panic("msgmaps is NULL");
167 
168 	for (i = 0; i < msginfo.msgseg; i++) {
169 		if (i > 0)
170 			msgmaps[i-1].next = i;
171 		msgmaps[i].next = -1;	/* implies entry is available */
172 	}
173 	free_msgmaps = 0;
174 	nfree_msgmaps = msginfo.msgseg;
175 
176 	if (msghdrs == NULL)
177 		panic("msghdrs is NULL");
178 
179 	for (i = 0; i < msginfo.msgtql; i++) {
180 		msghdrs[i].msg_type = 0;
181 		if (i > 0)
182 			msghdrs[i-1].msg_next = &msghdrs[i];
183 		msghdrs[i].msg_next = NULL;
184     	}
185 	free_msghdrs = &msghdrs[0];
186 
187 	if (msqids == NULL)
188 		panic("msqids is NULL");
189 
190 	for (i = 0; i < msginfo.msgmni; i++) {
191 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
192 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
193 		msqids[i].msg_perm.mode = 0;
194 	}
195 }
196 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
197 
198 /*
199  * Entry point for all MSG calls
200  *
201  * msgsys_args(int which, int a2, ...) (VARARGS)
202  */
203 int
204 sys_msgsys(struct msgsys_args *uap)
205 {
206 	struct proc *p = curproc;
207 	unsigned int which = (unsigned int)uap->which;
208 
209 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
210 		return (ENOSYS);
211 
212 	if (which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
213 		return (EINVAL);
214 	bcopy(&uap->a2, &uap->which,
215 	    sizeof(struct msgsys_args) - offsetof(struct msgsys_args, a2));
216 	return ((*msgcalls[which])(uap));
217 }
218 
219 static void
220 msg_freehdr(struct msg *msghdr)
221 {
222 	while (msghdr->msg_ts > 0) {
223 		short next;
224 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
225 			panic("msghdr->msg_spot out of range");
226 		next = msgmaps[msghdr->msg_spot].next;
227 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
228 		free_msgmaps = msghdr->msg_spot;
229 		nfree_msgmaps++;
230 		msghdr->msg_spot = next;
231 		if (msghdr->msg_ts >= msginfo.msgssz)
232 			msghdr->msg_ts -= msginfo.msgssz;
233 		else
234 			msghdr->msg_ts = 0;
235 	}
236 	if (msghdr->msg_spot != -1)
237 		panic("msghdr->msg_spot != -1");
238 	msghdr->msg_next = free_msghdrs;
239 	free_msghdrs = msghdr;
240 }
241 
242 int
243 sys_msgctl(struct msgctl_args *uap)
244 {
245 	struct thread *td = curthread;
246 	struct proc *p = td->td_proc;
247 	int msqid = uap->msqid;
248 	int cmd = uap->cmd;
249 	struct msqid_ds *user_msqptr = uap->buf;
250 	int rval, eval;
251 	struct msqid_ds msqbuf;
252 	struct msqid_ds *msqptr;
253 
254 #ifdef MSG_DEBUG_OK
255 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
256 #endif
257 
258 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
259 		return (ENOSYS);
260 
261 	msqid = IPCID_TO_IX(msqid);
262 
263 	if (msqid < 0 || msqid >= msginfo.msgmni) {
264 #ifdef MSG_DEBUG_OK
265 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
266 		    msginfo.msgmni);
267 #endif
268 		return(EINVAL);
269 	}
270 
271 	msqptr = &msqids[msqid];
272 
273 	if (msqptr->msg_qbytes == 0) {
274 #ifdef MSG_DEBUG_OK
275 		kprintf("no such msqid\n");
276 #endif
277 		return(EINVAL);
278 	}
279 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
280 #ifdef MSG_DEBUG_OK
281 		kprintf("wrong sequence number\n");
282 #endif
283 		return(EINVAL);
284 	}
285 
286 	eval = 0;
287 	rval = 0;
288 
289 	switch (cmd) {
290 
291 	case IPC_RMID:
292 	{
293 		struct msg *msghdr;
294 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
295 			return(eval);
296 		/* Free the message headers */
297 		msghdr = msqptr->msg_first;
298 		while (msghdr != NULL) {
299 			struct msg *msghdr_tmp;
300 
301 			/* Free the segments of each message */
302 			msqptr->msg_cbytes -= msghdr->msg_ts;
303 			msqptr->msg_qnum--;
304 			msghdr_tmp = msghdr;
305 			msghdr = msghdr->msg_next;
306 			msg_freehdr(msghdr_tmp);
307 		}
308 
309 		if (msqptr->msg_cbytes != 0)
310 			panic("msg_cbytes is screwed up");
311 		if (msqptr->msg_qnum != 0)
312 			panic("msg_qnum is screwed up");
313 
314 		msqptr->msg_qbytes = 0;	/* Mark it as free */
315 
316 		wakeup((caddr_t)msqptr);
317 	}
318 
319 		break;
320 
321 	case IPC_SET:
322 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)))
323 			return(eval);
324 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
325 			return(eval);
326 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
327 			eval = suser(td);
328 			if (eval)
329 				return(eval);
330 		}
331 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
332 #ifdef MSG_DEBUG_OK
333 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
334 			    msginfo.msgmnb);
335 #endif
336 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
337 		}
338 		if (msqbuf.msg_qbytes == 0) {
339 #ifdef MSG_DEBUG_OK
340 			kprintf("can't reduce msg_qbytes to 0\n");
341 #endif
342 			return(EINVAL);		/* non-standard errno! */
343 		}
344 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
345 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
346 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
347 		    (msqbuf.msg_perm.mode & 0777);
348 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
349 		msqptr->msg_ctime = time_second;
350 		break;
351 
352 	case IPC_STAT:
353 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
354 #ifdef MSG_DEBUG_OK
355 			kprintf("requester doesn't have read access\n");
356 #endif
357 			return(eval);
358 		}
359 		eval = copyout((caddr_t)msqptr, user_msqptr,
360 		    sizeof(struct msqid_ds));
361 		break;
362 
363 	default:
364 #ifdef MSG_DEBUG_OK
365 		kprintf("invalid command %d\n", cmd);
366 #endif
367 		return(EINVAL);
368 	}
369 
370 	if (eval == 0)
371 		uap->sysmsg_result = rval;
372 	return(eval);
373 }
374 
375 int
376 sys_msgget(struct msgget_args *uap)
377 {
378 	struct proc *p = curproc;
379 	int msqid, eval;
380 	int key = uap->key;
381 	int msgflg = uap->msgflg;
382 	struct ucred *cred = p->p_ucred;
383 	struct msqid_ds *msqptr = NULL;
384 
385 #ifdef MSG_DEBUG_OK
386 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
387 #endif
388 
389 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
390 		return (ENOSYS);
391 
392 	if (key != IPC_PRIVATE) {
393 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
394 			msqptr = &msqids[msqid];
395 			if (msqptr->msg_qbytes != 0 &&
396 			    msqptr->msg_perm.key == key)
397 				break;
398 		}
399 		if (msqid < msginfo.msgmni) {
400 #ifdef MSG_DEBUG_OK
401 			kprintf("found public key\n");
402 #endif
403 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
404 #ifdef MSG_DEBUG_OK
405 				kprintf("not exclusive\n");
406 #endif
407 				return(EEXIST);
408 			}
409 			if ((eval = ipcperm(p, &msqptr->msg_perm, msgflg & 0700 ))) {
410 #ifdef MSG_DEBUG_OK
411 				kprintf("requester doesn't have 0%o access\n",
412 				    msgflg & 0700);
413 #endif
414 				return(eval);
415 			}
416 			goto found;
417 		}
418 	}
419 
420 #ifdef MSG_DEBUG_OK
421 	kprintf("need to allocate the msqid_ds\n");
422 #endif
423 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
424 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
425 			/*
426 			 * Look for an unallocated and unlocked msqid_ds.
427 			 * msqid_ds's can be locked by msgsnd or msgrcv while
428 			 * they are copying the message in/out.  We can't
429 			 * re-use the entry until they release it.
430 			 */
431 			msqptr = &msqids[msqid];
432 			if (msqptr->msg_qbytes == 0 &&
433 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
434 				break;
435 		}
436 		if (msqid == msginfo.msgmni) {
437 #ifdef MSG_DEBUG_OK
438 			kprintf("no more msqid_ds's available\n");
439 #endif
440 			return(ENOSPC);
441 		}
442 #ifdef MSG_DEBUG_OK
443 		kprintf("msqid %d is available\n", msqid);
444 #endif
445 		msqptr->msg_perm.key = key;
446 		msqptr->msg_perm.cuid = cred->cr_uid;
447 		msqptr->msg_perm.uid = cred->cr_uid;
448 		msqptr->msg_perm.cgid = cred->cr_gid;
449 		msqptr->msg_perm.gid = cred->cr_gid;
450 		msqptr->msg_perm.mode = (msgflg & 0777);
451 		/* Make sure that the returned msqid is unique */
452 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
453 		msqptr->msg_first = NULL;
454 		msqptr->msg_last = NULL;
455 		msqptr->msg_cbytes = 0;
456 		msqptr->msg_qnum = 0;
457 		msqptr->msg_qbytes = msginfo.msgmnb;
458 		msqptr->msg_lspid = 0;
459 		msqptr->msg_lrpid = 0;
460 		msqptr->msg_stime = 0;
461 		msqptr->msg_rtime = 0;
462 		msqptr->msg_ctime = time_second;
463 	} else {
464 #ifdef MSG_DEBUG_OK
465 		kprintf("didn't find it and wasn't asked to create it\n");
466 #endif
467 		return(ENOENT);
468 	}
469 
470 found:
471 	/* Construct the unique msqid */
472 	uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
473 	return(0);
474 }
475 
476 int
477 sys_msgsnd(struct msgsnd_args *uap)
478 {
479 	struct proc *p = curproc;
480 	int msqid = uap->msqid;
481 	void *user_msgp = uap->msgp;
482 	size_t msgsz = uap->msgsz;
483 	int msgflg = uap->msgflg;
484 	int segs_needed, eval;
485 	struct msqid_ds *msqptr;
486 	struct msg *msghdr;
487 	short next;
488 
489 #ifdef MSG_DEBUG_OK
490 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
491 	    msgflg);
492 #endif
493 
494 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
495 		return (ENOSYS);
496 
497 	msqid = IPCID_TO_IX(msqid);
498 
499 	if (msqid < 0 || msqid >= msginfo.msgmni) {
500 #ifdef MSG_DEBUG_OK
501 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
502 		    msginfo.msgmni);
503 #endif
504 		return(EINVAL);
505 	}
506 
507 	msqptr = &msqids[msqid];
508 	if (msqptr->msg_qbytes == 0) {
509 #ifdef MSG_DEBUG_OK
510 		kprintf("no such message queue id\n");
511 #endif
512 		return(EINVAL);
513 	}
514 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
515 #ifdef MSG_DEBUG_OK
516 		kprintf("wrong sequence number\n");
517 #endif
518 		return(EINVAL);
519 	}
520 
521 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_W))) {
522 #ifdef MSG_DEBUG_OK
523 		kprintf("requester doesn't have write access\n");
524 #endif
525 		return(eval);
526 	}
527 
528 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
529 #ifdef MSG_DEBUG_OK
530 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
531 	    segs_needed);
532 #endif
533 	for (;;) {
534 		int need_more_resources = 0;
535 
536 		/*
537 		 * check msgsz
538 		 * (inside this loop in case msg_qbytes changes while we sleep)
539 		 */
540 
541 		if (msgsz > msqptr->msg_qbytes) {
542 #ifdef MSG_DEBUG_OK
543 			kprintf("msgsz > msqptr->msg_qbytes\n");
544 #endif
545 			return(EINVAL);
546 		}
547 
548 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
549 #ifdef MSG_DEBUG_OK
550 			kprintf("msqid is locked\n");
551 #endif
552 			need_more_resources = 1;
553 		}
554 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
555 #ifdef MSG_DEBUG_OK
556 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
557 #endif
558 			need_more_resources = 1;
559 		}
560 		if (segs_needed > nfree_msgmaps) {
561 #ifdef MSG_DEBUG_OK
562 			kprintf("segs_needed > nfree_msgmaps\n");
563 #endif
564 			need_more_resources = 1;
565 		}
566 		if (free_msghdrs == NULL) {
567 #ifdef MSG_DEBUG_OK
568 			kprintf("no more msghdrs\n");
569 #endif
570 			need_more_resources = 1;
571 		}
572 
573 		if (need_more_resources) {
574 			int we_own_it;
575 
576 			if ((msgflg & IPC_NOWAIT) != 0) {
577 #ifdef MSG_DEBUG_OK
578 				kprintf("need more resources but caller doesn't want to wait\n");
579 #endif
580 				return(EAGAIN);
581 			}
582 
583 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
584 #ifdef MSG_DEBUG_OK
585 				kprintf("we don't own the msqid_ds\n");
586 #endif
587 				we_own_it = 0;
588 			} else {
589 				/* Force later arrivals to wait for our
590 				   request */
591 #ifdef MSG_DEBUG_OK
592 				kprintf("we own the msqid_ds\n");
593 #endif
594 				msqptr->msg_perm.mode |= MSG_LOCKED;
595 				we_own_it = 1;
596 			}
597 #ifdef MSG_DEBUG_OK
598 			kprintf("goodnight\n");
599 #endif
600 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
601 #ifdef MSG_DEBUG_OK
602 			kprintf("good morning, eval=%d\n", eval);
603 #endif
604 			if (we_own_it)
605 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
606 			if (eval != 0) {
607 #ifdef MSG_DEBUG_OK
608 				kprintf("msgsnd:  interrupted system call\n");
609 #endif
610 				return(EINTR);
611 			}
612 
613 			/*
614 			 * Make sure that the msq queue still exists
615 			 */
616 
617 			if (msqptr->msg_qbytes == 0) {
618 #ifdef MSG_DEBUG_OK
619 				kprintf("msqid deleted\n");
620 #endif
621 				return(EIDRM);
622 			}
623 
624 		} else {
625 #ifdef MSG_DEBUG_OK
626 			kprintf("got all the resources that we need\n");
627 #endif
628 			break;
629 		}
630 	}
631 
632 	/*
633 	 * We have the resources that we need.
634 	 * Make sure!
635 	 */
636 
637 	if (msqptr->msg_perm.mode & MSG_LOCKED)
638 		panic("msg_perm.mode & MSG_LOCKED");
639 	if (segs_needed > nfree_msgmaps)
640 		panic("segs_needed > nfree_msgmaps");
641 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
642 		panic("msgsz + msg_cbytes > msg_qbytes");
643 	if (free_msghdrs == NULL)
644 		panic("no more msghdrs");
645 
646 	/*
647 	 * Re-lock the msqid_ds in case we page-fault when copying in the
648 	 * message
649 	 */
650 
651 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
652 		panic("msqid_ds is already locked");
653 	msqptr->msg_perm.mode |= MSG_LOCKED;
654 
655 	/*
656 	 * Allocate a message header
657 	 */
658 
659 	msghdr = free_msghdrs;
660 	free_msghdrs = msghdr->msg_next;
661 	msghdr->msg_spot = -1;
662 	msghdr->msg_ts = msgsz;
663 
664 	/*
665 	 * Allocate space for the message
666 	 */
667 
668 	while (segs_needed > 0) {
669 		if (nfree_msgmaps <= 0)
670 			panic("not enough msgmaps");
671 		if (free_msgmaps == -1)
672 			panic("nil free_msgmaps");
673 		next = free_msgmaps;
674 		if (next <= -1)
675 			panic("next too low #1");
676 		if (next >= msginfo.msgseg)
677 			panic("next out of range #1");
678 #ifdef MSG_DEBUG_OK
679 		kprintf("allocating segment %d to message\n", next);
680 #endif
681 		free_msgmaps = msgmaps[next].next;
682 		nfree_msgmaps--;
683 		msgmaps[next].next = msghdr->msg_spot;
684 		msghdr->msg_spot = next;
685 		segs_needed--;
686 	}
687 
688 	/*
689 	 * Copy in the message type
690 	 */
691 
692 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
693 	    sizeof(msghdr->msg_type))) != 0) {
694 #ifdef MSG_DEBUG_OK
695 		kprintf("error %d copying the message type\n", eval);
696 #endif
697 		msg_freehdr(msghdr);
698 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
699 		wakeup((caddr_t)msqptr);
700 		return(eval);
701 	}
702 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
703 
704 	/*
705 	 * Validate the message type
706 	 */
707 
708 	if (msghdr->msg_type < 1) {
709 		msg_freehdr(msghdr);
710 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
711 		wakeup((caddr_t)msqptr);
712 #ifdef MSG_DEBUG_OK
713 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
714 #endif
715 		return(EINVAL);
716 	}
717 
718 	/*
719 	 * Copy in the message body
720 	 */
721 
722 	next = msghdr->msg_spot;
723 	while (msgsz > 0) {
724 		size_t tlen;
725 		if (msgsz > msginfo.msgssz)
726 			tlen = msginfo.msgssz;
727 		else
728 			tlen = msgsz;
729 		if (next <= -1)
730 			panic("next too low #2");
731 		if (next >= msginfo.msgseg)
732 			panic("next out of range #2");
733 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
734 		    tlen)) != 0) {
735 #ifdef MSG_DEBUG_OK
736 			kprintf("error %d copying in message segment\n", eval);
737 #endif
738 			msg_freehdr(msghdr);
739 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
740 			wakeup((caddr_t)msqptr);
741 			return(eval);
742 		}
743 		msgsz -= tlen;
744 		user_msgp = (char *)user_msgp + tlen;
745 		next = msgmaps[next].next;
746 	}
747 	if (next != -1)
748 		panic("didn't use all the msg segments");
749 
750 	/*
751 	 * We've got the message.  Unlock the msqid_ds.
752 	 */
753 
754 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
755 
756 	/*
757 	 * Make sure that the msqid_ds is still allocated.
758 	 */
759 
760 	if (msqptr->msg_qbytes == 0) {
761 		msg_freehdr(msghdr);
762 		wakeup((caddr_t)msqptr);
763 		return(EIDRM);
764 	}
765 
766 	/*
767 	 * Put the message into the queue
768 	 */
769 
770 	if (msqptr->msg_first == NULL) {
771 		msqptr->msg_first = msghdr;
772 		msqptr->msg_last = msghdr;
773 	} else {
774 		msqptr->msg_last->msg_next = msghdr;
775 		msqptr->msg_last = msghdr;
776 	}
777 	msqptr->msg_last->msg_next = NULL;
778 
779 	msqptr->msg_cbytes += msghdr->msg_ts;
780 	msqptr->msg_qnum++;
781 	msqptr->msg_lspid = p->p_pid;
782 	msqptr->msg_stime = time_second;
783 
784 	wakeup((caddr_t)msqptr);
785 	uap->sysmsg_result = 0;
786 	return(0);
787 }
788 
789 int
790 sys_msgrcv(struct msgrcv_args *uap)
791 {
792 	struct proc *p = curproc;
793 	int msqid = uap->msqid;
794 	void *user_msgp = uap->msgp;
795 	size_t msgsz = uap->msgsz;
796 	long msgtyp = uap->msgtyp;
797 	int msgflg = uap->msgflg;
798 	size_t len;
799 	struct msqid_ds *msqptr;
800 	struct msg *msghdr;
801 	int eval;
802 	short next;
803 
804 #ifdef MSG_DEBUG_OK
805 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
806 	    msgsz, msgtyp, msgflg);
807 #endif
808 
809 	if (!jail_sysvipc_allowed && p->p_ucred->cr_prison != NULL)
810 		return (ENOSYS);
811 
812 	msqid = IPCID_TO_IX(msqid);
813 
814 	if (msqid < 0 || msqid >= msginfo.msgmni) {
815 #ifdef MSG_DEBUG_OK
816 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
817 		    msginfo.msgmni);
818 #endif
819 		return(EINVAL);
820 	}
821 
822 	msqptr = &msqids[msqid];
823 	if (msqptr->msg_qbytes == 0) {
824 #ifdef MSG_DEBUG_OK
825 		kprintf("no such message queue id\n");
826 #endif
827 		return(EINVAL);
828 	}
829 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
830 #ifdef MSG_DEBUG_OK
831 		kprintf("wrong sequence number\n");
832 #endif
833 		return(EINVAL);
834 	}
835 
836 	if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
837 #ifdef MSG_DEBUG_OK
838 		kprintf("requester doesn't have read access\n");
839 #endif
840 		return(eval);
841 	}
842 
843 	msghdr = NULL;
844 	while (msghdr == NULL) {
845 		if (msgtyp == 0) {
846 			msghdr = msqptr->msg_first;
847 			if (msghdr != NULL) {
848 				if (msgsz < msghdr->msg_ts &&
849 				    (msgflg & MSG_NOERROR) == 0) {
850 #ifdef MSG_DEBUG_OK
851 					kprintf("first message on the queue is too big (want %d, got %d)\n",
852 					    msgsz, msghdr->msg_ts);
853 #endif
854 					return(E2BIG);
855 				}
856 				if (msqptr->msg_first == msqptr->msg_last) {
857 					msqptr->msg_first = NULL;
858 					msqptr->msg_last = NULL;
859 				} else {
860 					msqptr->msg_first = msghdr->msg_next;
861 					if (msqptr->msg_first == NULL)
862 						panic("msg_first/last screwed up #1");
863 				}
864 			}
865 		} else {
866 			struct msg *previous;
867 			struct msg **prev;
868 
869 			previous = NULL;
870 			prev = &(msqptr->msg_first);
871 			while ((msghdr = *prev) != NULL) {
872 				/*
873 				 * Is this message's type an exact match or is
874 				 * this message's type less than or equal to
875 				 * the absolute value of a negative msgtyp?
876 				 * Note that the second half of this test can
877 				 * NEVER be true if msgtyp is positive since
878 				 * msg_type is always positive!
879 				 */
880 
881 				if (msgtyp == msghdr->msg_type ||
882 				    msghdr->msg_type <= -msgtyp) {
883 #ifdef MSG_DEBUG_OK
884 					kprintf("found message type %d, requested %d\n",
885 					    msghdr->msg_type, msgtyp);
886 #endif
887 					if (msgsz < msghdr->msg_ts &&
888 					    (msgflg & MSG_NOERROR) == 0) {
889 #ifdef MSG_DEBUG_OK
890 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
891 						    msgsz, msghdr->msg_ts);
892 #endif
893 						return(E2BIG);
894 					}
895 					*prev = msghdr->msg_next;
896 					if (msghdr == msqptr->msg_last) {
897 						if (previous == NULL) {
898 							if (prev !=
899 							    &msqptr->msg_first)
900 								panic("msg_first/last screwed up #2");
901 							msqptr->msg_first =
902 							    NULL;
903 							msqptr->msg_last =
904 							    NULL;
905 						} else {
906 							if (prev ==
907 							    &msqptr->msg_first)
908 								panic("msg_first/last screwed up #3");
909 							msqptr->msg_last =
910 							    previous;
911 						}
912 					}
913 					break;
914 				}
915 				previous = msghdr;
916 				prev = &(msghdr->msg_next);
917 			}
918 		}
919 
920 		/*
921 		 * We've either extracted the msghdr for the appropriate
922 		 * message or there isn't one.
923 		 * If there is one then bail out of this loop.
924 		 */
925 
926 		if (msghdr != NULL)
927 			break;
928 
929 		/*
930 		 * Hmph!  No message found.  Does the user want to wait?
931 		 */
932 
933 		if ((msgflg & IPC_NOWAIT) != 0) {
934 #ifdef MSG_DEBUG_OK
935 			kprintf("no appropriate message found (msgtyp=%d)\n",
936 			    msgtyp);
937 #endif
938 			/* The SVID says to return ENOMSG. */
939 #ifdef ENOMSG
940 			return(ENOMSG);
941 #else
942 			/* Unfortunately, BSD doesn't define that code yet! */
943 			return(EAGAIN);
944 #endif
945 		}
946 
947 		/*
948 		 * Wait for something to happen
949 		 */
950 
951 #ifdef MSG_DEBUG_OK
952 		kprintf("msgrcv:  goodnight\n");
953 #endif
954 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
955 #ifdef MSG_DEBUG_OK
956 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
957 #endif
958 
959 		if (eval != 0) {
960 #ifdef MSG_DEBUG_OK
961 			kprintf("msgsnd:  interrupted system call\n");
962 #endif
963 			return(EINTR);
964 		}
965 
966 		/*
967 		 * Make sure that the msq queue still exists
968 		 */
969 
970 		if (msqptr->msg_qbytes == 0 ||
971 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
972 #ifdef MSG_DEBUG_OK
973 			kprintf("msqid deleted\n");
974 #endif
975 			return(EIDRM);
976 		}
977 	}
978 
979 	/*
980 	 * Return the message to the user.
981 	 *
982 	 * First, do the bookkeeping (before we risk being interrupted).
983 	 */
984 
985 	msqptr->msg_cbytes -= msghdr->msg_ts;
986 	msqptr->msg_qnum--;
987 	msqptr->msg_lrpid = p->p_pid;
988 	msqptr->msg_rtime = time_second;
989 
990 	/*
991 	 * Make msgsz the actual amount that we'll be returning.
992 	 * Note that this effectively truncates the message if it is too long
993 	 * (since msgsz is never increased).
994 	 */
995 
996 #ifdef MSG_DEBUG_OK
997 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
998 	    msghdr->msg_ts);
999 #endif
1000 	if (msgsz > msghdr->msg_ts)
1001 		msgsz = msghdr->msg_ts;
1002 
1003 	/*
1004 	 * Return the type to the user.
1005 	 */
1006 
1007 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1008 	    sizeof(msghdr->msg_type));
1009 	if (eval != 0) {
1010 #ifdef MSG_DEBUG_OK
1011 		kprintf("error (%d) copying out message type\n", eval);
1012 #endif
1013 		msg_freehdr(msghdr);
1014 		wakeup((caddr_t)msqptr);
1015 		return(eval);
1016 	}
1017 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1018 
1019 	/*
1020 	 * Return the segments to the user
1021 	 */
1022 
1023 	next = msghdr->msg_spot;
1024 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1025 		size_t tlen;
1026 
1027 		if (msgsz - len > msginfo.msgssz)
1028 			tlen = msginfo.msgssz;
1029 		else
1030 			tlen = msgsz - len;
1031 		if (next <= -1)
1032 			panic("next too low #3");
1033 		if (next >= msginfo.msgseg)
1034 			panic("next out of range #3");
1035 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1036 		    user_msgp, tlen);
1037 		if (eval != 0) {
1038 #ifdef MSG_DEBUG_OK
1039 			kprintf("error (%d) copying out message segment\n",
1040 			    eval);
1041 #endif
1042 			msg_freehdr(msghdr);
1043 			wakeup((caddr_t)msqptr);
1044 			return(eval);
1045 		}
1046 		user_msgp = (char *)user_msgp + tlen;
1047 		next = msgmaps[next].next;
1048 	}
1049 
1050 	/*
1051 	 * Done, return the actual number of bytes copied out.
1052 	 */
1053 
1054 	msg_freehdr(msghdr);
1055 	wakeup((caddr_t)msqptr);
1056 	uap->sysmsg_result = msgsz;
1057 	return(0);
1058 }
1059 
1060 static int
1061 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1062 {
1063 
1064 	return (SYSCTL_OUT(req, msqids,
1065 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1066 }
1067 
1068 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1069 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1070 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1071 
1072 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1073 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
1074 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1075 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1076 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
1077 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
1078 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1079     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1080