xref: /dragonfly/sys/kern/sysv_msg.c (revision ef3ac1d1)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysproto.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/priv.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 #include <sys/mplock2.h>
37 
38 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
39 
40 static void msginit (void *);
41 
42 #define MSG_DEBUG
43 #undef MSG_DEBUG_OK
44 
45 static void msg_freehdr (struct msg *msghdr);
46 
47 struct msg {
48 	struct	msg *msg_next;	/* next msg in the chain */
49 	long	msg_type;	/* type of this message */
50     				/* >0 -> type of this message */
51     				/* 0 -> free header */
52 	u_short	msg_ts;		/* size of this message */
53 	short	msg_spot;	/* location of start of msg in buffer */
54 };
55 
56 
/*
 * Compile-time defaults for the message facility.  Every value except
 * MSGMAX is guarded by #ifndef so that a definition made earlier (e.g. by
 * the build configuration) takes precedence.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 */
#endif
/*
 * Largest message: every segment of the pool.  The operands are
 * parenthesized so that an override spelled as an expression still
 * multiplies as a whole.
 */
#define MSGMAX	((MSGSSZ) * (MSGSEG))
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
73 
74 /*
75  * Based on the configuration parameters described in an SVR2 (yes, two)
76  * config(1m) man page.
77  *
78  * Each message is broken up and stored in segments that are msgssz bytes
79  * long.  For efficiency reasons, this should be a power of two.  Also,
80  * it doesn't make sense if it is less than 8 or greater than about 256.
81  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
82  * two between 8 and 1024 inclusive (and panic's if it isn't).
83  */
84 struct msginfo msginfo = {
85                 MSGMAX,         /* max chars in a message */
86                 MSGMNI,         /* # of message queue identifiers */
87                 MSGMNB,         /* max chars in a queue */
88                 MSGTQL,         /* max messages in system */
89                 MSGSSZ,         /* size of a message segment */
90                 		/* (must be small power of 2 greater than 4) */
91                 MSGSEG          /* number of message segments */
92 };
93 
/*
 * Conversions between a msqid_ds (plus its table index) and a msqid.
 * (specific to this implementation)
 *
 * A msqid holds the table index in its low 16 bits and the value of
 * msg_perm.seq in the 16 bits above them.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
101 
102 /*
103  * The rest of this file is specific to this particular implementation.
104  */
105 
/*
 * One entry per segment of the message pool.  Entries are chained through
 * `next', either as part of a message or on the free-segment list.
 */
struct msgmap {
	short	next;		/* link to the following segment: */
				/*   -1 -> end of chain */
				/*   0..(MSGSEG-1) -> index of next segment */
};
111 
/* Flag kept in msg_perm.mode while a msqid_ds is locked. */
#define MSG_LOCKED	01000

/* Free-list bookkeeping. */
static int nfree_msgmaps;		/* count of free map entries */
static short free_msgmaps;		/* first entry of the free map list */
static struct msg *free_msghdrs;	/* first entry of the free header list */

/* Tables allocated by msginit(). */
static char *msgpool;			/* message buffer pool, msgmax bytes */
static struct msgmap *msgmaps;		/* msgseg msgmap structures */
static struct msg *msghdrs;		/* msgtql msg headers */
static struct msqid_ds *msqids;		/* msgmni msqid_ds structures */
121 
122 static void
123 msginit(void *dummy)
124 {
125 	int i;
126 
127 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
128 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
129 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
130 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
131 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
132 
133 	/*
134 	 * msginfo.msgssz should be a power of two for efficiency reasons.
135 	 * It is also pretty silly if msginfo.msgssz is less than 8
136 	 * or greater than about 256 so ...
137 	 */
138 
139 	i = 8;
140 	while (i < 1024 && i != msginfo.msgssz)
141 		i <<= 1;
142     	if (i != msginfo.msgssz) {
143 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
144 		    msginfo.msgssz);
145 		panic("msginfo.msgssz not a small power of 2");
146 	}
147 
148 	if (msginfo.msgseg > 32767) {
149 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
150 		panic("msginfo.msgseg > 32767");
151 	}
152 
153 	for (i = 0; i < msginfo.msgseg; i++) {
154 		if (i > 0)
155 			msgmaps[i-1].next = i;
156 		msgmaps[i].next = -1;	/* implies entry is available */
157 	}
158 	free_msgmaps = 0;
159 	nfree_msgmaps = msginfo.msgseg;
160 
161 	for (i = 0; i < msginfo.msgtql; i++) {
162 		msghdrs[i].msg_type = 0;
163 		if (i > 0)
164 			msghdrs[i-1].msg_next = &msghdrs[i];
165 		msghdrs[i].msg_next = NULL;
166     	}
167 	free_msghdrs = &msghdrs[0];
168 
169 	for (i = 0; i < msginfo.msgmni; i++) {
170 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
171 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
172 		msqids[i].msg_perm.mode = 0;
173 	}
174 }
175 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL)
176 
177 static void
178 msg_freehdr(struct msg *msghdr)
179 {
180 	while (msghdr->msg_ts > 0) {
181 		short next;
182 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
183 			panic("msghdr->msg_spot out of range");
184 		next = msgmaps[msghdr->msg_spot].next;
185 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
186 		free_msgmaps = msghdr->msg_spot;
187 		nfree_msgmaps++;
188 		msghdr->msg_spot = next;
189 		if (msghdr->msg_ts >= msginfo.msgssz)
190 			msghdr->msg_ts -= msginfo.msgssz;
191 		else
192 			msghdr->msg_ts = 0;
193 	}
194 	if (msghdr->msg_spot != -1)
195 		panic("msghdr->msg_spot != -1");
196 	msghdr->msg_next = free_msghdrs;
197 	free_msghdrs = msghdr;
198 }
199 
200 /*
201  * MPALMOSTSAFE
202  */
203 int
204 sys_msgctl(struct msgctl_args *uap)
205 {
206 	struct thread *td = curthread;
207 	struct proc *p = td->td_proc;
208 	int msqid = uap->msqid;
209 	int cmd = uap->cmd;
210 	struct msqid_ds *user_msqptr = uap->buf;
211 	int rval, eval;
212 	struct msqid_ds msqbuf;
213 	struct msqid_ds *msqptr;
214 
215 #ifdef MSG_DEBUG_OK
216 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
217 #endif
218 
219 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
220 		return (ENOSYS);
221 
222 	get_mplock();
223 	msqid = IPCID_TO_IX(msqid);
224 
225 	if (msqid < 0 || msqid >= msginfo.msgmni) {
226 #ifdef MSG_DEBUG_OK
227 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
228 		    msginfo.msgmni);
229 #endif
230 		eval = EINVAL;
231 		goto done;
232 	}
233 
234 	msqptr = &msqids[msqid];
235 
236 	if (msqptr->msg_qbytes == 0) {
237 #ifdef MSG_DEBUG_OK
238 		kprintf("no such msqid\n");
239 #endif
240 		eval = EINVAL;
241 		goto done;
242 	}
243 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
244 #ifdef MSG_DEBUG_OK
245 		kprintf("wrong sequence number\n");
246 #endif
247 		eval = EINVAL;
248 		goto done;
249 	}
250 
251 	rval = 0;
252 
253 	switch (cmd) {
254 	case IPC_RMID:
255 	{
256 		struct msg *msghdr;
257 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
258 			break;
259 		/* Free the message headers */
260 		msghdr = msqptr->msg_first;
261 		while (msghdr != NULL) {
262 			struct msg *msghdr_tmp;
263 
264 			/* Free the segments of each message */
265 			msqptr->msg_cbytes -= msghdr->msg_ts;
266 			msqptr->msg_qnum--;
267 			msghdr_tmp = msghdr;
268 			msghdr = msghdr->msg_next;
269 			msg_freehdr(msghdr_tmp);
270 		}
271 
272 		if (msqptr->msg_cbytes != 0)
273 			panic("msg_cbytes is screwed up");
274 		if (msqptr->msg_qnum != 0)
275 			panic("msg_qnum is screwed up");
276 
277 		msqptr->msg_qbytes = 0;	/* Mark it as free */
278 
279 		wakeup((caddr_t)msqptr);
280 	}
281 
282 		break;
283 
284 	case IPC_SET:
285 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
286 			break;
287 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
288 			break;
289 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
290 			eval = priv_check(td, PRIV_ROOT);
291 			if (eval)
292 				break;
293 		}
294 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
295 #ifdef MSG_DEBUG_OK
296 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
297 			    msginfo.msgmnb);
298 #endif
299 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
300 		}
301 		if (msqbuf.msg_qbytes == 0) {
302 #ifdef MSG_DEBUG_OK
303 			kprintf("can't reduce msg_qbytes to 0\n");
304 #endif
305 			eval = EINVAL;		/* non-standard errno! */
306 			break;
307 		}
308 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
309 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
310 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
311 					(msqbuf.msg_perm.mode & 0777);
312 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
313 		msqptr->msg_ctime = time_second;
314 		break;
315 
316 	case IPC_STAT:
317 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
318 #ifdef MSG_DEBUG_OK
319 			kprintf("requester doesn't have read access\n");
320 #endif
321 			eval = EINVAL;
322 			break;
323 		}
324 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
325 		break;
326 
327 	default:
328 #ifdef MSG_DEBUG_OK
329 		kprintf("invalid command %d\n", cmd);
330 #endif
331 		eval = EINVAL;
332 		break;
333 	}
334 done:
335 	rel_mplock();
336 	if (eval == 0)
337 		uap->sysmsg_result = rval;
338 	return(eval);
339 }
340 
341 /*
342  * MPALMOSTSAFE
343  */
344 int
345 sys_msgget(struct msgget_args *uap)
346 {
347 	struct thread *td = curthread;
348 	int msqid, eval;
349 	int key = uap->key;
350 	int msgflg = uap->msgflg;
351 	struct ucred *cred = td->td_ucred;
352 	struct msqid_ds *msqptr = NULL;
353 
354 #ifdef MSG_DEBUG_OK
355 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
356 #endif
357 	if (!jail_sysvipc_allowed && cred->cr_prison != NULL)
358 		return (ENOSYS);
359 
360 	eval = 0;
361 	get_mplock();
362 
363 	if (key != IPC_PRIVATE) {
364 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
365 			msqptr = &msqids[msqid];
366 			if (msqptr->msg_qbytes != 0 &&
367 			    msqptr->msg_perm.key == key)
368 				break;
369 		}
370 		if (msqid < msginfo.msgmni) {
371 #ifdef MSG_DEBUG_OK
372 			kprintf("found public key\n");
373 #endif
374 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
375 #ifdef MSG_DEBUG_OK
376 				kprintf("not exclusive\n");
377 #endif
378 				eval = EEXIST;
379 				goto done;
380 			}
381 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
382 #ifdef MSG_DEBUG_OK
383 				kprintf("requester doesn't have 0%o access\n",
384 				    msgflg & 0700);
385 #endif
386 				goto done;
387 			}
388 			goto done;
389 		}
390 	}
391 
392 #ifdef MSG_DEBUG_OK
393 	kprintf("need to allocate the msqid_ds\n");
394 #endif
395 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
396 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
397 			/*
398 			 * Look for an unallocated and unlocked msqid_ds.
399 			 * msqid_ds's can be locked by msgsnd or msgrcv while
400 			 * they are copying the message in/out.  We can't
401 			 * re-use the entry until they release it.
402 			 */
403 			msqptr = &msqids[msqid];
404 			if (msqptr->msg_qbytes == 0 &&
405 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
406 				break;
407 		}
408 		if (msqid == msginfo.msgmni) {
409 #ifdef MSG_DEBUG_OK
410 			kprintf("no more msqid_ds's available\n");
411 #endif
412 			eval = ENOSPC;
413 			goto done;
414 		}
415 #ifdef MSG_DEBUG_OK
416 		kprintf("msqid %d is available\n", msqid);
417 #endif
418 		msqptr->msg_perm.key = key;
419 		msqptr->msg_perm.cuid = cred->cr_uid;
420 		msqptr->msg_perm.uid = cred->cr_uid;
421 		msqptr->msg_perm.cgid = cred->cr_gid;
422 		msqptr->msg_perm.gid = cred->cr_gid;
423 		msqptr->msg_perm.mode = (msgflg & 0777);
424 		/* Make sure that the returned msqid is unique */
425 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
426 		msqptr->msg_first = NULL;
427 		msqptr->msg_last = NULL;
428 		msqptr->msg_cbytes = 0;
429 		msqptr->msg_qnum = 0;
430 		msqptr->msg_qbytes = msginfo.msgmnb;
431 		msqptr->msg_lspid = 0;
432 		msqptr->msg_lrpid = 0;
433 		msqptr->msg_stime = 0;
434 		msqptr->msg_rtime = 0;
435 		msqptr->msg_ctime = time_second;
436 	} else {
437 #ifdef MSG_DEBUG_OK
438 		kprintf("didn't find it and wasn't asked to create it\n");
439 #endif
440 		eval = ENOENT;
441 	}
442 
443 done:
444 	rel_mplock();
445 	/* Construct the unique msqid */
446 	if (eval == 0)
447 		uap->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
448 	return(eval);
449 }
450 
451 /*
452  * MPALMOSTSAFE
453  */
454 int
455 sys_msgsnd(struct msgsnd_args *uap)
456 {
457 	struct thread *td = curthread;
458 	int msqid = uap->msqid;
459 	void *user_msgp = uap->msgp;
460 	size_t msgsz = uap->msgsz;
461 	int msgflg = uap->msgflg;
462 	int segs_needed, eval;
463 	struct msqid_ds *msqptr;
464 	struct msg *msghdr;
465 	short next;
466 
467 #ifdef MSG_DEBUG_OK
468 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
469 	    msgflg);
470 #endif
471 
472 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
473 		return (ENOSYS);
474 
475 	get_mplock();
476 	msqid = IPCID_TO_IX(msqid);
477 
478 	if (msqid < 0 || msqid >= msginfo.msgmni) {
479 #ifdef MSG_DEBUG_OK
480 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
481 		    msginfo.msgmni);
482 #endif
483 		eval = EINVAL;
484 		goto done;
485 	}
486 
487 	msqptr = &msqids[msqid];
488 	if (msqptr->msg_qbytes == 0) {
489 #ifdef MSG_DEBUG_OK
490 		kprintf("no such message queue id\n");
491 #endif
492 		eval = EINVAL;
493 		goto done;
494 	}
495 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
496 #ifdef MSG_DEBUG_OK
497 		kprintf("wrong sequence number\n");
498 #endif
499 		eval = EINVAL;
500 		goto done;
501 	}
502 
503 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
504 #ifdef MSG_DEBUG_OK
505 		kprintf("requester doesn't have write access\n");
506 #endif
507 		eval = EINVAL;
508 		goto done;
509 	}
510 
511 	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
512 #ifdef MSG_DEBUG_OK
513 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
514 	    segs_needed);
515 #endif
516 	for (;;) {
517 		int need_more_resources = 0;
518 
519 		/*
520 		 * check msgsz
521 		 * (inside this loop in case msg_qbytes changes while we sleep)
522 		 */
523 
524 		if (msgsz > msqptr->msg_qbytes) {
525 #ifdef MSG_DEBUG_OK
526 			kprintf("msgsz > msqptr->msg_qbytes\n");
527 #endif
528 			eval = EINVAL;
529 			goto done;
530 		}
531 
532 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
533 #ifdef MSG_DEBUG_OK
534 			kprintf("msqid is locked\n");
535 #endif
536 			need_more_resources = 1;
537 		}
538 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
539 #ifdef MSG_DEBUG_OK
540 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
541 #endif
542 			need_more_resources = 1;
543 		}
544 		if (segs_needed > nfree_msgmaps) {
545 #ifdef MSG_DEBUG_OK
546 			kprintf("segs_needed > nfree_msgmaps\n");
547 #endif
548 			need_more_resources = 1;
549 		}
550 		if (free_msghdrs == NULL) {
551 #ifdef MSG_DEBUG_OK
552 			kprintf("no more msghdrs\n");
553 #endif
554 			need_more_resources = 1;
555 		}
556 
557 		if (need_more_resources) {
558 			int we_own_it;
559 
560 			if ((msgflg & IPC_NOWAIT) != 0) {
561 #ifdef MSG_DEBUG_OK
562 				kprintf("need more resources but caller doesn't want to wait\n");
563 #endif
564 				eval = EAGAIN;
565 				goto done;
566 			}
567 
568 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
569 #ifdef MSG_DEBUG_OK
570 				kprintf("we don't own the msqid_ds\n");
571 #endif
572 				we_own_it = 0;
573 			} else {
574 				/* Force later arrivals to wait for our
575 				   request */
576 #ifdef MSG_DEBUG_OK
577 				kprintf("we own the msqid_ds\n");
578 #endif
579 				msqptr->msg_perm.mode |= MSG_LOCKED;
580 				we_own_it = 1;
581 			}
582 #ifdef MSG_DEBUG_OK
583 			kprintf("goodnight\n");
584 #endif
585 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
586 #ifdef MSG_DEBUG_OK
587 			kprintf("good morning, eval=%d\n", eval);
588 #endif
589 			if (we_own_it)
590 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
591 			if (eval != 0) {
592 #ifdef MSG_DEBUG_OK
593 				kprintf("msgsnd:  interrupted system call\n");
594 #endif
595 				eval = EINTR;
596 				goto done;
597 			}
598 
599 			/*
600 			 * Make sure that the msq queue still exists
601 			 */
602 
603 			if (msqptr->msg_qbytes == 0) {
604 #ifdef MSG_DEBUG_OK
605 				kprintf("msqid deleted\n");
606 #endif
607 				eval = EIDRM;
608 				goto done;
609 			}
610 
611 		} else {
612 #ifdef MSG_DEBUG_OK
613 			kprintf("got all the resources that we need\n");
614 #endif
615 			break;
616 		}
617 	}
618 
619 	/*
620 	 * We have the resources that we need.
621 	 * Make sure!
622 	 */
623 
624 	if (msqptr->msg_perm.mode & MSG_LOCKED)
625 		panic("msg_perm.mode & MSG_LOCKED");
626 	if (segs_needed > nfree_msgmaps)
627 		panic("segs_needed > nfree_msgmaps");
628 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
629 		panic("msgsz + msg_cbytes > msg_qbytes");
630 	if (free_msghdrs == NULL)
631 		panic("no more msghdrs");
632 
633 	/*
634 	 * Re-lock the msqid_ds in case we page-fault when copying in the
635 	 * message
636 	 */
637 
638 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
639 		panic("msqid_ds is already locked");
640 	msqptr->msg_perm.mode |= MSG_LOCKED;
641 
642 	/*
643 	 * Allocate a message header
644 	 */
645 
646 	msghdr = free_msghdrs;
647 	free_msghdrs = msghdr->msg_next;
648 	msghdr->msg_spot = -1;
649 	msghdr->msg_ts = msgsz;
650 
651 	/*
652 	 * Allocate space for the message
653 	 */
654 
655 	while (segs_needed > 0) {
656 		if (nfree_msgmaps <= 0)
657 			panic("not enough msgmaps");
658 		if (free_msgmaps == -1)
659 			panic("nil free_msgmaps");
660 		next = free_msgmaps;
661 		if (next <= -1)
662 			panic("next too low #1");
663 		if (next >= msginfo.msgseg)
664 			panic("next out of range #1");
665 #ifdef MSG_DEBUG_OK
666 		kprintf("allocating segment %d to message\n", next);
667 #endif
668 		free_msgmaps = msgmaps[next].next;
669 		nfree_msgmaps--;
670 		msgmaps[next].next = msghdr->msg_spot;
671 		msghdr->msg_spot = next;
672 		segs_needed--;
673 	}
674 
675 	/*
676 	 * Copy in the message type
677 	 */
678 
679 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
680 	    sizeof(msghdr->msg_type))) != 0) {
681 #ifdef MSG_DEBUG_OK
682 		kprintf("error %d copying the message type\n", eval);
683 #endif
684 		msg_freehdr(msghdr);
685 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
686 		wakeup((caddr_t)msqptr);
687 		goto done;
688 	}
689 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
690 
691 	/*
692 	 * Validate the message type
693 	 */
694 
695 	if (msghdr->msg_type < 1) {
696 		msg_freehdr(msghdr);
697 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
698 		wakeup((caddr_t)msqptr);
699 #ifdef MSG_DEBUG_OK
700 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
701 #endif
702 		eval = EINVAL;
703 		goto done;
704 	}
705 
706 	/*
707 	 * Copy in the message body
708 	 */
709 
710 	next = msghdr->msg_spot;
711 	while (msgsz > 0) {
712 		size_t tlen;
713 		if (msgsz > msginfo.msgssz)
714 			tlen = msginfo.msgssz;
715 		else
716 			tlen = msgsz;
717 		if (next <= -1)
718 			panic("next too low #2");
719 		if (next >= msginfo.msgseg)
720 			panic("next out of range #2");
721 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
722 		    tlen)) != 0) {
723 #ifdef MSG_DEBUG_OK
724 			kprintf("error %d copying in message segment\n", eval);
725 #endif
726 			msg_freehdr(msghdr);
727 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
728 			wakeup((caddr_t)msqptr);
729 			goto done;
730 		}
731 		msgsz -= tlen;
732 		user_msgp = (char *)user_msgp + tlen;
733 		next = msgmaps[next].next;
734 	}
735 	if (next != -1)
736 		panic("didn't use all the msg segments");
737 
738 	/*
739 	 * We've got the message.  Unlock the msqid_ds.
740 	 */
741 
742 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
743 
744 	/*
745 	 * Make sure that the msqid_ds is still allocated.
746 	 */
747 
748 	if (msqptr->msg_qbytes == 0) {
749 		msg_freehdr(msghdr);
750 		wakeup((caddr_t)msqptr);
751 		eval = EIDRM;
752 		goto done;
753 	}
754 
755 	/*
756 	 * Put the message into the queue
757 	 */
758 
759 	if (msqptr->msg_first == NULL) {
760 		msqptr->msg_first = msghdr;
761 		msqptr->msg_last = msghdr;
762 	} else {
763 		msqptr->msg_last->msg_next = msghdr;
764 		msqptr->msg_last = msghdr;
765 	}
766 	msqptr->msg_last->msg_next = NULL;
767 
768 	msqptr->msg_cbytes += msghdr->msg_ts;
769 	msqptr->msg_qnum++;
770 	msqptr->msg_lspid = td->td_proc->p_pid;
771 	msqptr->msg_stime = time_second;
772 
773 	wakeup((caddr_t)msqptr);
774 	eval = 0;
775 done:
776 	rel_mplock();
777 	if (eval == 0)
778 		uap->sysmsg_result = 0;
779 	return (eval);
780 }
781 
782 /*
783  * MPALMOSTSAFE
784  */
785 int
786 sys_msgrcv(struct msgrcv_args *uap)
787 {
788 	struct thread *td = curthread;
789 	int msqid = uap->msqid;
790 	void *user_msgp = uap->msgp;
791 	size_t msgsz = uap->msgsz;
792 	long msgtyp = uap->msgtyp;
793 	int msgflg = uap->msgflg;
794 	size_t len;
795 	struct msqid_ds *msqptr;
796 	struct msg *msghdr;
797 	int eval;
798 	short next;
799 
800 #ifdef MSG_DEBUG_OK
801 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
802 	    msgsz, msgtyp, msgflg);
803 #endif
804 
805 	if (!jail_sysvipc_allowed && td->td_ucred->cr_prison != NULL)
806 		return (ENOSYS);
807 
808 	get_mplock();
809 	msqid = IPCID_TO_IX(msqid);
810 
811 	if (msqid < 0 || msqid >= msginfo.msgmni) {
812 #ifdef MSG_DEBUG_OK
813 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
814 		    msginfo.msgmni);
815 #endif
816 		eval = EINVAL;
817 		goto done;
818 	}
819 
820 	msqptr = &msqids[msqid];
821 	if (msqptr->msg_qbytes == 0) {
822 #ifdef MSG_DEBUG_OK
823 		kprintf("no such message queue id\n");
824 #endif
825 		eval = EINVAL;
826 		goto done;
827 	}
828 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
829 #ifdef MSG_DEBUG_OK
830 		kprintf("wrong sequence number\n");
831 #endif
832 		eval = EINVAL;
833 		goto done;
834 	}
835 
836 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
837 #ifdef MSG_DEBUG_OK
838 		kprintf("requester doesn't have read access\n");
839 #endif
840 		goto done;
841 	}
842 
843 	msghdr = NULL;
844 	while (msghdr == NULL) {
845 		if (msgtyp == 0) {
846 			msghdr = msqptr->msg_first;
847 			if (msghdr != NULL) {
848 				if (msgsz < msghdr->msg_ts &&
849 				    (msgflg & MSG_NOERROR) == 0) {
850 #ifdef MSG_DEBUG_OK
851 					kprintf("first message on the queue is too big (want %d, got %d)\n",
852 					    msgsz, msghdr->msg_ts);
853 #endif
854 					eval = E2BIG;
855 					goto done;
856 				}
857 				if (msqptr->msg_first == msqptr->msg_last) {
858 					msqptr->msg_first = NULL;
859 					msqptr->msg_last = NULL;
860 				} else {
861 					msqptr->msg_first = msghdr->msg_next;
862 					if (msqptr->msg_first == NULL)
863 						panic("msg_first/last screwed up #1");
864 				}
865 			}
866 		} else {
867 			struct msg *previous;
868 			struct msg **prev;
869 
870 			previous = NULL;
871 			prev = &(msqptr->msg_first);
872 			while ((msghdr = *prev) != NULL) {
873 				/*
874 				 * Is this message's type an exact match or is
875 				 * this message's type less than or equal to
876 				 * the absolute value of a negative msgtyp?
877 				 * Note that the second half of this test can
878 				 * NEVER be true if msgtyp is positive since
879 				 * msg_type is always positive!
880 				 */
881 
882 				if (msgtyp == msghdr->msg_type ||
883 				    msghdr->msg_type <= -msgtyp) {
884 #ifdef MSG_DEBUG_OK
885 					kprintf("found message type %d, requested %d\n",
886 					    msghdr->msg_type, msgtyp);
887 #endif
888 					if (msgsz < msghdr->msg_ts &&
889 					    (msgflg & MSG_NOERROR) == 0) {
890 #ifdef MSG_DEBUG_OK
891 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
892 						    msgsz, msghdr->msg_ts);
893 #endif
894 						eval = E2BIG;
895 						goto done;
896 					}
897 					*prev = msghdr->msg_next;
898 					if (msghdr == msqptr->msg_last) {
899 						if (previous == NULL) {
900 							if (prev !=
901 							    &msqptr->msg_first)
902 								panic("msg_first/last screwed up #2");
903 							msqptr->msg_first =
904 							    NULL;
905 							msqptr->msg_last =
906 							    NULL;
907 						} else {
908 							if (prev ==
909 							    &msqptr->msg_first)
910 								panic("msg_first/last screwed up #3");
911 							msqptr->msg_last =
912 							    previous;
913 						}
914 					}
915 					break;
916 				}
917 				previous = msghdr;
918 				prev = &(msghdr->msg_next);
919 			}
920 		}
921 
922 		/*
923 		 * We've either extracted the msghdr for the appropriate
924 		 * message or there isn't one.
925 		 * If there is one then bail out of this loop.
926 		 */
927 
928 		if (msghdr != NULL)
929 			break;
930 
931 		/*
932 		 * Hmph!  No message found.  Does the user want to wait?
933 		 */
934 
935 		if ((msgflg & IPC_NOWAIT) != 0) {
936 #ifdef MSG_DEBUG_OK
937 			kprintf("no appropriate message found (msgtyp=%d)\n",
938 			    msgtyp);
939 #endif
940 			/* The SVID says to return ENOMSG. */
941 #ifdef ENOMSG
942 			eval = ENOMSG;
943 #else
944 			/* Unfortunately, BSD doesn't define that code yet! */
945 			eval = EAGAIN;
946 #endif
947 			goto done;
948 		}
949 
950 		/*
951 		 * Wait for something to happen
952 		 */
953 
954 #ifdef MSG_DEBUG_OK
955 		kprintf("msgrcv:  goodnight\n");
956 #endif
957 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
958 #ifdef MSG_DEBUG_OK
959 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
960 #endif
961 
962 		if (eval != 0) {
963 #ifdef MSG_DEBUG_OK
964 			kprintf("msgsnd:  interrupted system call\n");
965 #endif
966 			eval = EINTR;
967 			goto done;
968 		}
969 
970 		/*
971 		 * Make sure that the msq queue still exists
972 		 */
973 
974 		if (msqptr->msg_qbytes == 0 ||
975 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
976 #ifdef MSG_DEBUG_OK
977 			kprintf("msqid deleted\n");
978 #endif
979 			eval = EIDRM;
980 			goto done;
981 		}
982 	}
983 
984 	/*
985 	 * Return the message to the user.
986 	 *
987 	 * First, do the bookkeeping (before we risk being interrupted).
988 	 */
989 
990 	msqptr->msg_cbytes -= msghdr->msg_ts;
991 	msqptr->msg_qnum--;
992 	msqptr->msg_lrpid = td->td_proc->p_pid;
993 	msqptr->msg_rtime = time_second;
994 
995 	/*
996 	 * Make msgsz the actual amount that we'll be returning.
997 	 * Note that this effectively truncates the message if it is too long
998 	 * (since msgsz is never increased).
999 	 */
1000 
1001 #ifdef MSG_DEBUG_OK
1002 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1003 	    msghdr->msg_ts);
1004 #endif
1005 	if (msgsz > msghdr->msg_ts)
1006 		msgsz = msghdr->msg_ts;
1007 
1008 	/*
1009 	 * Return the type to the user.
1010 	 */
1011 
1012 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1013 	    sizeof(msghdr->msg_type));
1014 	if (eval != 0) {
1015 #ifdef MSG_DEBUG_OK
1016 		kprintf("error (%d) copying out message type\n", eval);
1017 #endif
1018 		msg_freehdr(msghdr);
1019 		wakeup((caddr_t)msqptr);
1020 		goto done;
1021 	}
1022 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1023 
1024 	/*
1025 	 * Return the segments to the user
1026 	 */
1027 
1028 	next = msghdr->msg_spot;
1029 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1030 		size_t tlen;
1031 
1032 		if (msgsz - len > msginfo.msgssz)
1033 			tlen = msginfo.msgssz;
1034 		else
1035 			tlen = msgsz - len;
1036 		if (next <= -1)
1037 			panic("next too low #3");
1038 		if (next >= msginfo.msgseg)
1039 			panic("next out of range #3");
1040 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1041 		    user_msgp, tlen);
1042 		if (eval != 0) {
1043 #ifdef MSG_DEBUG_OK
1044 			kprintf("error (%d) copying out message segment\n",
1045 			    eval);
1046 #endif
1047 			msg_freehdr(msghdr);
1048 			wakeup((caddr_t)msqptr);
1049 			goto done;
1050 		}
1051 		user_msgp = (char *)user_msgp + tlen;
1052 		next = msgmaps[next].next;
1053 	}
1054 
1055 	/*
1056 	 * Done, return the actual number of bytes copied out.
1057 	 */
1058 
1059 	msg_freehdr(msghdr);
1060 	wakeup((caddr_t)msqptr);
1061 	eval = 0;
1062 done:
1063 	rel_mplock();
1064 	if (eval == 0)
1065 		uap->sysmsg_result = msgsz;
1066 	return(eval);
1067 }
1068 
1069 static int
1070 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1071 {
1072 
1073 	return (SYSCTL_OUT(req, msqids,
1074 	    sizeof(struct msqid_ds) * msginfo.msgmni));
1075 }
1076 
1077 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1078 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1079 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1080 
1081 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1082     "Max characters in message");
1083 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0,
1084     "Max message queue identifiers");
1085 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0,
1086     "Max characters in message queue");
1087 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0,
1088     "Max SVID messages in system");
1089 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0,
1090     "Power-of-two size of a message segment");
1091 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0,
1092     "Number of message segments");
1093 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1094     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1095