xref: /dragonfly/sys/kern/sysv_msg.c (revision 2b3f93ea)
1 /* $FreeBSD: src/sys/kern/sysv_msg.c,v 1.23.2.5 2002/12/31 08:54:53 maxim Exp $ */
2 
3 /*
4  * Implementation of SVID messages
5  *
6  * Author:  Daniel Boulet
7  *
8  * Copyright 1993 Daniel Boulet and RTMX Inc.
9  *
10  * This system call was implemented by Daniel Boulet under contract from RTMX.
11  *
12  * Redistribution and use in source forms, with and without modification,
13  * are permitted provided that this entire comment appears intact.
14  *
15  * Redistribution in binary form may occur without any restrictions.
16  * Obviously, it would be nice if you gave credit where credit is due
17  * but requiring it would be too onerous.
18  *
19  * This software is provided ``AS IS'' without any warranties of any kind.
20  */
21 
22 #include "opt_sysvipc.h"
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/sysmsg.h>
27 #include <sys/kernel.h>
28 #include <sys/proc.h>
29 #include <sys/caps.h>
30 #include <sys/msg.h>
31 #include <sys/sysent.h>
32 #include <sys/sysctl.h>
33 #include <sys/malloc.h>
34 #include <sys/jail.h>
35 
36 static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
37 
38 static void msginit (void *);
39 
40 #define MSG_DEBUG
41 #undef MSG_DEBUG_OK
42 
43 static void msg_freehdr (struct msg *msghdr);
44 
45 struct msg {
46 	struct	msg *msg_next;	/* next msg in the chain */
47 	long	msg_type;	/* type of this message */
48     				/* >0 -> type of this message */
49     				/* 0 -> free header */
50 	u_short	msg_ts;		/* size of this message */
51 	short	msg_spot;	/* location of start of msg in buffer */
52 };
53 
54 
/* Compile-time defaults; each may be overridden by the kernel config. */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* bytes in the whole segment pool */
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
71 
72 /*
73  * Based on the configuration parameters described in an SVR2 (yes, two)
74  * config(1m) man page.
75  *
76  * Each message is broken up and stored in segments that are msgssz bytes
77  * long.  For efficiency reasons, this should be a power of two.  Also,
78  * it doesn't make sense if it is less than 8 or greater than about 256.
79  * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
80  * two between 8 and 1024 inclusive (and panic's if it isn't).
81  */
82 struct msginfo msginfo = {
83                 MSGMAX,         /* max chars in a message */
84                 MSGMNI,         /* # of message queue identifiers */
85                 MSGMNB,         /* max chars in a queue */
86                 MSGTQL,         /* max messages in system */
87                 MSGSSZ,         /* size of a message segment */
88                 		/* (must be small power of 2 greater than 4) */
89                 MSGSEG          /* number of message segments */
90 };
91 
92 /*
93  * macros to convert between msqid_ds's and msqid's.
94  * (specific to this implementation)
95  */
/* Pack a table index and the descriptor's sequence number into one id. */
#define MSQID(ix,ds) \
	(((ix) & 0xffff) | ((((ds).msg_perm.seq) << 16) & 0xffff0000))
/* Low 16 bits of an id: the table index. */
#define MSQID_IX(id)	((id) & 0xffff)
/* Bits 16..31 of an id: the sequence number. */
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
99 
100 /*
101  * The rest of this file is specific to this particular implementation.
102  */
103 
/* One link per message segment; the links form singly linked chains. */
struct msgmap {
	short	next;	/* -1: available, 0..(MSGSEG-1): index of next segment */
};
109 
110 #define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
111 
112 static int nfree_msgmaps;	/* # of free map entries */
113 static short free_msgmaps;	/* head of linked list of free map entries */
114 static struct msg *free_msghdrs;/* list of free msg headers */
115 static char *msgpool;		/* MSGMAX byte long msg buffer pool */
116 static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
117 static struct msg *msghdrs;	/* MSGTQL msg headers */
118 static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
119 static struct lwkt_token msg_token = LWKT_TOKEN_INITIALIZER(msg_token);
120 
121 static void
msginit(void * dummy)122 msginit(void *dummy)
123 {
124 	int i;
125 
126 	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
127 	msgpool = kmalloc(msginfo.msgmax, M_MSG, M_WAITOK);
128 	msgmaps = kmalloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
129 	msghdrs = kmalloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
130 	msqids = kmalloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
131 
132 	/*
133 	 * msginfo.msgssz should be a power of two for efficiency reasons.
134 	 * It is also pretty silly if msginfo.msgssz is less than 8
135 	 * or greater than about 256 so ...
136 	 */
137 
138 	i = 8;
139 	while (i < 1024 && i != msginfo.msgssz)
140 		i <<= 1;
141     	if (i != msginfo.msgssz) {
142 		kprintf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
143 		    msginfo.msgssz);
144 		panic("msginfo.msgssz not a small power of 2");
145 	}
146 
147 	if (msginfo.msgseg > 32767) {
148 		kprintf("msginfo.msgseg=%d\n", msginfo.msgseg);
149 		panic("msginfo.msgseg > 32767");
150 	}
151 
152 	for (i = 0; i < msginfo.msgseg; i++) {
153 		if (i > 0)
154 			msgmaps[i-1].next = i;
155 		msgmaps[i].next = -1;	/* implies entry is available */
156 	}
157 	free_msgmaps = 0;
158 	nfree_msgmaps = msginfo.msgseg;
159 
160 	for (i = 0; i < msginfo.msgtql; i++) {
161 		msghdrs[i].msg_type = 0;
162 		if (i > 0)
163 			msghdrs[i-1].msg_next = &msghdrs[i];
164 		msghdrs[i].msg_next = NULL;
165     	}
166 	free_msghdrs = &msghdrs[0];
167 
168 	for (i = 0; i < msginfo.msgmni; i++) {
169 		msqids[i].msg_qbytes = 0;	/* implies entry is available */
170 		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
171 		msqids[i].msg_perm.mode = 0;
172 	}
173 }
174 SYSINIT(sysv_msg, SI_SUB_SYSV_MSG, SI_ORDER_FIRST, msginit, NULL);
175 
176 static void
msg_freehdr(struct msg * msghdr)177 msg_freehdr(struct msg *msghdr)
178 {
179 	while (msghdr->msg_ts > 0) {
180 		short next;
181 		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
182 			panic("msghdr->msg_spot out of range");
183 		next = msgmaps[msghdr->msg_spot].next;
184 		msgmaps[msghdr->msg_spot].next = free_msgmaps;
185 		free_msgmaps = msghdr->msg_spot;
186 		nfree_msgmaps++;
187 		msghdr->msg_spot = next;
188 		if (msghdr->msg_ts >= msginfo.msgssz)
189 			msghdr->msg_ts -= msginfo.msgssz;
190 		else
191 			msghdr->msg_ts = 0;
192 	}
193 	if (msghdr->msg_spot != -1)
194 		panic("msghdr->msg_spot != -1");
195 	msghdr->msg_next = free_msghdrs;
196 	free_msghdrs = msghdr;
197 }
198 
199 /*
200  * MPALMOSTSAFE
201  */
202 int
sys_msgctl(struct sysmsg * sysmsg,const struct msgctl_args * uap)203 sys_msgctl(struct sysmsg *sysmsg, const struct msgctl_args *uap)
204 {
205 	struct thread *td = curthread;
206 	struct proc *p = td->td_proc;
207 	struct prison *pr = p->p_ucred->cr_prison;
208 	int msqid = uap->msqid;
209 	int cmd = uap->cmd;
210 	struct msqid_ds *user_msqptr = uap->buf;
211 	int rval, eval;
212 	struct msqid_ds msqbuf;
213 	struct msqid_ds *msqptr;
214 
215 #ifdef MSG_DEBUG_OK
216 	kprintf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
217 #endif
218 
219 	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
220 		return (ENOSYS);
221 
222 	lwkt_gettoken(&msg_token);
223 	msqid = IPCID_TO_IX(msqid);
224 
225 	if (msqid < 0 || msqid >= msginfo.msgmni) {
226 #ifdef MSG_DEBUG_OK
227 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
228 		    msginfo.msgmni);
229 #endif
230 		eval = EINVAL;
231 		goto done;
232 	}
233 
234 	msqptr = &msqids[msqid];
235 
236 	if (msqptr->msg_qbytes == 0) {
237 #ifdef MSG_DEBUG_OK
238 		kprintf("no such msqid\n");
239 #endif
240 		eval = EINVAL;
241 		goto done;
242 	}
243 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
244 #ifdef MSG_DEBUG_OK
245 		kprintf("wrong sequence number\n");
246 #endif
247 		eval = EINVAL;
248 		goto done;
249 	}
250 
251 	rval = 0;
252 
253 	switch (cmd) {
254 	case IPC_RMID:
255 	{
256 		struct msg *msghdr;
257 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
258 			break;
259 		/* Free the message headers */
260 		msghdr = msqptr->msg_first;
261 		while (msghdr != NULL) {
262 			struct msg *msghdr_tmp;
263 
264 			/* Free the segments of each message */
265 			msqptr->msg_cbytes -= msghdr->msg_ts;
266 			msqptr->msg_qnum--;
267 			msghdr_tmp = msghdr;
268 			msghdr = msghdr->msg_next;
269 			msg_freehdr(msghdr_tmp);
270 		}
271 
272 		if (msqptr->msg_cbytes != 0)
273 			panic("msg_cbytes is screwed up");
274 		if (msqptr->msg_qnum != 0)
275 			panic("msg_qnum is screwed up");
276 
277 		msqptr->msg_qbytes = 0;	/* Mark it as free */
278 
279 		wakeup((caddr_t)msqptr);
280 	}
281 
282 		break;
283 
284 	case IPC_SET:
285 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_M)) != 0)
286 			break;
287 		if ((eval = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
288 			break;
289 		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
290 			eval = caps_priv_check_td(td, SYSCAP_RESTRICTEDROOT);
291 			if (eval)
292 				break;
293 		}
294 		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
295 #ifdef MSG_DEBUG_OK
296 			kprintf("can't increase msg_qbytes beyond %d (truncating)\n",
297 			    msginfo.msgmnb);
298 #endif
299 			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
300 		}
301 		if (msqbuf.msg_qbytes == 0) {
302 #ifdef MSG_DEBUG_OK
303 			kprintf("can't reduce msg_qbytes to 0\n");
304 #endif
305 			eval = EINVAL;		/* non-standard errno! */
306 			break;
307 		}
308 		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
309 		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
310 		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
311 					(msqbuf.msg_perm.mode & 0777);
312 		msqptr->msg_qbytes = msqbuf.msg_qbytes;
313 		msqptr->msg_ctime = time_second;
314 		break;
315 
316 	case IPC_STAT:
317 		if ((eval = ipcperm(p, &msqptr->msg_perm, IPC_R))) {
318 #ifdef MSG_DEBUG_OK
319 			kprintf("requester doesn't have read access\n");
320 #endif
321 			eval = EINVAL;
322 			break;
323 		}
324 		eval = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
325 		break;
326 
327 	default:
328 #ifdef MSG_DEBUG_OK
329 		kprintf("invalid command %d\n", cmd);
330 #endif
331 		eval = EINVAL;
332 		break;
333 	}
334 done:
335 	lwkt_reltoken(&msg_token);
336 	if (eval == 0)
337 		sysmsg->sysmsg_result = rval;
338 	return(eval);
339 }
340 
341 /*
342  * MPALMOSTSAFE
343  */
344 int
sys_msgget(struct sysmsg * sysmsg,const struct msgget_args * uap)345 sys_msgget(struct sysmsg *sysmsg, const struct msgget_args *uap)
346 {
347 	struct thread *td = curthread;
348 	struct prison *pr = td->td_proc->p_ucred->cr_prison;
349 	int msqid, eval;
350 	int key = uap->key;
351 	int msgflg = uap->msgflg;
352 	struct ucred *cred = td->td_ucred;
353 	struct msqid_ds *msqptr = NULL;
354 
355 #ifdef MSG_DEBUG_OK
356 	kprintf("msgget(0x%x, 0%o)\n", key, msgflg);
357 #endif
358 	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
359 		return (ENOSYS);
360 
361 	eval = 0;
362 	lwkt_gettoken(&msg_token);
363 
364 	if (key != IPC_PRIVATE) {
365 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
366 			msqptr = &msqids[msqid];
367 			if (msqptr->msg_qbytes != 0 &&
368 			    msqptr->msg_perm.key == key)
369 				break;
370 		}
371 		if (msqid < msginfo.msgmni) {
372 #ifdef MSG_DEBUG_OK
373 			kprintf("found public key\n");
374 #endif
375 			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
376 #ifdef MSG_DEBUG_OK
377 				kprintf("not exclusive\n");
378 #endif
379 				eval = EEXIST;
380 				goto done;
381 			}
382 			if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, msgflg & 0700 ))) {
383 #ifdef MSG_DEBUG_OK
384 				kprintf("requester doesn't have 0%o access\n",
385 				    msgflg & 0700);
386 #endif
387 				goto done;
388 			}
389 			goto done;
390 		}
391 	}
392 
393 #ifdef MSG_DEBUG_OK
394 	kprintf("need to allocate the msqid_ds\n");
395 #endif
396 	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
397 		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
398 			/*
399 			 * Look for an unallocated and unlocked msqid_ds.
400 			 * msqid_ds's can be locked by msgsnd or msgrcv while
401 			 * they are copying the message in/out.  We can't
402 			 * re-use the entry until they release it.
403 			 */
404 			msqptr = &msqids[msqid];
405 			if (msqptr->msg_qbytes == 0 &&
406 			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
407 				break;
408 		}
409 		if (msqid == msginfo.msgmni) {
410 #ifdef MSG_DEBUG_OK
411 			kprintf("no more msqid_ds's available\n");
412 #endif
413 			eval = ENOSPC;
414 			goto done;
415 		}
416 #ifdef MSG_DEBUG_OK
417 		kprintf("msqid %d is available\n", msqid);
418 #endif
419 		msqptr->msg_perm.key = key;
420 		msqptr->msg_perm.cuid = cred->cr_uid;
421 		msqptr->msg_perm.uid = cred->cr_uid;
422 		msqptr->msg_perm.cgid = cred->cr_gid;
423 		msqptr->msg_perm.gid = cred->cr_gid;
424 		msqptr->msg_perm.mode = (msgflg & 0777);
425 		/* Make sure that the returned msqid is unique */
426 		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
427 		msqptr->msg_first = NULL;
428 		msqptr->msg_last = NULL;
429 		msqptr->msg_cbytes = 0;
430 		msqptr->msg_qnum = 0;
431 		msqptr->msg_qbytes = msginfo.msgmnb;
432 		msqptr->msg_lspid = 0;
433 		msqptr->msg_lrpid = 0;
434 		msqptr->msg_stime = 0;
435 		msqptr->msg_rtime = 0;
436 		msqptr->msg_ctime = time_second;
437 	} else {
438 #ifdef MSG_DEBUG_OK
439 		kprintf("didn't find it and wasn't asked to create it\n");
440 #endif
441 		eval = ENOENT;
442 	}
443 
444 done:
445 	lwkt_reltoken(&msg_token);
446 	/* Construct the unique msqid */
447 	if (eval == 0)
448 		sysmsg->sysmsg_result = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
449 	return(eval);
450 }
451 
452 /*
453  * MPALMOSTSAFE
454  */
455 int
sys_msgsnd(struct sysmsg * sysmsg,const struct msgsnd_args * uap)456 sys_msgsnd(struct sysmsg *sysmsg, const struct msgsnd_args *uap)
457 {
458 	struct thread *td = curthread;
459 	struct prison *pr = td->td_proc->p_ucred->cr_prison;
460 	int msqid = uap->msqid;
461 	const void *user_msgp = uap->msgp;
462 	size_t msgsz = uap->msgsz;
463 	int msgflg = uap->msgflg;
464 	int segs_needed, eval;
465 	struct msqid_ds *msqptr;
466 	struct msg *msghdr;
467 	short next;
468 
469 #ifdef MSG_DEBUG_OK
470 	kprintf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
471 	    msgflg);
472 #endif
473 
474 	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
475 		return (ENOSYS);
476 
477 	lwkt_gettoken(&msg_token);
478 	msqid = IPCID_TO_IX(msqid);
479 
480 	if (msqid < 0 || msqid >= msginfo.msgmni) {
481 #ifdef MSG_DEBUG_OK
482 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
483 		    msginfo.msgmni);
484 #endif
485 		eval = EINVAL;
486 		goto done;
487 	}
488 
489 	msqptr = &msqids[msqid];
490 	if (msqptr->msg_qbytes == 0) {
491 #ifdef MSG_DEBUG_OK
492 		kprintf("no such message queue id\n");
493 #endif
494 		eval = EINVAL;
495 		goto done;
496 	}
497 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
498 #ifdef MSG_DEBUG_OK
499 		kprintf("wrong sequence number\n");
500 #endif
501 		eval = EINVAL;
502 		goto done;
503 	}
504 
505 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_W))) {
506 #ifdef MSG_DEBUG_OK
507 		kprintf("requester doesn't have write access\n");
508 #endif
509 		eval = EINVAL;
510 		goto done;
511 	}
512 
513 	segs_needed = howmany(msgsz, msginfo.msgssz);
514 #ifdef MSG_DEBUG_OK
515 	kprintf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
516 	    segs_needed);
517 #endif
518 	for (;;) {
519 		int need_more_resources = 0;
520 
521 		/*
522 		 * check msgsz
523 		 * (inside this loop in case msg_qbytes changes while we sleep)
524 		 */
525 
526 		if (msgsz > msqptr->msg_qbytes) {
527 #ifdef MSG_DEBUG_OK
528 			kprintf("msgsz > msqptr->msg_qbytes\n");
529 #endif
530 			eval = EINVAL;
531 			goto done;
532 		}
533 
534 		if (msqptr->msg_perm.mode & MSG_LOCKED) {
535 #ifdef MSG_DEBUG_OK
536 			kprintf("msqid is locked\n");
537 #endif
538 			need_more_resources = 1;
539 		}
540 		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
541 #ifdef MSG_DEBUG_OK
542 			kprintf("msgsz + msg_cbytes > msg_qbytes\n");
543 #endif
544 			need_more_resources = 1;
545 		}
546 		if (segs_needed > nfree_msgmaps) {
547 #ifdef MSG_DEBUG_OK
548 			kprintf("segs_needed > nfree_msgmaps\n");
549 #endif
550 			need_more_resources = 1;
551 		}
552 		if (free_msghdrs == NULL) {
553 #ifdef MSG_DEBUG_OK
554 			kprintf("no more msghdrs\n");
555 #endif
556 			need_more_resources = 1;
557 		}
558 
559 		if (need_more_resources) {
560 			int we_own_it;
561 
562 			if ((msgflg & IPC_NOWAIT) != 0) {
563 #ifdef MSG_DEBUG_OK
564 				kprintf("need more resources but caller doesn't want to wait\n");
565 #endif
566 				eval = EAGAIN;
567 				goto done;
568 			}
569 
570 			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
571 #ifdef MSG_DEBUG_OK
572 				kprintf("we don't own the msqid_ds\n");
573 #endif
574 				we_own_it = 0;
575 			} else {
576 				/* Force later arrivals to wait for our
577 				   request */
578 #ifdef MSG_DEBUG_OK
579 				kprintf("we own the msqid_ds\n");
580 #endif
581 				msqptr->msg_perm.mode |= MSG_LOCKED;
582 				we_own_it = 1;
583 			}
584 #ifdef MSG_DEBUG_OK
585 			kprintf("goodnight\n");
586 #endif
587 			eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
588 #ifdef MSG_DEBUG_OK
589 			kprintf("good morning, eval=%d\n", eval);
590 #endif
591 			if (we_own_it)
592 				msqptr->msg_perm.mode &= ~MSG_LOCKED;
593 			if (eval != 0) {
594 #ifdef MSG_DEBUG_OK
595 				kprintf("msgsnd:  interrupted system call\n");
596 #endif
597 				eval = EINTR;
598 				goto done;
599 			}
600 
601 			/*
602 			 * Make sure that the msq queue still exists
603 			 */
604 
605 			if (msqptr->msg_qbytes == 0) {
606 #ifdef MSG_DEBUG_OK
607 				kprintf("msqid deleted\n");
608 #endif
609 				eval = EIDRM;
610 				goto done;
611 			}
612 
613 		} else {
614 #ifdef MSG_DEBUG_OK
615 			kprintf("got all the resources that we need\n");
616 #endif
617 			break;
618 		}
619 	}
620 
621 	/*
622 	 * We have the resources that we need.
623 	 * Make sure!
624 	 */
625 
626 	if (msqptr->msg_perm.mode & MSG_LOCKED)
627 		panic("msg_perm.mode & MSG_LOCKED");
628 	if (segs_needed > nfree_msgmaps)
629 		panic("segs_needed > nfree_msgmaps");
630 	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
631 		panic("msgsz + msg_cbytes > msg_qbytes");
632 	if (free_msghdrs == NULL)
633 		panic("no more msghdrs");
634 
635 	/*
636 	 * Re-lock the msqid_ds in case we page-fault when copying in the
637 	 * message
638 	 */
639 
640 	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
641 		panic("msqid_ds is already locked");
642 	msqptr->msg_perm.mode |= MSG_LOCKED;
643 
644 	/*
645 	 * Allocate a message header
646 	 */
647 
648 	msghdr = free_msghdrs;
649 	free_msghdrs = msghdr->msg_next;
650 	msghdr->msg_spot = -1;
651 	msghdr->msg_ts = msgsz;
652 
653 	/*
654 	 * Allocate space for the message
655 	 */
656 
657 	while (segs_needed > 0) {
658 		if (nfree_msgmaps <= 0)
659 			panic("not enough msgmaps");
660 		if (free_msgmaps == -1)
661 			panic("nil free_msgmaps");
662 		next = free_msgmaps;
663 		if (next <= -1)
664 			panic("next too low #1");
665 		if (next >= msginfo.msgseg)
666 			panic("next out of range #1");
667 #ifdef MSG_DEBUG_OK
668 		kprintf("allocating segment %d to message\n", next);
669 #endif
670 		free_msgmaps = msgmaps[next].next;
671 		nfree_msgmaps--;
672 		msgmaps[next].next = msghdr->msg_spot;
673 		msghdr->msg_spot = next;
674 		segs_needed--;
675 	}
676 
677 	/*
678 	 * Copy in the message type
679 	 */
680 
681 	if ((eval = copyin(user_msgp, &msghdr->msg_type,
682 	    sizeof(msghdr->msg_type))) != 0) {
683 #ifdef MSG_DEBUG_OK
684 		kprintf("error %d copying the message type\n", eval);
685 #endif
686 		msg_freehdr(msghdr);
687 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
688 		wakeup((caddr_t)msqptr);
689 		goto done;
690 	}
691 	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
692 
693 	/*
694 	 * Validate the message type
695 	 */
696 
697 	if (msghdr->msg_type < 1) {
698 		msg_freehdr(msghdr);
699 		msqptr->msg_perm.mode &= ~MSG_LOCKED;
700 		wakeup((caddr_t)msqptr);
701 #ifdef MSG_DEBUG_OK
702 		kprintf("mtype (%d) < 1\n", msghdr->msg_type);
703 #endif
704 		eval = EINVAL;
705 		goto done;
706 	}
707 
708 	/*
709 	 * Copy in the message body
710 	 */
711 
712 	next = msghdr->msg_spot;
713 	while (msgsz > 0) {
714 		size_t tlen;
715 		if (msgsz > msginfo.msgssz)
716 			tlen = msginfo.msgssz;
717 		else
718 			tlen = msgsz;
719 		if (next <= -1)
720 			panic("next too low #2");
721 		if (next >= msginfo.msgseg)
722 			panic("next out of range #2");
723 		if ((eval = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
724 		    tlen)) != 0) {
725 #ifdef MSG_DEBUG_OK
726 			kprintf("error %d copying in message segment\n", eval);
727 #endif
728 			msg_freehdr(msghdr);
729 			msqptr->msg_perm.mode &= ~MSG_LOCKED;
730 			wakeup((caddr_t)msqptr);
731 			goto done;
732 		}
733 		msgsz -= tlen;
734 		user_msgp = (const char *)user_msgp + tlen;
735 		next = msgmaps[next].next;
736 	}
737 	if (next != -1)
738 		panic("didn't use all the msg segments");
739 
740 	/*
741 	 * We've got the message.  Unlock the msqid_ds.
742 	 */
743 
744 	msqptr->msg_perm.mode &= ~MSG_LOCKED;
745 
746 	/*
747 	 * Make sure that the msqid_ds is still allocated.
748 	 */
749 
750 	if (msqptr->msg_qbytes == 0) {
751 		msg_freehdr(msghdr);
752 		wakeup((caddr_t)msqptr);
753 		eval = EIDRM;
754 		goto done;
755 	}
756 
757 	/*
758 	 * Put the message into the queue
759 	 */
760 
761 	if (msqptr->msg_first == NULL) {
762 		msqptr->msg_first = msghdr;
763 		msqptr->msg_last = msghdr;
764 	} else {
765 		msqptr->msg_last->msg_next = msghdr;
766 		msqptr->msg_last = msghdr;
767 	}
768 	msqptr->msg_last->msg_next = NULL;
769 
770 	msqptr->msg_cbytes += msghdr->msg_ts;
771 	msqptr->msg_qnum++;
772 	msqptr->msg_lspid = td->td_proc->p_pid;
773 	msqptr->msg_stime = time_second;
774 
775 	wakeup((caddr_t)msqptr);
776 	eval = 0;
777 done:
778 	lwkt_reltoken(&msg_token);
779 	if (eval == 0)
780 		sysmsg->sysmsg_result = 0;
781 	return (eval);
782 }
783 
784 /*
785  * MPALMOSTSAFE
786  */
787 int
sys_msgrcv(struct sysmsg * sysmsg,const struct msgrcv_args * uap)788 sys_msgrcv(struct sysmsg *sysmsg, const struct msgrcv_args *uap)
789 {
790 	struct thread *td = curthread;
791 	struct prison *pr = td->td_proc->p_ucred->cr_prison;
792 	int msqid = uap->msqid;
793 	void *user_msgp = uap->msgp;
794 	size_t msgsz = uap->msgsz;
795 	long msgtyp = uap->msgtyp;
796 	int msgflg = uap->msgflg;
797 	size_t len;
798 	struct msqid_ds *msqptr;
799 	struct msg *msghdr;
800 	int eval;
801 	short next;
802 
803 #ifdef MSG_DEBUG_OK
804 	kprintf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
805 	    msgsz, msgtyp, msgflg);
806 #endif
807 
808 	if (pr && !PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_SYS_SYSVIPC))
809 		return (ENOSYS);
810 
811 	lwkt_gettoken(&msg_token);
812 	msqid = IPCID_TO_IX(msqid);
813 
814 	if (msqid < 0 || msqid >= msginfo.msgmni) {
815 #ifdef MSG_DEBUG_OK
816 		kprintf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
817 		    msginfo.msgmni);
818 #endif
819 		eval = EINVAL;
820 		goto done;
821 	}
822 
823 	msqptr = &msqids[msqid];
824 	if (msqptr->msg_qbytes == 0) {
825 #ifdef MSG_DEBUG_OK
826 		kprintf("no such message queue id\n");
827 #endif
828 		eval = EINVAL;
829 		goto done;
830 	}
831 	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
832 #ifdef MSG_DEBUG_OK
833 		kprintf("wrong sequence number\n");
834 #endif
835 		eval = EINVAL;
836 		goto done;
837 	}
838 
839 	if ((eval = ipcperm(td->td_proc, &msqptr->msg_perm, IPC_R))) {
840 #ifdef MSG_DEBUG_OK
841 		kprintf("requester doesn't have read access\n");
842 #endif
843 		goto done;
844 	}
845 
846 	msghdr = NULL;
847 	while (msghdr == NULL) {
848 		if (msgtyp == 0) {
849 			msghdr = msqptr->msg_first;
850 			if (msghdr != NULL) {
851 				if (msgsz < msghdr->msg_ts &&
852 				    (msgflg & MSG_NOERROR) == 0) {
853 #ifdef MSG_DEBUG_OK
854 					kprintf("first message on the queue is too big (want %d, got %d)\n",
855 					    msgsz, msghdr->msg_ts);
856 #endif
857 					eval = E2BIG;
858 					goto done;
859 				}
860 				if (msqptr->msg_first == msqptr->msg_last) {
861 					msqptr->msg_first = NULL;
862 					msqptr->msg_last = NULL;
863 				} else {
864 					msqptr->msg_first = msghdr->msg_next;
865 					if (msqptr->msg_first == NULL)
866 						panic("msg_first/last screwed up #1");
867 				}
868 			}
869 		} else {
870 			struct msg *previous;
871 			struct msg **prev;
872 
873 			previous = NULL;
874 			prev = &(msqptr->msg_first);
875 			while ((msghdr = *prev) != NULL) {
876 				/*
877 				 * Is this message's type an exact match or is
878 				 * this message's type less than or equal to
879 				 * the absolute value of a negative msgtyp?
880 				 * Note that the second half of this test can
881 				 * NEVER be true if msgtyp is positive since
882 				 * msg_type is always positive!
883 				 */
884 
885 				if (msgtyp == msghdr->msg_type ||
886 				    msghdr->msg_type <= -msgtyp) {
887 #ifdef MSG_DEBUG_OK
888 					kprintf("found message type %d, requested %d\n",
889 					    msghdr->msg_type, msgtyp);
890 #endif
891 					if (msgsz < msghdr->msg_ts &&
892 					    (msgflg & MSG_NOERROR) == 0) {
893 #ifdef MSG_DEBUG_OK
894 						kprintf("requested message on the queue is too big (want %d, got %d)\n",
895 						    msgsz, msghdr->msg_ts);
896 #endif
897 						eval = E2BIG;
898 						goto done;
899 					}
900 					*prev = msghdr->msg_next;
901 					if (msghdr == msqptr->msg_last) {
902 						if (previous == NULL) {
903 							if (prev !=
904 							    &msqptr->msg_first)
905 								panic("msg_first/last screwed up #2");
906 							msqptr->msg_first =
907 							    NULL;
908 							msqptr->msg_last =
909 							    NULL;
910 						} else {
911 							if (prev ==
912 							    &msqptr->msg_first)
913 								panic("msg_first/last screwed up #3");
914 							msqptr->msg_last =
915 							    previous;
916 						}
917 					}
918 					break;
919 				}
920 				previous = msghdr;
921 				prev = &(msghdr->msg_next);
922 			}
923 		}
924 
925 		/*
926 		 * We've either extracted the msghdr for the appropriate
927 		 * message or there isn't one.
928 		 * If there is one then bail out of this loop.
929 		 */
930 
931 		if (msghdr != NULL)
932 			break;
933 
934 		/*
935 		 * Hmph!  No message found.  Does the user want to wait?
936 		 */
937 
938 		if ((msgflg & IPC_NOWAIT) != 0) {
939 #ifdef MSG_DEBUG_OK
940 			kprintf("no appropriate message found (msgtyp=%d)\n",
941 			    msgtyp);
942 #endif
943 			/* The SVID says to return ENOMSG. */
944 #ifdef ENOMSG
945 			eval = ENOMSG;
946 #else
947 			/* Unfortunately, BSD doesn't define that code yet! */
948 			eval = EAGAIN;
949 #endif
950 			goto done;
951 		}
952 
953 		/*
954 		 * Wait for something to happen
955 		 */
956 
957 #ifdef MSG_DEBUG_OK
958 		kprintf("msgrcv:  goodnight\n");
959 #endif
960 		eval = tsleep((caddr_t)msqptr, PCATCH, "msgwait", 0);
961 #ifdef MSG_DEBUG_OK
962 		kprintf("msgrcv:  good morning (eval=%d)\n", eval);
963 #endif
964 
965 		if (eval != 0) {
966 #ifdef MSG_DEBUG_OK
967 			kprintf("msgsnd:  interrupted system call\n");
968 #endif
969 			eval = EINTR;
970 			goto done;
971 		}
972 
973 		/*
974 		 * Make sure that the msq queue still exists
975 		 */
976 
977 		if (msqptr->msg_qbytes == 0 ||
978 		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
979 #ifdef MSG_DEBUG_OK
980 			kprintf("msqid deleted\n");
981 #endif
982 			eval = EIDRM;
983 			goto done;
984 		}
985 	}
986 
987 	/*
988 	 * Return the message to the user.
989 	 *
990 	 * First, do the bookkeeping (before we risk being interrupted).
991 	 */
992 
993 	msqptr->msg_cbytes -= msghdr->msg_ts;
994 	msqptr->msg_qnum--;
995 	msqptr->msg_lrpid = td->td_proc->p_pid;
996 	msqptr->msg_rtime = time_second;
997 
998 	/*
999 	 * Make msgsz the actual amount that we'll be returning.
1000 	 * Note that this effectively truncates the message if it is too long
1001 	 * (since msgsz is never increased).
1002 	 */
1003 
1004 #ifdef MSG_DEBUG_OK
1005 	kprintf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1006 	    msghdr->msg_ts);
1007 #endif
1008 	if (msgsz > msghdr->msg_ts)
1009 		msgsz = msghdr->msg_ts;
1010 
1011 	/*
1012 	 * Return the type to the user.
1013 	 */
1014 
1015 	eval = copyout((caddr_t)&(msghdr->msg_type), user_msgp,
1016 	    sizeof(msghdr->msg_type));
1017 	if (eval != 0) {
1018 #ifdef MSG_DEBUG_OK
1019 		kprintf("error (%d) copying out message type\n", eval);
1020 #endif
1021 		msg_freehdr(msghdr);
1022 		wakeup((caddr_t)msqptr);
1023 		goto done;
1024 	}
1025 	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1026 
1027 	/*
1028 	 * Return the segments to the user
1029 	 */
1030 
1031 	next = msghdr->msg_spot;
1032 	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1033 		size_t tlen;
1034 
1035 		if (msgsz - len > msginfo.msgssz)
1036 			tlen = msginfo.msgssz;
1037 		else
1038 			tlen = msgsz - len;
1039 		if (next <= -1)
1040 			panic("next too low #3");
1041 		if (next >= msginfo.msgseg)
1042 			panic("next out of range #3");
1043 		eval = copyout((caddr_t)&msgpool[next * msginfo.msgssz],
1044 		    user_msgp, tlen);
1045 		if (eval != 0) {
1046 #ifdef MSG_DEBUG_OK
1047 			kprintf("error (%d) copying out message segment\n",
1048 			    eval);
1049 #endif
1050 			msg_freehdr(msghdr);
1051 			wakeup((caddr_t)msqptr);
1052 			goto done;
1053 		}
1054 		user_msgp = (char *)user_msgp + tlen;
1055 		next = msgmaps[next].next;
1056 	}
1057 
1058 	/*
1059 	 * Done, return the actual number of bytes copied out.
1060 	 */
1061 
1062 	msg_freehdr(msghdr);
1063 	wakeup((caddr_t)msqptr);
1064 	eval = 0;
1065 done:
1066 	lwkt_reltoken(&msg_token);
1067 	if (eval == 0)
1068 		sysmsg->sysmsg_result = msgsz;
1069 	return(eval);
1070 }
1071 
1072 static int
sysctl_msqids(SYSCTL_HANDLER_ARGS)1073 sysctl_msqids(SYSCTL_HANDLER_ARGS)
1074 {
1075 	return (SYSCTL_OUT(req, msqids,
1076 		sizeof(struct msqid_ds) * msginfo.msgmni));
1077 }
1078 
1079 TUNABLE_INT("kern.ipc.msgseg", &msginfo.msgseg);
1080 TUNABLE_INT("kern.ipc.msgssz", &msginfo.msgssz);
1081 TUNABLE_INT("kern.ipc.msgmni", &msginfo.msgmni);
1082 
1083 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1084     "Max characters in message");
1085 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0,
1086     "Max message queue identifiers");
1087 SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0,
1088     "Max characters in message queue");
1089 SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0,
1090     "Max SVID messages in system");
1091 SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0,
1092     "Power-of-two size of a message segment");
1093 SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0,
1094     "Number of message segments");
1095 SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1096     NULL, 0, sysctl_msqids, "", "Message queue IDs");
1097