/* minix/servers/ipc/sem.c (revision 045e0ed3) */
#include "inc.h"

struct sem_struct;

/* IPC-server process table, currently used for semaphores only. */
struct iproc {
	struct sem_struct *ip_sem;	/* affected semaphore set, or NULL */
	struct sembuf *ip_sops;		/* pending operations (malloc'ed) */
	unsigned int ip_nsops;		/* number of pending operations */
	struct sembuf *ip_blkop;	/* pointer to operation that blocked */
	endpoint_t ip_endpt;		/* process endpoint */
	pid_t ip_pid;			/* process PID */
	TAILQ_ENTRY(iproc) ip_next;	/* next waiting process */
} iproc[NR_PROCS];

struct semaphore {
	unsigned short semval;		/* semaphore value */
	unsigned short semzcnt;		/* # waiting for zero */
	unsigned short semncnt;		/* # waiting for increase */
	pid_t sempid;			/* process that did last op */
};

/*
 * For the list of waiting processes, we use a doubly linked tail queue.  In
 * order to maintain a basic degree of fairness, we keep the pending processes
 * in FCFS (well, at least first-tested) order, which means we need to be able
 * to add new processes at the end of the list.  In order to remove waiting
 * processes in O(1) rather than O(n) time, we need a doubly linked list; in
 * the common case we do have the element's predecessor, but STAILQ_REMOVE is
 * O(n) anyway and NetBSD has no STAILQ_REMOVE_AFTER yet.
 *
 * We use one list per semaphore set: semop(2) affects only one semaphore set,
 * but it may involve operations on multiple semaphores within the set.  While
 * it is possible to recheck only semaphores that were affected by a particular
 * operation, and to associate waiting lists with individual semaphores, the
 * number of waiting processes to be expected is currently not high enough to
 * justify the extra complexity of such an implementation.
 */
struct sem_struct {
	struct semid_ds semid_ds;
	struct semaphore sems[SEMMSL];
	TAILQ_HEAD(waiters, iproc) waiters;
};

static struct sem_struct sem_list[SEMMNI];
static unsigned int sem_list_nr = 0; /* highest in-use slot number plus one */

/*
 * Find a semaphore set by key.  The given key must not be IPC_PRIVATE.  Return
 * a pointer to the semaphore set if found, or NULL otherwise.
 */
static struct sem_struct *
sem_find_key(key_t key)
{
	unsigned int i;

	for (i = 0; i < sem_list_nr; i++) {
		if (!(sem_list[i].semid_ds.sem_perm.mode & SEM_ALLOC))
			continue;
		if (sem_list[i].semid_ds.sem_perm._key == key)
			return &sem_list[i];
	}

	return NULL;
}

/*
 * Find a semaphore set by identifier.  Return a pointer to the semaphore set
 * if found, or NULL otherwise.
 */
static struct sem_struct *
sem_find_id(int id)
{
	struct sem_struct *sem;
	unsigned int i;

	i = IPCID_TO_IX(id);
	if (i >= sem_list_nr)
		return NULL;

	sem = &sem_list[i];
	if (!(sem->semid_ds.sem_perm.mode & SEM_ALLOC))
		return NULL;
	if (sem->semid_ds.sem_perm._seq != IPCID_TO_SEQ(id))
		return NULL;
	return sem;
}

/*
 * Implementation of the semget(2) system call.
 */
int
do_semget(message * m)
{
	struct sem_struct *sem;
	unsigned int i, seq;
	key_t key;
	int nsems, flag;

	key = m->m_lc_ipc_semget.key;
	nsems = m->m_lc_ipc_semget.nr;
	flag = m->m_lc_ipc_semget.flag;

	if (key != IPC_PRIVATE && (sem = sem_find_key(key)) != NULL) {
		if ((flag & IPC_CREAT) && (flag & IPC_EXCL))
			return EEXIST;
		if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, flag))
			return EACCES;
		if (nsems > sem->semid_ds.sem_nsems)
			return EINVAL;
		i = sem - sem_list;
	} else {
		if (key != IPC_PRIVATE && !(flag & IPC_CREAT))
			return ENOENT;
		if (nsems <= 0 || nsems > SEMMSL)
			return EINVAL;

		/* Find a free entry. */
		for (i = 0; i < __arraycount(sem_list); i++)
			if (!(sem_list[i].semid_ds.sem_perm.mode & SEM_ALLOC))
				break;
		if (i == __arraycount(sem_list))
			return ENOSPC;

		/* Initialize the entry. */
		sem = &sem_list[i];
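		/*
		 * Save the slot's sequence number before wiping the entry;
		 * it is incremented below, so that a recycled slot yields a
		 * new identifier and sem_find_id() can reject identifiers
		 * that refer to a previous use of the slot.
		 */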
		seq = sem->semid_ds.sem_perm._seq;
		memset(sem, 0, sizeof(*sem));
		sem->semid_ds.sem_perm._key = key;
		sem->semid_ds.sem_perm.cuid =
		    sem->semid_ds.sem_perm.uid = getnuid(m->m_source);
		sem->semid_ds.sem_perm.cgid =
		    sem->semid_ds.sem_perm.gid = getngid(m->m_source);
		sem->semid_ds.sem_perm.mode = SEM_ALLOC | (flag & ACCESSPERMS);
		sem->semid_ds.sem_perm._seq = (seq + 1) & 0x7fff;
		sem->semid_ds.sem_nsems = nsems;
		sem->semid_ds.sem_otime = 0;
		sem->semid_ds.sem_ctime = clock_time(NULL);
		TAILQ_INIT(&sem->waiters);

		assert(i <= sem_list_nr);
		if (i == sem_list_nr) {
			/*
			 * If no semaphore sets were allocated before,
			 * subscribe to process events now.
			 */
			if (sem_list_nr == 0)
				update_sem_sub(TRUE /*want_events*/);

			sem_list_nr++;
		}
	}

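	/*
	 * Return an identifier that combines the slot index with the slot's
	 * sequence number (see IXSEQ_TO_IPCID), matching the decomposition
	 * performed by sem_find_id() above.
	 */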
	m->m_lc_ipc_semget.retid = IXSEQ_TO_IPCID(i, sem->semid_ds.sem_perm);
	return OK;
}

/*
 * Increase the proper suspension count (semncnt or semzcnt) of the semaphore
 * on which the given process is blocked.
 */
static void
inc_susp_count(struct iproc * ip)
{
	struct sembuf *blkop;
	struct semaphore *sp;

	blkop = ip->ip_blkop;
	sp = &ip->ip_sem->sems[blkop->sem_num];

	if (blkop->sem_op != 0) {
		assert(sp->semncnt < USHRT_MAX);
		sp->semncnt++;
	} else {
		assert(sp->semzcnt < USHRT_MAX);
		sp->semzcnt++;
	}
}

/*
 * Decrease the proper suspension count (semncnt or semzcnt) of the semaphore
 * on which the given process is blocked.
 */
static void
dec_susp_count(struct iproc * ip)
{
	struct sembuf *blkop;
	struct semaphore *sp;

	blkop = ip->ip_blkop;
	sp = &ip->ip_sem->sems[blkop->sem_num];

	if (blkop->sem_op != 0) {
		assert(sp->semncnt > 0);
		sp->semncnt--;
	} else {
		assert(sp->semzcnt > 0);
		sp->semzcnt--;
	}
}

/*
 * Send a reply for a semop(2) call suspended earlier, thus waking up the
 * process.
 */
static void
send_reply(endpoint_t who, int ret)
{
	message m;

	memset(&m, 0, sizeof(m));
	m.m_type = ret;

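	/*
	 * Send the reply without ever blocking.  The IPC server must not
	 * hang on a process that is, for example, in the middle of exiting.
	 */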
	ipc_sendnb(who, &m);
}

/*
 * Satisfy or cancel the semop(2) call on which the given process is blocked,
 * and send the given reply code (OK or a negative error code) to wake it up,
 * unless the given code is EDONTREPLY.
 */
static void
complete_semop(struct iproc * ip, int code)
{
	struct sem_struct *sem;

	sem = ip->ip_sem;

	assert(sem != NULL);

	TAILQ_REMOVE(&sem->waiters, ip, ip_next);

	dec_susp_count(ip);

	assert(ip->ip_sops != NULL);
	free(ip->ip_sops);

	ip->ip_sops = NULL;
	ip->ip_blkop = NULL;
	ip->ip_sem = NULL;

	if (code != EDONTREPLY)
		send_reply(ip->ip_endpt, code);
}

/*
 * Free up the given semaphore set.  This includes cancelling any blocking
 * semop(2) calls on any of its semaphores.
 */
static void
remove_set(struct sem_struct * sem)
{
	struct iproc *ip;

	/*
	 * Cancel all semop(2) operations on this semaphore set, with an EIDRM
	 * reply code.
	 */
	while (!TAILQ_EMPTY(&sem->waiters)) {
		ip = TAILQ_FIRST(&sem->waiters);

		complete_semop(ip, EIDRM);
	}

	/* Mark the entry as free. */
	sem->semid_ds.sem_perm.mode &= ~SEM_ALLOC;

	/*
	 * This may have been the last in-use slot in the list.  Ensure that
	 * sem_list_nr again equals the highest in-use slot number plus one.
	 */
	while (sem_list_nr > 0 &&
	    !(sem_list[sem_list_nr - 1].semid_ds.sem_perm.mode & SEM_ALLOC))
		sem_list_nr--;

	/*
	 * If this was our last semaphore set, unsubscribe from process events.
	 */
	if (sem_list_nr == 0)
		update_sem_sub(FALSE /*want_events*/);
}

/*
 * Try to perform a set of semaphore operations, as given by semop(2), on a
 * semaphore set.  The entire action must be atomic, i.e., either succeed in
 * its entirety or fail without making any changes.  Return OK on success, in
 * which case the PIDs of all affected semaphores will be updated to the given
 * 'pid' value, and the semaphore set's sem_otime will be updated as well.
 * Return SUSPEND if the call should be suspended, in which case 'blkop' will
 * be set to a pointer to the operation causing the call to block.  Return an
 * error code if the call failed altogether.
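 *
 * As an illustration of the atomic, in-array-order semantics (a sketch of
 * the caller side, not code in this server): an atomic increase-if-zero on
 * semaphore 0 can be built from two operations on that same semaphore,
 *
 *	static struct sembuf ops[2] = {
 *		{ 0, 0, 0 },	(wait until semaphore 0 equals zero)
 *		{ 0, 1, 0 }	(then increase it by one)
 *	};
 *	(void)semop(semid, ops, 2);
 *
 * where 'semid' is an identifier obtained from semget(2), and each sembuf
 * lists sem_num, sem_op and sem_flg in that order.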
 */
static int
try_semop(struct sem_struct *sem, struct sembuf *sops, unsigned int nsops,
	pid_t pid, struct sembuf ** blkop)
{
	struct semaphore *sp;
	struct sembuf *op;
	unsigned int i;
	int r;

	/*
	 * The operation must be processed atomically.  However, it must also
	 * be processed "in array order," which we assume to mean that while
	 * processing one operation, the changes of the previous operations
	 * must be taken into account.  This is relevant for cases where the
	 * same semaphore is referenced by more than one operation, for example
	 * to perform an atomic increase-if-zero action on a single semaphore.
	 * As a result, we must optimistically modify semaphore values and roll
	 * back on suspension or failure afterwards.
	 */
	r = OK;
	op = NULL;
	for (i = 0; i < nsops; i++) {
		sp = &sem->sems[sops[i].sem_num];
		op = &sops[i];

		if (op->sem_op > 0) {
			if (SEMVMX - sp->semval < op->sem_op) {
				r = ERANGE;
				break;
			}
			sp->semval += op->sem_op;
		} else if (op->sem_op < 0) {
			/*
			 * No SEMVMX check; if the process wants to deadlock
			 * itself by supplying -SEMVMX it is free to do so..
			 */
			if ((int)sp->semval < -(int)op->sem_op) {
				r = (op->sem_flg & IPC_NOWAIT) ? EAGAIN :
				    SUSPEND;
				break;
			}
			sp->semval += op->sem_op;
		} else /* (op->sem_op == 0) */ {
			if (sp->semval != 0) {
				r = (op->sem_flg & IPC_NOWAIT) ? EAGAIN :
				    SUSPEND;
				break;
			}
		}
	}

	/*
	 * If we did not go through all the operations, then either an error
	 * occurred or the user process is to be suspended.  In that case we
	 * must roll back any progress we have made so far, and return the
	 * operation that caused the call to block.
	 */
	if (i < nsops) {
		assert(op != NULL);
		*blkop = op;

		/* Roll back all changes made so far. */
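		/*
		 * Note that only the operations before sops[i] were actually
		 * applied; the operation that blocked or failed never
		 * modified its semaphore, and undoing a zero operation
		 * subtracts nothing.
		 */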
		while (i-- > 0)
			sem->sems[sops[i].sem_num].semval -= sops[i].sem_op;

		assert(r != OK);
		return r;
	}

	/*
	 * The operation has completed successfully.  Also update all affected
	 * semaphores' PID values, and the semaphore set's last-semop time.
	 * The caller must do everything else.
	 */
	for (i = 0; i < nsops; i++)
		sem->sems[sops[i].sem_num].sempid = pid;

	sem->semid_ds.sem_otime = clock_time(NULL);

	return OK;
}

/*
 * Check whether any blocked operations can now be satisfied on any of the
 * semaphores in the given semaphore set.  Do this repeatedly as necessary, as
 * any unblocked operation may in turn allow other operations to be resumed.
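 * For example, an increase may unblock a waiting decrease operation, and
 * that decrease may in turn bring a semaphore value to zero, unblocking a
 * wait-for-zero operation elsewhere in the set.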
 */
static void
check_set(struct sem_struct * sem)
{
	struct iproc *ip, *nextip;
	struct sembuf *blkop;
	int r, woken_up;

	/*
	 * Go through all the waiting processes in FIFO order, which is our
	 * best attempt at providing at least some fairness.  Keep trying as
	 * long as we woke up at least one process, which means we made actual
	 * progress.
	 */
	do {
		woken_up = FALSE;

		TAILQ_FOREACH_SAFE(ip, &sem->waiters, ip_next, nextip) {
			/* Retry the entire semop(2) operation, atomically. */
			r = try_semop(ip->ip_sem, ip->ip_sops, ip->ip_nsops,
			    ip->ip_pid, &blkop);

			if (r != SUSPEND) {
				/* Success or failure. */
				complete_semop(ip, r);

				/* No changes are made on failure. */
				if (r == OK)
					woken_up = TRUE;
			} else if (blkop != ip->ip_blkop) {
				/*
				 * The process stays suspended, but it is now
				 * blocked on a different semaphore.  As a
				 * result, we need to adjust the semaphores'
				 * suspension counts.
				 */
				dec_susp_count(ip);

				ip->ip_blkop = blkop;

				inc_susp_count(ip);
			}
		}
	} while (woken_up);
}

/*
 * Fill a seminfo structure with actual information.  The information returned
 * depends on the given command, which may be either IPC_INFO or SEM_INFO.
 */
static void
fill_seminfo(struct seminfo * sinfo, int cmd)
{
	unsigned int i;

	assert(cmd == IPC_INFO || cmd == SEM_INFO);

	memset(sinfo, 0, sizeof(*sinfo));

	sinfo->semmap = SEMMNI;
	sinfo->semmni = SEMMNI;
	sinfo->semmns = SEMMNI * SEMMSL;
	sinfo->semmnu = 0; /* TODO: support for SEM_UNDO */
	sinfo->semmsl = SEMMSL;
	sinfo->semopm = SEMOPM;
	sinfo->semume = 0; /* TODO: support for SEM_UNDO */
	if (cmd == SEM_INFO) {
		/*
		 * For SEM_INFO the semusz field is expected to contain the
		 * number of semaphore sets currently in use.
		 */
		sinfo->semusz = sem_list_nr;
	} else
		sinfo->semusz = 0; /* TODO: support for SEM_UNDO */
	sinfo->semvmx = SEMVMX;
	if (cmd == SEM_INFO) {
		/*
		 * For SEM_INFO the semaem field is expected to contain
		 * the total number of allocated semaphores.
		 */
		for (i = 0; i < sem_list_nr; i++)
			sinfo->semaem += sem_list[i].semid_ds.sem_nsems;
	} else
		sinfo->semaem = 0; /* TODO: support for SEM_UNDO */
}

/*
 * Implementation of the semctl(2) system call.
 */
int
do_semctl(message * m)
{
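	/*
	 * Buffer for GETALL and SETALL values.  A static buffer suffices,
	 * and avoids a large on-stack array, since this single-threaded
	 * server processes at most one semctl(2) call at a time.
	 */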
	static unsigned short valbuf[SEMMSL];
	unsigned int i;
	vir_bytes opt;
	uid_t uid;
	int r, id, num, cmd, val;
	struct semid_ds tmp_ds;
	struct sem_struct *sem;
	struct seminfo sinfo;

	id = m->m_lc_ipc_semctl.id;
	num = m->m_lc_ipc_semctl.num;
	cmd = m->m_lc_ipc_semctl.cmd;
	opt = m->m_lc_ipc_semctl.opt;

	/*
	 * Look up the target semaphore set.  The IPC_INFO and SEM_INFO
	 * commands have no associated semaphore set.  The SEM_STAT command
	 * takes an array index into the semaphore set table.  For all other
	 * commands, look up the semaphore set by its given identifier.
	 */
	switch (cmd) {
	case IPC_INFO:
	case SEM_INFO:
		sem = NULL;
		break;
	case SEM_STAT:
		if (id < 0 || (unsigned int)id >= sem_list_nr)
			return EINVAL;
		sem = &sem_list[id];
		if (!(sem->semid_ds.sem_perm.mode & SEM_ALLOC))
			return EINVAL;
		break;
	default:
		if ((sem = sem_find_id(id)) == NULL)
			return EINVAL;
		break;
	}

	/*
	 * Check if the caller has the appropriate permissions on the target
	 * semaphore set.  SETVAL and SETALL require write permission.  IPC_SET
	 * and IPC_RMID require ownership permission, and return EPERM instead
	 * of EACCES on failure.  IPC_INFO and SEM_INFO are free for general
	 * use.  All other calls require read permission.
	 */
	switch (cmd) {
	case SETVAL:
	case SETALL:
		assert(sem != NULL);
		if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, IPC_W))
			return EACCES;
		break;
	case IPC_SET:
	case IPC_RMID:
		assert(sem != NULL);
		uid = getnuid(m->m_source);
		if (uid != sem->semid_ds.sem_perm.cuid &&
		    uid != sem->semid_ds.sem_perm.uid && uid != 0)
			return EPERM;
		break;
	case IPC_INFO:
	case SEM_INFO:
		break;
	default:
		assert(sem != NULL);
		if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, IPC_R))
			return EACCES;
	}

	switch (cmd) {
	case IPC_STAT:
	case SEM_STAT:
		if ((r = sys_datacopy(SELF, (vir_bytes)&sem->semid_ds,
		    m->m_source, opt, sizeof(sem->semid_ds))) != OK)
			return r;
		if (cmd == SEM_STAT)
			m->m_lc_ipc_semctl.ret =
			    IXSEQ_TO_IPCID(id, sem->semid_ds.sem_perm);
		break;
	case IPC_SET:
		if ((r = sys_datacopy(m->m_source, opt, SELF,
		    (vir_bytes)&tmp_ds, sizeof(tmp_ds))) != OK)
			return r;
		sem->semid_ds.sem_perm.uid = tmp_ds.sem_perm.uid;
		sem->semid_ds.sem_perm.gid = tmp_ds.sem_perm.gid;
		sem->semid_ds.sem_perm.mode &= ~ACCESSPERMS;
		sem->semid_ds.sem_perm.mode |=
		    tmp_ds.sem_perm.mode & ACCESSPERMS;
		sem->semid_ds.sem_ctime = clock_time(NULL);
		break;
	case IPC_RMID:
		/*
		 * Awaken all processes blocked in semop(2) on any semaphore in
		 * this set, and remove the semaphore set itself.
		 */
		remove_set(sem);
		break;
	case IPC_INFO:
	case SEM_INFO:
		fill_seminfo(&sinfo, cmd);

		if ((r = sys_datacopy(SELF, (vir_bytes)&sinfo, m->m_source,
		    opt, sizeof(sinfo))) != OK)
			return r;
		/* Return the highest in-use slot number if any, or zero. */
		if (sem_list_nr > 0)
			m->m_lc_ipc_semctl.ret = sem_list_nr - 1;
		else
			m->m_lc_ipc_semctl.ret = 0;
		break;
	case GETALL:
		assert(sem->semid_ds.sem_nsems <= __arraycount(valbuf));
		for (i = 0; i < sem->semid_ds.sem_nsems; i++)
			valbuf[i] = sem->sems[i].semval;
		r = sys_datacopy(SELF, (vir_bytes)valbuf, m->m_source,
		    opt, sizeof(unsigned short) * sem->semid_ds.sem_nsems);
		if (r != OK)
			return r;
		break;
	case GETNCNT:
		if (num < 0 || num >= sem->semid_ds.sem_nsems)
			return EINVAL;
		m->m_lc_ipc_semctl.ret = sem->sems[num].semncnt;
		break;
	case GETPID:
		if (num < 0 || num >= sem->semid_ds.sem_nsems)
			return EINVAL;
		m->m_lc_ipc_semctl.ret = sem->sems[num].sempid;
		break;
	case GETVAL:
		if (num < 0 || num >= sem->semid_ds.sem_nsems)
			return EINVAL;
		m->m_lc_ipc_semctl.ret = sem->sems[num].semval;
		break;
	case GETZCNT:
		if (num < 0 || num >= sem->semid_ds.sem_nsems)
			return EINVAL;
		m->m_lc_ipc_semctl.ret = sem->sems[num].semzcnt;
		break;
	case SETALL:
		assert(sem->semid_ds.sem_nsems <= __arraycount(valbuf));
		r = sys_datacopy(m->m_source, opt, SELF, (vir_bytes)valbuf,
		    sizeof(unsigned short) * sem->semid_ds.sem_nsems);
		if (r != OK)
			return r;
		for (i = 0; i < sem->semid_ds.sem_nsems; i++)
			if (valbuf[i] > SEMVMX)
				return ERANGE;
#ifdef DEBUG_SEM
		for (i = 0; i < sem->semid_ds.sem_nsems; i++)
			printf("SEMCTL: SETALL val: [%d] %d\n", i, valbuf[i]);
#endif
		for (i = 0; i < sem->semid_ds.sem_nsems; i++)
			sem->sems[i].semval = valbuf[i];
		sem->semid_ds.sem_ctime = clock_time(NULL);
		/* Awaken any waiting parties if now possible. */
		check_set(sem);
		break;
	case SETVAL:
		val = (int)opt;
		if (num < 0 || num >= sem->semid_ds.sem_nsems)
			return EINVAL;
		if (val < 0 || val > SEMVMX)
			return ERANGE;
		sem->sems[num].semval = val;
#ifdef DEBUG_SEM
		printf("SEMCTL: SETVAL: %d %d\n", num, val);
#endif
		sem->semid_ds.sem_ctime = clock_time(NULL);
		/* Awaken any waiting parties if now possible. */
		check_set(sem);
		break;
	default:
		return EINVAL;
	}

	return OK;
}

/*
 * Implementation of the semop(2) system call.
 */
int
do_semop(message * m)
{
	unsigned int i, mask, slot;
	int id, r;
	struct sembuf *sops, *blkop;
	unsigned int nsops;
	struct sem_struct *sem;
	struct iproc *ip;
	pid_t pid;

	id = m->m_lc_ipc_semop.id;
	nsops = m->m_lc_ipc_semop.size;

	if ((sem = sem_find_id(id)) == NULL)
		return EINVAL;

	if (nsops == 0)
		return OK; /* nothing to do */
	if (nsops > SEMOPM)
		return E2BIG;

	/* Get the array from the user process. */
	sops = malloc(sizeof(sops[0]) * nsops);
	if (sops == NULL)
		return ENOMEM;
	r = sys_datacopy(m->m_source, (vir_bytes)m->m_lc_ipc_semop.ops, SELF,
	    (vir_bytes)sops, sizeof(sops[0]) * nsops);
	if (r != OK)
		goto out_free;

#ifdef DEBUG_SEM
	for (i = 0; i < nsops; i++)
		printf("SEMOP: num:%d  op:%d  flg:%d\n",
			sops[i].sem_num, sops[i].sem_op, sops[i].sem_flg);
#endif
	/*
	 * Check for permissions.  We do this only once, even though the call
	 * might suspend and the semaphore set's permissions might be changed
	 * before the call resumes.  The specification is not clear on this.
	 * Either way, perform the permission check before checking on the
	 * validity of semaphore numbers, since obtaining the semaphore set
	 * size itself requires read permission (except through sysctl(2)..).
	 */
	mask = 0;
	for (i = 0; i < nsops; i++) {
		if (sops[i].sem_op != 0)
			mask |= IPC_W; /* check for write permission */
		else
			mask |= IPC_R; /* check for read permission */
	}
	r = EACCES;
	if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, mask))
		goto out_free;

	/* Check that all given semaphore numbers are within range. */
	r = EFBIG;
	for (i = 0; i < nsops; i++)
		if (sops[i].sem_num >= sem->semid_ds.sem_nsems)
			goto out_free;

	/*
	 * Do not check if the same semaphore is referenced more than once
	 * (there was such a check here originally), because that is actually
	 * a valid case.  The result is however that it is possible to
	 * construct a semop(2) request that will never complete, and thus,
	 * care must be taken that such requests do not create potential
	 * deadlock situations etc.
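	 * (One such request: wait for a semaphore to become zero and then,
	 * in the same array, decrease that same semaphore by one.  The wait
	 * only passes while the value is zero, at which point the decrease
	 * must block, so no attempt at the request can ever succeed.)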
	 */

	pid = getnpid(m->m_source);

	/*
	 * We do not yet support SEM_UNDO at all, so we better not give the
	 * caller the impression that we do.  For now, print a warning so that
	 * we know when an application actually fails for that reason.
	 */
	for (i = 0; i < nsops; i++) {
		if (sops[i].sem_flg & SEM_UNDO) {
			/* Print a warning only if this isn't the test set.. */
			if (sops[i].sem_flg != SHRT_MAX)
				printf("IPC: pid %d tried to use SEM_UNDO\n",
				    pid);
			r = EINVAL;
			goto out_free;
		}
	}

	/* Try to perform the operation now. */
	r = try_semop(sem, sops, nsops, pid, &blkop);

	if (r == SUSPEND) {
		/*
		 * The operation ended up blocking on a particular semaphore
		 * operation.  Save all details in the slot for the user
		 * process, and add it to the list of processes waiting for
		 * this semaphore set.
		 */
		slot = _ENDPOINT_P(m->m_source);
		assert(slot < __arraycount(iproc));

		ip = &iproc[slot];
		assert(ip->ip_sem == NULL); /* can't already be in use */

		ip->ip_endpt = m->m_source;
		ip->ip_pid = pid;
		ip->ip_sem = sem;
		ip->ip_sops = sops;
		ip->ip_nsops = nsops;
		ip->ip_blkop = blkop;

		TAILQ_INSERT_TAIL(&sem->waiters, ip, ip_next);

		inc_susp_count(ip);

		return r;
	}

out_free:
	free(sops);

	/* Awaken any other waiting parties if now possible. */
	if (r == OK)
		check_set(sem);

	return r;
}

/*
 * Return semaphore information for a remote MIB call on the sysvipc_info node
 * in the kern.ipc subtree.  The particular semantics of this call are tightly
 * coupled to the implementation of the ipcs(1) userland utility.
 */
ssize_t
get_sem_mib_info(struct rmib_oldp * oldp)
{
	struct sem_sysctl_info semsi;
	struct semid_ds *semds;
	unsigned int i;
	ssize_t r, off;

	off = 0;

	fill_seminfo(&semsi.seminfo, IPC_INFO);

	/*
	 * As a hackish exception, the requested size may imply that just
	 * general information is to be returned, without throwing an ENOMEM
	 * error because there is no space for full output.
	 */
	if (rmib_getoldlen(oldp) == sizeof(semsi.seminfo))
		return rmib_copyout(oldp, 0, &semsi.seminfo,
		    sizeof(semsi.seminfo));

	/*
	 * ipcs(1) blindly expects the returned array to be of size
	 * seminfo.semmni, using the SEM_ALLOC mode flag to see whether each
	 * entry is valid.  If we return a smaller size, ipcs(1) will access
	 * arbitrary memory.
	 */
	assert(semsi.seminfo.semmni > 0);

	if (oldp == NULL)
		return sizeof(semsi) + sizeof(semsi.semids[0]) *
		    (semsi.seminfo.semmni - 1);

	/*
	 * Copy out entries one by one.  For the first entry, copy out the
	 * entire "semsi" structure.  For subsequent entries, reuse the single
	 * embedded 'semids' element of "semsi" and copy out only that element.
	 */
	for (i = 0; i < (unsigned int)semsi.seminfo.semmni; i++) {
		semds = &sem_list[i].semid_ds;

		memset(&semsi.semids[0], 0, sizeof(semsi.semids[0]));
		if (i < sem_list_nr && (semds->sem_perm.mode & SEM_ALLOC)) {
			prepare_mib_perm(&semsi.semids[0].sem_perm,
			    &semds->sem_perm);
			semsi.semids[0].sem_nsems = semds->sem_nsems;
			semsi.semids[0].sem_otime = semds->sem_otime;
			semsi.semids[0].sem_ctime = semds->sem_ctime;
		}

		if (off == 0)
			r = rmib_copyout(oldp, off, &semsi, sizeof(semsi));
		else
			r = rmib_copyout(oldp, off, &semsi.semids[0],
			    sizeof(semsi.semids[0]));

		if (r < 0)
			return r;
		off += r;
	}

	return off;
}

/*
 * Return TRUE iff no semaphore sets are allocated.
 */
int
is_sem_nil(void)
{

	return (sem_list_nr == 0);
}

/*
 * Check if the given endpoint is blocked on a semop(2) call.  If so, cancel
 * the call, because either the call was interrupted by a signal or the
 * process has been killed.  In the former case, unblock the process by
 * replying with EINTR.
 */
void
sem_process_event(endpoint_t endpt, int has_exited)
{
	unsigned int slot;
	struct iproc *ip;

	slot = _ENDPOINT_P(endpt);
	assert(slot < __arraycount(iproc));

	ip = &iproc[slot];

	/* Was the process blocked on a semop(2) call at all? */
	if (ip->ip_sem == NULL)
		return;

	assert(ip->ip_endpt == endpt);

	/*
	 * It was; cancel the semop(2) call.  If the process has exited, send
	 * no reply at all.  Otherwise, the call was interrupted by a signal,
	 * and we must wake up the process with EINTR.
	 */
	complete_semop(ip, has_exited ? EDONTREPLY : EINTR);
}
889