xref: /minix/minix/servers/vfs/main.c (revision 0a6a1f1d)
1 /*
2  * This file contains the main program of the Virtual File System: a loop that
3  * gets messages requesting work, carries out the work, and sends replies.
4  *
5  * The entry points into this file are:
6  *   main:	main program of the Virtual File System
7  *   replycode:	send a reply code to a process after the requested work is done
8  *
9  */
10 
11 #include "fs.h"
12 #include <fcntl.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <sys/ioc_memory.h>
19 #include <sys/svrctl.h>
20 #include <sys/select.h>
21 #include <minix/callnr.h>
22 #include <minix/com.h>
23 #include <minix/const.h>
24 #include <minix/endpoint.h>
25 #include <minix/safecopies.h>
26 #include <minix/debug.h>
27 #include <minix/vfsif.h>
28 #include "file.h"
29 #include "vmnt.h"
30 #include "vnode.h"
31 
32 #if ENABLE_SYSCALL_STATS
33 EXTERN unsigned long calls_stats[NR_VFS_CALLS];
34 #endif
35 
36 /* Thread related prototypes */
37 static void do_reply(struct worker_thread *wp);
38 static void do_work(void);
39 static void do_init_root(void);
40 static void handle_work(void (*func)(void));
41 static void reply(message *m_out, endpoint_t whom, int result);
42 
43 static int get_work(void);
44 static void service_pm(void);
45 static int unblock(struct fproc *rfp);
46 
47 /* SEF functions and variables. */
48 static void sef_local_startup(void);
49 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
50 static int sef_cb_init_lu(int type, sef_init_info_t *info);
51 
52 /*===========================================================================*
53  *				main					     *
54  *===========================================================================*/
55 int main(void)
56 {
57 /* This is the main program of the file system.  The main loop consists of
58  * three major activities: getting new work, processing the work, and sending
59  * the reply.  This loop never terminates as long as the file system runs.
60  */
61   int transid;
62   struct worker_thread *wp;
63 
64   /* SEF local startup. */
65   sef_local_startup();
66 
67   printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);
68 
69   /* This is the main loop that gets work, processes it, and sends replies. */
70   while (TRUE) {
71 	worker_yield();	/* let other threads run */
72 
73 	send_work();
74 
75 	/* The get_work() function returns TRUE if we have a new message to
76 	 * process. It returns FALSE if it spawned other thread activities.
77 	 */
78 	if (!get_work())
79 		continue;
80 
81 	transid = TRNS_GET_ID(m_in.m_type);
82 	if (IS_VFS_FS_TRANSID(transid)) {
83 		wp = worker_get((thread_t) transid - VFS_TRANSID);
84 		if (wp == NULL || wp->w_fp == NULL) {
85 			printf("VFS: spurious message %d from endpoint %d\n",
86 				m_in.m_type, m_in.m_source);
87 			continue;
88 		}
89 		m_in.m_type = TRNS_DEL_ID(m_in.m_type);
90 		do_reply(wp);
91 		continue;
92 	} else if (who_e == PM_PROC_NR) { /* Calls from PM */
93 		/* Special control messages from PM */
94 		service_pm();
95 		continue;
96 	} else if (is_notify(call_nr)) {
97 		/* A task ipc_notify()ed us */
98 		switch (who_e) {
99 		case DS_PROC_NR:
100 			/* Start a thread to handle DS events, if no thread
101 			 * is pending or active for it already. DS is not
102 			 * supposed to issue calls to VFS or be the subject of
103 			 * postponed PM requests, so this should be no problem.
104 			 */
105 			if (worker_can_start(fp))
106 				handle_work(ds_event);
107 			break;
108 		case KERNEL:
109 			mthread_stacktraces();
110 			break;
111 		case CLOCK:
112 			/* Timer expired. Used only for select(). Check it. */
113 			expire_timers(m_in.m_notify.timestamp);
114 			break;
115 		default:
116 			printf("VFS: ignoring notification from %d\n", who_e);
117 		}
118 		continue;
119 	} else if (who_p < 0) { /* i.e., message comes from a task */
120 		/* We're going to ignore this message. Tasks should
121 		 * send ipc_notify()s only.
122 		 */
123 		 printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
124 		 continue;
125 	}
126 
127 	if (IS_BDEV_RS(call_nr)) {
128 		/* We've got results for a block device request. */
129 		bdev_reply();
130 	} else if (IS_CDEV_RS(call_nr)) {
131 		/* We've got results for a character device request. */
132 		cdev_reply();
133 	} else {
134 		/* Normal syscall. This spawns a new thread. */
135 		handle_work(do_work);
136 	}
137   }
138   return(OK);				/* shouldn't come here */
139 }
140 
141 /*===========================================================================*
142  *			       handle_work				     *
143  *===========================================================================*/
144 static void handle_work(void (*func)(void))
145 {
146 /* Handle asynchronous device replies and new system calls. If the originating
147  * endpoint is an FS endpoint, take extra care not to get in deadlock. */
148   struct vmnt *vmp = NULL;
149   endpoint_t proc_e;
150   int use_spare = FALSE;
151 
152   proc_e = m_in.m_source;
153 
154   if (fp->fp_flags & FP_SRV_PROC) {
155 	vmp = find_vmnt(proc_e);
156 	if (vmp != NULL) {
157 		/* A callback from an FS endpoint. Can do only one at once. */
158 		if (vmp->m_flags & VMNT_CALLBACK) {
159 			replycode(proc_e, EAGAIN);
160 			return;
161 		}
162 		/* Already trying to resolve a deadlock? Can't handle more. */
163 		if (worker_available() == 0) {
164 			replycode(proc_e, EAGAIN);
165 			return;
166 		}
167 		/* A thread is available. Set callback flag. */
168 		vmp->m_flags |= VMNT_CALLBACK;
169 		if (vmp->m_flags & VMNT_MOUNTING) {
170 			vmp->m_flags |= VMNT_FORCEROOTBSF;
171 		}
172 	}
173 
174 	/* Use the spare thread to handle this request if needed. */
175 	use_spare = TRUE;
176   }
177 
178   worker_start(fp, func, &m_in, use_spare);
179 }
180 
181 
182 /*===========================================================================*
183  *			       do_reply				             *
184  *===========================================================================*/
185 static void do_reply(struct worker_thread *wp)
186 {
187   struct vmnt *vmp = NULL;
188 
189   if(who_e != VM_PROC_NR && (vmp = find_vmnt(who_e)) == NULL)
190 	panic("Couldn't find vmnt for endpoint %d", who_e);
191 
192   if (wp->w_task != who_e) {
193 	printf("VFS: tid %d: expected %d to reply, not %d\n",
194 		wp->w_tid, wp->w_task, who_e);
195 	return;
196   }
197   /* It should be impossible to trigger the following case, but it is here for
198    * consistency reasons: worker_stop() resets w_sendrec but not w_task.
199    */
200   if (wp->w_sendrec == NULL) {
201 	printf("VFS: tid %d: late reply from %d ignored\n", wp->w_tid, who_e);
202 	return;
203   }
204   *wp->w_sendrec = m_in;
205   wp->w_sendrec = NULL;
206   wp->w_task = NONE;
207   if(vmp) vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
208   worker_signal(wp); /* Continue this thread */
209 }
210 
211 /*===========================================================================*
212  *			       do_pending_pipe				     *
213  *===========================================================================*/
214 static void do_pending_pipe(void)
215 {
216   int r, op;
217   struct filp *f;
218   tll_access_t locktype;
219 
220   f = fp->fp_filp[fp->fp_fd];
221   assert(f != NULL);
222 
223   locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
224   op = (job_call_nr == VFS_READ) ? READING : WRITING;
225   lock_filp(f, locktype);
226 
227   r = rw_pipe(op, who_e, f, fp->fp_io_buffer, fp->fp_io_nbytes);
228 
229   if (r != SUSPEND) { /* Do we have results to report? */
230 	/* Process is writing, but there is no reader. Send a SIGPIPE signal.
231 	 * This should match the corresponding code in read_write().
232 	 */
233 	if (r == EPIPE && op == WRITING) {
234 		if (!(f->filp_flags & O_NOSIGPIPE))
235 			sys_kill(fp->fp_endpoint, SIGPIPE);
236 	}
237 
238 	replycode(fp->fp_endpoint, r);
239   }
240 
241   unlock_filp(f);
242 }
243 
244 /*===========================================================================*
245  *			       do_work					     *
246  *===========================================================================*/
247 static void do_work(void)
248 {
249   unsigned int call_index;
250   int error;
251 
252   if (fp->fp_pid == PID_FREE) {
253 	/* Process vanished before we were able to handle request.
254 	 * Replying has no use. Just drop it.
255 	 */
256 	return;
257   }
258 
259   memset(&job_m_out, 0, sizeof(job_m_out));
260 
261   /* At this point we assume that we're dealing with a call that has been
262    * made specifically to VFS. Typically it will be a POSIX call from a
263    * normal process, but we also handle a few calls made by drivers such
264    * such as UDS and VND through here. Call the internal function that
265    * does the work.
266    */
267   if (IS_VFS_CALL(job_call_nr)) {
268 	call_index = (unsigned int) (job_call_nr - VFS_BASE);
269 
270 	if (call_index < NR_VFS_CALLS && call_vec[call_index] != NULL) {
271 #if ENABLE_SYSCALL_STATS
272 		calls_stats[call_index]++;
273 #endif
274 		error = (*call_vec[call_index])();
275 	} else
276 		error = ENOSYS;
277   } else
278 	error = ENOSYS;
279 
280   /* Copy the results back to the user and send reply. */
281   if (error != SUSPEND) reply(&job_m_out, fp->fp_endpoint, error);
282 }
283 
284 /*===========================================================================*
285  *				sef_cb_lu_prepare			     *
286  *===========================================================================*/
287 static int sef_cb_lu_prepare(int state)
288 {
289 /* This function is called to decide whether we can enter the given live
290  * update state, and to prepare for such an update. If we are requested to
291  * update to a request-free or protocol-free state, make sure there is no work
292  * pending or being processed, and shut down all worker threads.
293  */
294 
295   switch (state) {
296   case SEF_LU_STATE_REQUEST_FREE:
297   case SEF_LU_STATE_PROTOCOL_FREE:
298 	if (!worker_idle()) {
299 		printf("VFS: worker threads not idle, blocking update\n");
300 		break;
301 	}
302 
303 	worker_cleanup();
304 
305 	return OK;
306   }
307 
308   return ENOTREADY;
309 }
310 
311 /*===========================================================================*
312  *			       sef_cb_lu_state_changed			     *
313  *===========================================================================*/
314 static void sef_cb_lu_state_changed(int old_state, int state)
315 {
316 /* Worker threads (especially their stacks) pose a serious problem for state
317  * transfer during live update, and therefore, we shut down all worker threads
318  * during live update and restart them afterwards. This function is called in
319  * the old VFS instance when the state changed. We use it to restart worker
320  * threads after a failed live update.
321  */
322 
323   if (state != SEF_LU_STATE_NULL)
324 	return;
325 
326   switch (old_state) {
327   case SEF_LU_STATE_REQUEST_FREE:
328   case SEF_LU_STATE_PROTOCOL_FREE:
329 	worker_init();
330   }
331 }
332 
333 /*===========================================================================*
334  *				sef_cb_init_lu				     *
335  *===========================================================================*/
336 static int sef_cb_init_lu(int type, sef_init_info_t *info)
337 {
338 /* This function is called in the new VFS instance during a live update. */
339   int r;
340 
341   /* Perform regular state transfer. */
342   if ((r = SEF_CB_INIT_LU_DEFAULT(type, info)) != OK)
343 	return r;
344 
345   /* Recreate worker threads, if necessary. */
346   switch (info->prepare_state) {
347   case SEF_LU_STATE_REQUEST_FREE:
348   case SEF_LU_STATE_PROTOCOL_FREE:
349 	worker_init();
350   }
351 
352   return OK;
353 }
354 
355 /*===========================================================================*
356  *			       sef_local_startup			     *
357  *===========================================================================*/
358 static void sef_local_startup(void)
359 {
360   /* Register init callbacks. */
361   sef_setcb_init_fresh(sef_cb_init_fresh);
362   sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);
363 
364   /* Register live update callbacks. */
365   sef_setcb_init_lu(sef_cb_init_lu);
366   sef_setcb_lu_prepare(sef_cb_lu_prepare);
367   sef_setcb_lu_state_changed(sef_cb_lu_state_changed);
368   sef_setcb_lu_state_isvalid(sef_cb_lu_state_isvalid_standard);
369 
370   /* Let SEF perform startup. */
371   sef_startup();
372 }
373 
374 /*===========================================================================*
375  *				sef_cb_init_fresh			     *
376  *===========================================================================*/
377 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
378 {
379 /* Initialize the virtual file server. */
380   int s, i;
381   struct fproc *rfp;
382   message mess;
383   struct rprocpub rprocpub[NR_BOOT_PROCS];
384 
385   self = NULL;
386   verbose = 0;
387 
388   /* Initialize proc endpoints to NONE */
389   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
390 	rfp->fp_endpoint = NONE;
391 	rfp->fp_pid = PID_FREE;
392   }
393 
394   /* Initialize the process table with help of the process manager messages.
395    * Expect one message for each system process with its slot number and pid.
396    * When no more processes follow, the magic process number NONE is sent.
397    * Then, stop and synchronize with the PM.
398    */
399   do {
400 	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
401 		panic("VFS: couldn't receive from PM: %d", s);
402 
403 	if (mess.m_type != VFS_PM_INIT)
404 		panic("unexpected message from PM: %d", mess.m_type);
405 
406 	if (NONE == mess.VFS_PM_ENDPT) break;
407 
408 	rfp = &fproc[mess.VFS_PM_SLOT];
409 	rfp->fp_flags = FP_NOFLAGS;
410 	rfp->fp_pid = mess.VFS_PM_PID;
411 	rfp->fp_endpoint = mess.VFS_PM_ENDPT;
412 	rfp->fp_grant = GRANT_INVALID;
413 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
414 	rfp->fp_realuid = (uid_t) SYS_UID;
415 	rfp->fp_effuid = (uid_t) SYS_UID;
416 	rfp->fp_realgid = (gid_t) SYS_GID;
417 	rfp->fp_effgid = (gid_t) SYS_GID;
418 	rfp->fp_umask = ~0;
419   } while (TRUE);			/* continue until process NONE */
420   mess.m_type = OK;			/* tell PM that we succeeded */
421   s = ipc_send(PM_PROC_NR, &mess);		/* send synchronization message */
422 
423   system_hz = sys_hz();
424 
425   /* Subscribe to block and character driver events. */
426   s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
427   if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
428 
429   /* Initialize worker threads */
430   worker_init();
431 
432   /* Initialize global locks */
433   if (mthread_mutex_init(&bsf_lock, NULL) != 0)
434 	panic("VFS: couldn't initialize block special file lock");
435 
436   init_dmap();			/* Initialize device table. */
437 
438   /* Map all the services in the boot image. */
439   if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
440 			    (vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
441 	panic("sys_safecopyfrom failed: %d", s);
442   }
443   for (i = 0; i < NR_BOOT_PROCS; i++) {
444 	if (rprocpub[i].in_use) {
445 		if ((s = map_service(&rprocpub[i])) != OK) {
446 			panic("VFS: unable to map service: %d", s);
447 		}
448 	}
449   }
450 
451   /* Initialize locks and initial values for all processes. */
452   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
453 	if (mutex_init(&rfp->fp_lock, NULL) != 0)
454 		panic("unable to initialize fproc lock");
455 	rfp->fp_worker = NULL;
456 #if LOCK_DEBUG
457 	rfp->fp_vp_rdlocks = 0;
458 	rfp->fp_vmnt_rdlocks = 0;
459 #endif
460 
461 	/* Initialize process directories. mount_fs will set them to the
462 	 * correct values.
463 	 */
464 	for (i = 0; i < OPEN_MAX; i++)
465 		rfp->fp_filp[i] = NULL;
466 	rfp->fp_rd = NULL;
467 	rfp->fp_wd = NULL;
468   }
469 
470   init_vnodes();		/* init vnodes */
471   init_vmnts();			/* init vmnt structures */
472   init_select();		/* init select() structures */
473   init_filps();			/* Init filp structures */
474 
475   /* Mount PFS and initial file system root. */
476   worker_start(fproc_addr(VFS_PROC_NR), do_init_root, &mess /*unused*/,
477 	FALSE /*use_spare*/);
478 
479   return(OK);
480 }
481 
482 /*===========================================================================*
483  *			       do_init_root				     *
484  *===========================================================================*/
485 static void do_init_root(void)
486 {
487   char *mount_type, *mount_label;
488   int r;
489 
490   /* Disallow requests from e.g. init(8) while doing the initial mounting. */
491   worker_allow(FALSE);
492 
493   /* Mount the pipe file server. */
494   mount_pfs();
495 
496   /* Mount the root file system. */
497   mount_type = "mfs";       /* FIXME: use boot image process name instead */
498   mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */
499 
500   r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_type,
501 	mount_label);
502   if (r != OK)
503 	panic("Failed to initialize root");
504 
505   /* All done with mounting, allow requests now. */
506   worker_allow(TRUE);
507 }
508 
509 /*===========================================================================*
510  *				lock_proc				     *
511  *===========================================================================*/
512 void lock_proc(struct fproc *rfp)
513 {
514   int r;
515   struct worker_thread *org_self;
516 
517   r = mutex_trylock(&rfp->fp_lock);
518   if (r == 0) return;
519 
520   org_self = worker_suspend();
521 
522   if ((r = mutex_lock(&rfp->fp_lock)) != 0)
523 	panic("unable to lock fproc lock: %d", r);
524 
525   worker_resume(org_self);
526 }
527 
528 /*===========================================================================*
529  *				unlock_proc				     *
530  *===========================================================================*/
531 void unlock_proc(struct fproc *rfp)
532 {
533   int r;
534 
535   if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
536 	panic("Failed to unlock: %d", r);
537 }
538 
539 /*===========================================================================*
540  *				thread_cleanup				     *
541  *===========================================================================*/
542 void thread_cleanup(void)
543 {
544 /* Perform cleanup actions for a worker thread. */
545 
546 #if LOCK_DEBUG
547   check_filp_locks_by_me();
548   check_vnode_locks_by_me(fp);
549   check_vmnt_locks_by_me(fp);
550 #endif
551 
552   if (fp->fp_flags & FP_SRV_PROC) {
553 	struct vmnt *vmp;
554 
555 	if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL) {
556 		vmp->m_flags &= ~VMNT_CALLBACK;
557 	}
558   }
559 }
560 
561 /*===========================================================================*
562  *				get_work				     *
563  *===========================================================================*/
564 static int get_work(void)
565 {
566   /* Normally wait for new input.  However, if 'reviving' is nonzero, a
567    * suspended process must be awakened.  Return TRUE if there is a message to
568    * process (usually newly received, but possibly a resumed request), or FALSE
569    * if a thread for other activities has been spawned instead.
570    */
571   int r, proc_p;
572   register struct fproc *rp;
573 
574   if (reviving != 0) {
575 	/* Find a suspended process. */
576 	for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
577 		if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED))
578 			return unblock(rp); /* So main loop can process job */
579 
580 	panic("VFS: get_work couldn't revive anyone");
581   }
582 
583   for(;;) {
584 	/* Normal case.  No one to revive. Get a useful request. */
585 	if ((r = sef_receive(ANY, &m_in)) != OK) {
586 		panic("VFS: sef_receive error: %d", r);
587 	}
588 
589 	proc_p = _ENDPOINT_P(m_in.m_source);
590 	if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
591 	else fp = &fproc[proc_p];
592 
593 	/* Negative who_p is never used to access the fproc array. Negative
594 	 * numbers (kernel tasks) are treated in a special way.
595 	 */
596 	if (fp && fp->fp_endpoint == NONE) {
597 		printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
598 			m_in.m_source, who_p, m_in.m_type);
599 		continue;
600 	}
601 
602 	/* Internal consistency check; our mental image of process numbers and
603 	 * endpoints must match with how the rest of the system thinks of them.
604 	 */
605 	if (fp && fp->fp_endpoint != who_e) {
606 		if (fproc[who_p].fp_endpoint == NONE)
607 			printf("slot unknown even\n");
608 
609 		panic("VFS: receive endpoint inconsistent (source %d, who_p "
610 			"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
611 			fproc[who_p].fp_endpoint, who_e);
612 	}
613 
614 	return TRUE;
615   }
616   /* NOTREACHED */
617 }
618 
619 /*===========================================================================*
620  *				reply					     *
621  *===========================================================================*/
622 static void reply(message *m_out, endpoint_t whom, int result)
623 {
624 /* Send a reply to a user process.  If the send fails, just ignore it. */
625   int r;
626 
627   m_out->m_type = result;
628   r = ipc_sendnb(whom, m_out);
629   if (r != OK) {
630 	printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
631 		result, whom, r);
632 	util_stacktrace();
633   }
634 }
635 
636 /*===========================================================================*
637  *				replycode				     *
638  *===========================================================================*/
639 void replycode(endpoint_t whom, int result)
640 {
641 /* Send a reply to a user process.  If the send fails, just ignore it. */
642   message m_out;
643 
644   memset(&m_out, 0, sizeof(m_out));
645 
646   reply(&m_out, whom, result);
647 }
648 
649 /*===========================================================================*
650  *				service_pm_postponed			     *
651  *===========================================================================*/
652 void service_pm_postponed(void)
653 {
654   int r, term_signal;
655   vir_bytes core_path;
656   vir_bytes exec_path, stack_frame, pc, newsp, ps_str;
657   size_t exec_path_len, stack_frame_len;
658   endpoint_t proc_e;
659   message m_out;
660 
661   memset(&m_out, 0, sizeof(m_out));
662 
663   switch(job_call_nr) {
664   case VFS_PM_EXEC:
665 	proc_e = job_m_in.VFS_PM_ENDPT;
666 	exec_path = (vir_bytes) job_m_in.VFS_PM_PATH;
667 	exec_path_len = (size_t) job_m_in.VFS_PM_PATH_LEN;
668 	stack_frame = (vir_bytes) job_m_in.VFS_PM_FRAME;
669 	stack_frame_len = (size_t) job_m_in.VFS_PM_FRAME_LEN;
670 	ps_str = (vir_bytes) job_m_in.VFS_PM_PS_STR;
671 
672 	assert(proc_e == fp->fp_endpoint);
673 
674 	r = pm_exec(exec_path, exec_path_len, stack_frame, stack_frame_len,
675 		&pc, &newsp, &ps_str);
676 
677 	/* Reply status to PM */
678 	m_out.m_type = VFS_PM_EXEC_REPLY;
679 	m_out.VFS_PM_ENDPT = proc_e;
680 	m_out.VFS_PM_PC = (void *) pc;
681 	m_out.VFS_PM_STATUS = r;
682 	m_out.VFS_PM_NEWSP = (void *) newsp;
683 	m_out.VFS_PM_NEWPS_STR = ps_str;
684 
685 	break;
686 
687   case VFS_PM_EXIT:
688 	proc_e = job_m_in.VFS_PM_ENDPT;
689 
690 	assert(proc_e == fp->fp_endpoint);
691 
692 	pm_exit();
693 
694 	/* Reply dummy status to PM for synchronization */
695 	m_out.m_type = VFS_PM_EXIT_REPLY;
696 	m_out.VFS_PM_ENDPT = proc_e;
697 
698 	break;
699 
700   case VFS_PM_DUMPCORE:
701 	proc_e = job_m_in.VFS_PM_ENDPT;
702 	term_signal = job_m_in.VFS_PM_TERM_SIG;
703 	core_path = (vir_bytes) job_m_in.VFS_PM_PATH;
704 
705 	/* A zero signal used to indicate that a coredump should be generated
706 	 * without terminating the target process, but this was broken in so
707 	 * many ways that we no longer support this. Userland should implement
708 	 * this functionality itself, for example through ptrace(2).
709 	 */
710 	if (term_signal == 0)
711 		panic("no termination signal given for coredump!");
712 
713 	assert(proc_e == fp->fp_endpoint);
714 
715 	r = pm_dumpcore(term_signal, core_path);
716 
717 	/* Reply status to PM */
718 	m_out.m_type = VFS_PM_CORE_REPLY;
719 	m_out.VFS_PM_ENDPT = proc_e;
720 	m_out.VFS_PM_STATUS = r;
721 
722 	break;
723 
724   case VFS_PM_UNPAUSE:
725 	proc_e = job_m_in.VFS_PM_ENDPT;
726 
727 	assert(proc_e == fp->fp_endpoint);
728 
729 	unpause();
730 
731 	m_out.m_type = VFS_PM_UNPAUSE_REPLY;
732 	m_out.VFS_PM_ENDPT = proc_e;
733 
734 	break;
735 
736   default:
737 	panic("Unhandled postponed PM call %d", job_m_in.m_type);
738   }
739 
740   r = ipc_send(PM_PROC_NR, &m_out);
741   if (r != OK)
742 	panic("service_pm_postponed: ipc_send failed: %d", r);
743 }
744 
745 /*===========================================================================*
746  *				service_pm				     *
747  *===========================================================================*/
748 static void service_pm(void)
749 {
750 /* Process a request from PM. This function is called from the main thread, and
751  * may therefore not block. Any requests that may require blocking the calling
752  * thread must be executed in a separate thread. Aside from VFS_PM_REBOOT, all
753  * requests from PM involve another, target process: for example, PM tells VFS
754  * that a process is performing a setuid() call. For some requests however,
755  * that other process may not be idle, and in that case VFS must serialize the
756  * PM request handling with any operation is it handling for that target
757  * process. As it happens, the requests that may require blocking are also the
758  * ones where the target process may not be idle. For both these reasons, such
759  * requests are run in worker threads associated to the target process.
760  */
761   struct fproc *rfp;
762   int r, slot;
763   message m_out;
764 
765   memset(&m_out, 0, sizeof(m_out));
766 
767   switch (call_nr) {
768   case VFS_PM_SETUID:
769 	{
770 		endpoint_t proc_e;
771 		uid_t euid, ruid;
772 
773 		proc_e = m_in.VFS_PM_ENDPT;
774 		euid = m_in.VFS_PM_EID;
775 		ruid = m_in.VFS_PM_RID;
776 
777 		pm_setuid(proc_e, euid, ruid);
778 
779 		m_out.m_type = VFS_PM_SETUID_REPLY;
780 		m_out.VFS_PM_ENDPT = proc_e;
781 	}
782 	break;
783 
784   case VFS_PM_SETGID:
785 	{
786 		endpoint_t proc_e;
787 		gid_t egid, rgid;
788 
789 		proc_e = m_in.VFS_PM_ENDPT;
790 		egid = m_in.VFS_PM_EID;
791 		rgid = m_in.VFS_PM_RID;
792 
793 		pm_setgid(proc_e, egid, rgid);
794 
795 		m_out.m_type = VFS_PM_SETGID_REPLY;
796 		m_out.VFS_PM_ENDPT = proc_e;
797 	}
798 	break;
799 
800   case VFS_PM_SETSID:
801 	{
802 		endpoint_t proc_e;
803 
804 		proc_e = m_in.VFS_PM_ENDPT;
805 		pm_setsid(proc_e);
806 
807 		m_out.m_type = VFS_PM_SETSID_REPLY;
808 		m_out.VFS_PM_ENDPT = proc_e;
809 	}
810 	break;
811 
812   case VFS_PM_EXEC:
813   case VFS_PM_EXIT:
814   case VFS_PM_DUMPCORE:
815   case VFS_PM_UNPAUSE:
816 	{
817 		endpoint_t proc_e = m_in.VFS_PM_ENDPT;
818 
819 		if(isokendpt(proc_e, &slot) != OK) {
820 			printf("VFS: proc ep %d not ok\n", proc_e);
821 			return;
822 		}
823 
824 		rfp = &fproc[slot];
825 
826 		/* PM requests on behalf of a proc are handled after the
827 		 * system call that might be in progress for that proc has
828 		 * finished. If the proc is not busy, we start a new thread.
829 		 */
830 		worker_start(rfp, NULL, &m_in, FALSE /*use_spare*/);
831 
832 		return;
833 	}
834   case VFS_PM_FORK:
835   case VFS_PM_SRV_FORK:
836 	{
837 		endpoint_t pproc_e, proc_e;
838 		pid_t child_pid;
839 		uid_t reuid;
840 		gid_t regid;
841 
842 		pproc_e = m_in.VFS_PM_PENDPT;
843 		proc_e = m_in.VFS_PM_ENDPT;
844 		child_pid = m_in.VFS_PM_CPID;
845 		reuid = m_in.VFS_PM_REUID;
846 		regid = m_in.VFS_PM_REGID;
847 
848 		pm_fork(pproc_e, proc_e, child_pid);
849 		m_out.m_type = VFS_PM_FORK_REPLY;
850 
851 		if (call_nr == VFS_PM_SRV_FORK) {
852 			m_out.m_type = VFS_PM_SRV_FORK_REPLY;
853 			pm_setuid(proc_e, reuid, reuid);
854 			pm_setgid(proc_e, regid, regid);
855 		}
856 
857 		m_out.VFS_PM_ENDPT = proc_e;
858 	}
859 	break;
860   case VFS_PM_SETGROUPS:
861 	{
862 		endpoint_t proc_e;
863 		int group_no;
864 		gid_t *group_addr;
865 
866 		proc_e = m_in.VFS_PM_ENDPT;
867 		group_no = m_in.VFS_PM_GROUP_NO;
868 		group_addr = (gid_t *) m_in.VFS_PM_GROUP_ADDR;
869 
870 		pm_setgroups(proc_e, group_no, group_addr);
871 
872 		m_out.m_type = VFS_PM_SETGROUPS_REPLY;
873 		m_out.VFS_PM_ENDPT = proc_e;
874 	}
875 	break;
876 
877   case VFS_PM_REBOOT:
878 	/* Reboot requests are not considered postponed PM work and are instead
879 	 * handled from a separate worker thread that is associated with PM's
880 	 * process. PM makes no regular VFS calls, and thus, from VFS's
881 	 * perspective, PM is always idle. Therefore, we can safely do this.
882 	 * We do assume that PM sends us only one VFS_PM_REBOOT message at
883 	 * once, or ever for that matter. :)
884 	 */
885 	worker_start(fproc_addr(PM_PROC_NR), pm_reboot, &m_in,
886 		FALSE /*use_spare*/);
887 
888 	return;
889 
890     default:
891 	printf("VFS: don't know how to handle PM request %d\n", call_nr);
892 
893 	return;
894   }
895 
896   r = ipc_send(PM_PROC_NR, &m_out);
897   if (r != OK)
898 	panic("service_pm: ipc_send failed: %d", r);
899 }
900 
901 
902 /*===========================================================================*
903  *				unblock					     *
904  *===========================================================================*/
905 static int unblock(rfp)
906 struct fproc *rfp;
907 {
908 /* Unblock a process that was previously blocked on a pipe or a lock.  This is
909  * done by reconstructing the original request and continuing/repeating it.
910  * This function returns TRUE when it has restored a request for execution, and
911  * FALSE if the caller should continue looking for work to do.
912  */
913   int blocked_on;
914 
915   blocked_on = rfp->fp_blocked_on;
916 
917   /* Reconstruct the original request from the saved data. */
918   memset(&m_in, 0, sizeof(m_in));
919   m_in.m_source = rfp->fp_endpoint;
920   m_in.m_type = rfp->fp_block_callnr;
921   switch (m_in.m_type) {
922   case VFS_READ:
923   case VFS_WRITE:
924 	assert(blocked_on == FP_BLOCKED_ON_PIPE);
925 	m_in.m_lc_vfs_readwrite.fd = rfp->fp_fd;
926 	m_in.m_lc_vfs_readwrite.buf = rfp->fp_io_buffer;
927 	m_in.m_lc_vfs_readwrite.len = rfp->fp_io_nbytes;
928 	break;
929   case VFS_FCNTL:
930 	assert(blocked_on == FP_BLOCKED_ON_LOCK);
931 	m_in.m_lc_vfs_fcntl.fd = rfp->fp_fd;
932 	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_io_nbytes;
933 	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_io_buffer;
934 	assert(m_in.m_lc_vfs_fcntl.cmd == F_SETLKW);
935 	break;
936   default:
937 	panic("unblocking call %d blocked on %d ??", m_in.m_type, blocked_on);
938   }
939 
940   rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
941   rfp->fp_flags &= ~FP_REVIVED;
942   reviving--;
943   assert(reviving >= 0);
944 
945   /* This should not be device I/O. If it is, it'll 'leak' grants. */
946   assert(!GRANT_VALID(rfp->fp_grant));
947 
948   /* Pending pipe reads/writes cannot be repeated as is, and thus require a
949    * special resumption procedure.
950    */
951   if (blocked_on == FP_BLOCKED_ON_PIPE) {
952 	worker_start(rfp, do_pending_pipe, &m_in, FALSE /*use_spare*/);
953 	return(FALSE);	/* Retrieve more work */
954   }
955 
956   /* A lock request. Repeat the original request as though it just came in. */
957   fp = rfp;
958   return(TRUE);	/* We've unblocked a process */
959 }
960