xref: /minix/minix/servers/vfs/main.c (revision 90b80121)
1 /*
 * This file contains the main program of the Virtual File System: a loop
 * that gets messages requesting work, carries out the work, and sends
 * replies.
4  *
5  * The entry points into this file are:
6  *   main:	main program of the Virtual File System
7  *   reply:	send a reply to a process after the requested work is done
8  *
9  */
10 
11 #include "fs.h"
12 #include <fcntl.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <signal.h>
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <sys/ioc_memory.h>
19 #include <sys/svrctl.h>
20 #include <sys/select.h>
21 #include <minix/callnr.h>
22 #include <minix/com.h>
23 #include <minix/const.h>
24 #include <minix/endpoint.h>
25 #include <minix/safecopies.h>
26 #include <minix/debug.h>
27 #include <minix/vfsif.h>
28 #include "file.h"
29 #include "vmnt.h"
30 #include "vnode.h"
31 
32 #if ENABLE_SYSCALL_STATS
33 EXTERN unsigned long calls_stats[NR_VFS_CALLS];
34 #endif
35 
36 /* Thread related prototypes */
37 static void do_reply(struct worker_thread *wp);
38 static void do_work(void);
39 static void do_init_root(void);
40 static void handle_work(void (*func)(void));
41 
42 static int get_work(void);
43 static void service_pm(void);
44 static int unblock(struct fproc *rfp);
45 
46 /* SEF functions and variables. */
47 static void sef_local_startup(void);
48 static int sef_cb_init_fresh(int type, sef_init_info_t *info);
49 static int sef_cb_init_lu(int type, sef_init_info_t *info);
50 
51 /*===========================================================================*
52  *				main					     *
53  *===========================================================================*/
int main(void)
{
/* This is the main program of the file system.  The main loop consists of
 * three major activities: getting new work, processing the work, and sending
 * the reply.  This loop never terminates as long as the file system runs.
 */
  int transid;
  struct worker_thread *wp;

  /* SEF local startup. */
  sef_local_startup();

  printf("Started VFS: %d worker thread(s)\n", NR_WTHREADS);

  /* This is the main loop that gets work, processes it, and sends replies. */
  while (TRUE) {
	worker_yield();	/* let other threads run */

	send_work();

	/* The get_work() function returns TRUE if we have a new message to
	 * process. It returns FALSE if it spawned other thread activities.
	 */
	if (!get_work())
		continue;

	/* If the message type carries a VFS-FS transaction ID, it is a reply
	 * from an FS endpoint to a request issued earlier by a worker thread;
	 * route it to that worker rather than treating it as new work.
	 */
	transid = TRNS_GET_ID(m_in.m_type);
	if (IS_VFS_FS_TRANSID(transid)) {
		wp = worker_get((thread_t) transid - VFS_TRANSID);
		if (wp == NULL || wp->w_fp == NULL) {
			printf("VFS: spurious message %d from endpoint %d\n",
				m_in.m_type, m_in.m_source);
			continue;
		}
		/* Strip the transaction ID before handing over the reply. */
		m_in.m_type = TRNS_DEL_ID(m_in.m_type);
		do_reply(wp);
		continue;
	} else if (who_e == PM_PROC_NR) { /* Calls from PM */
		/* Special control messages from PM */
		service_pm();
		continue;
	} else if (is_notify(call_nr)) {
		/* A task ipc_notify()ed us */
		switch (who_e) {
		case DS_PROC_NR:
			/* Start a thread to handle DS events, if no thread
			 * is pending or active for it already. DS is not
			 * supposed to issue calls to VFS or be the subject of
			 * postponed PM requests, so this should be no problem.
			 */
			if (worker_can_start(fp))
				handle_work(ds_event);
			break;
		case KERNEL:
			/* Debugging aid: dump stack traces of all threads. */
			mthread_stacktraces();
			break;
		case CLOCK:
			/* Timer expired. Used only for select(). Check it. */
			expire_timers(m_in.m_notify.timestamp);
			break;
		default:
			printf("VFS: ignoring notification from %d\n", who_e);
		}
		continue;
	} else if (who_p < 0) { /* i.e., message comes from a task */
		/* We're going to ignore this message. Tasks should
		 * send ipc_notify()s only.
		 */
		 printf("VFS: ignoring message from %d (%d)\n", who_e, call_nr);
		 continue;
	}

	if (IS_BDEV_RS(call_nr)) {
		/* We've got results for a block device request. */
		bdev_reply();
	} else if (IS_CDEV_RS(call_nr)) {
		/* We've got results for a character device request. */
		cdev_reply();
	} else if (IS_SDEV_RS(call_nr)) {
		/* We've got results for a socket driver request. */
		sdev_reply();
	} else {
		/* Normal syscall. This spawns a new thread. */
		handle_work(do_work);
	}
  }
  return(OK);				/* shouldn't come here */
}
142 
143 /*===========================================================================*
144  *			       handle_work				     *
145  *===========================================================================*/
static void handle_work(void (*func)(void))
{
/* Handle asynchronous device replies and new system calls. If the originating
 * endpoint is an FS endpoint, take extra care not to get in deadlock: allow
 * only one callback per FS at a time, and refuse work when no worker thread
 * is available to resolve a potential deadlock.
 */
  struct vmnt *vmp = NULL;
  endpoint_t proc_e;
  int use_spare = FALSE;

  proc_e = m_in.m_source;

  if (fp->fp_flags & FP_SRV_PROC) {
	vmp = find_vmnt(proc_e);
	if (vmp != NULL) {
		/* A callback from an FS endpoint. Can do only one at once. */
		if (vmp->m_flags & VMNT_CALLBACK) {
			replycode(proc_e, EAGAIN);
			return;
		}
		/* Already trying to resolve a deadlock? Can't handle more. */
		if (worker_available() == 0) {
			replycode(proc_e, EAGAIN);
			return;
		}
		/* A thread is available. Set callback flag. */
		vmp->m_flags |= VMNT_CALLBACK;
		/* NOTE(review): flag name suggests block-special-file access
		 * is forced to the root FS while this FS is still mounting —
		 * confirm against the VMNT_FORCEROOTBSF consumers.
		 */
		if (vmp->m_flags & VMNT_MOUNTING) {
			vmp->m_flags |= VMNT_FORCEROOTBSF;
		}
	}

	/* Use the spare thread to handle this request if needed. */
	use_spare = TRUE;
  }

  worker_start(fp, func, &m_in, use_spare);
}
182 
183 
184 /*===========================================================================*
185  *			       do_reply				             *
186  *===========================================================================*/
static void do_reply(struct worker_thread *wp)
{
/* Deliver an FS (or VM) reply, held in the global m_in, to the worker thread
 * that sent the corresponding request, and wake that thread up.
 */
  struct vmnt *vmp = NULL;

  /* Every FS reply must come from a mounted FS endpoint; VM is the only
   * non-vmnt endpoint allowed to reply via the transaction mechanism.
   */
  if(who_e != VM_PROC_NR && (vmp = find_vmnt(who_e)) == NULL)
	panic("Couldn't find vmnt for endpoint %d", who_e);

  /* Ignore replies from endpoints the worker is not actually waiting on. */
  if (wp->w_task != who_e) {
	printf("VFS: tid %d: expected %d to reply, not %d\n",
		wp->w_tid, wp->w_task, who_e);
	return;
  }
  /* It should be impossible to trigger the following case, but it is here for
   * consistency reasons: worker_stop() resets w_sendrec but not w_task.
   */
  if (wp->w_sendrec == NULL) {
	printf("VFS: tid %d: late reply from %d ignored\n", wp->w_tid, who_e);
	return;
  }
  /* Hand the reply message to the worker and clear its wait state. */
  *wp->w_sendrec = m_in;
  wp->w_sendrec = NULL;
  wp->w_task = NONE;
  if(vmp) vmp->m_comm.c_cur_reqs--; /* We've got our reply, make room for others */
  worker_signal(wp); /* Continue this thread */
}
212 
213 /*===========================================================================*
214  *			       do_pending_pipe				     *
215  *===========================================================================*/
static void do_pending_pipe(void)
{
/* Resume a read or write call on a pipe that was previously suspended.
 * Runs on a worker thread; the request to resume was reconstructed into
 * the m_in/job_m_in message by unblock().
 */
  vir_bytes buf;
  size_t nbytes, cum_io;
  int r, op, fd;
  struct filp *f;
  tll_access_t locktype;

  /* The process must no longer be marked as blocked at this point. */
  assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);

  /*
   * We take all our needed resumption state from the m_in message, which is
   * filled by unblock().  Since this is an internal resumption, there is no
   * need to perform extensive checks on the message fields.
   */
  fd = job_m_in.m_lc_vfs_readwrite.fd;
  buf = job_m_in.m_lc_vfs_readwrite.buf;
  nbytes = job_m_in.m_lc_vfs_readwrite.len;
  cum_io = job_m_in.m_lc_vfs_readwrite.cum_io;

  f = fp->fp_filp[fd];
  assert(f != NULL);

  /* Re-derive direction and lock type from the original call number. */
  locktype = (job_call_nr == VFS_READ) ? VNODE_READ : VNODE_WRITE;
  op = (job_call_nr == VFS_READ) ? READING : WRITING;
  lock_filp(f, locktype);

  r = rw_pipe(op, who_e, f, job_call_nr, fd, buf, nbytes, cum_io);

  if (r != SUSPEND) { /* Do we have results to report? */
	/* Process is writing, but there is no reader. Send a SIGPIPE signal.
	 * This should match the corresponding code in read_write().
	 */
	if (r == EPIPE && op == WRITING) {
		if (!(f->filp_flags & O_NOSIGPIPE))
			sys_kill(fp->fp_endpoint, SIGPIPE);
	}

	replycode(fp->fp_endpoint, r);
  }

  unlock_filp(f);
}
259 
260 /*===========================================================================*
261  *			       do_work					     *
262  *===========================================================================*/
static void do_work(void)
{
/* Dispatch a single system call to its handler via call_vec, and send the
 * reply unless the handler suspended the caller. Runs on a worker thread;
 * the request is available in the thread-local job_m_in/job_call_nr.
 */
  unsigned int call_index;
  int error;

  if (fp->fp_pid == PID_FREE) {
	/* Process vanished before we were able to handle request.
	 * Replying has no use. Just drop it.
	 */
	return;
  }

  /* Start from a clean reply message so no stale fields leak out. */
  memset(&job_m_out, 0, sizeof(job_m_out));

  /* At this point we assume that we're dealing with a call that has been
   * made specifically to VFS. Typically it will be a POSIX call from a
   * normal process, but we also handle a few calls made by drivers such
   * as UDS and VND through here. Call the internal function that does the
   * work.
   */
  if (IS_VFS_CALL(job_call_nr)) {
	call_index = (unsigned int) (job_call_nr - VFS_BASE);

	if (call_index < NR_VFS_CALLS && call_vec[call_index] != NULL) {
#if ENABLE_SYSCALL_STATS
		calls_stats[call_index]++;
#endif
		error = (*call_vec[call_index])();
	} else
		error = ENOSYS;
  } else
	error = ENOSYS;

  /* Copy the results back to the user and send reply. */
  if (error != SUSPEND) reply(&job_m_out, fp->fp_endpoint, error);
}
299 
300 /*===========================================================================*
301  *				sef_cb_lu_prepare			     *
302  *===========================================================================*/
303 static int sef_cb_lu_prepare(int state)
304 {
305 /* This function is called to decide whether we can enter the given live
306  * update state, and to prepare for such an update. If we are requested to
307  * update to a request-free or protocol-free state, make sure there is no work
308  * pending or being processed, and shut down all worker threads.
309  */
310 
311   switch (state) {
312   case SEF_LU_STATE_REQUEST_FREE:
313   case SEF_LU_STATE_PROTOCOL_FREE:
314 	if (!worker_idle()) {
315 		printf("VFS: worker threads not idle, blocking update\n");
316 		break;
317 	}
318 
319 	worker_cleanup();
320 
321 	return OK;
322   }
323 
324   return ENOTREADY;
325 }
326 
327 /*===========================================================================*
328  *			       sef_cb_lu_state_changed			     *
329  *===========================================================================*/
330 static void sef_cb_lu_state_changed(int old_state, int state)
331 {
332 /* Worker threads (especially their stacks) pose a serious problem for state
333  * transfer during live update, and therefore, we shut down all worker threads
334  * during live update and restart them afterwards. This function is called in
335  * the old VFS instance when the state changed. We use it to restart worker
336  * threads after a failed live update.
337  */
338 
339   if (state != SEF_LU_STATE_NULL)
340 	return;
341 
342   switch (old_state) {
343   case SEF_LU_STATE_REQUEST_FREE:
344   case SEF_LU_STATE_PROTOCOL_FREE:
345 	worker_init();
346   }
347 }
348 
349 /*===========================================================================*
350  *				sef_cb_init_lu				     *
351  *===========================================================================*/
352 static int sef_cb_init_lu(int type, sef_init_info_t *info)
353 {
354 /* This function is called in the new VFS instance during a live update. */
355   int r;
356 
357   /* Perform regular state transfer. */
358   if ((r = SEF_CB_INIT_LU_DEFAULT(type, info)) != OK)
359 	return r;
360 
361   /* Recreate worker threads, if necessary. */
362   switch (info->prepare_state) {
363   case SEF_LU_STATE_REQUEST_FREE:
364   case SEF_LU_STATE_PROTOCOL_FREE:
365 	worker_init();
366   }
367 
368   return OK;
369 }
370 
371 /*===========================================================================*
372  *			       sef_local_startup			     *
373  *===========================================================================*/
static void sef_local_startup(void)
{
/* Register all SEF callbacks for this service and complete SEF startup. */

  /* Register init callbacks. */
  sef_setcb_init_fresh(sef_cb_init_fresh);
  sef_setcb_init_restart(SEF_CB_INIT_RESTART_STATEFUL);

  /* Register live update callbacks. */
  sef_setcb_init_lu(sef_cb_init_lu);
  sef_setcb_lu_prepare(sef_cb_lu_prepare);
  sef_setcb_lu_state_changed(sef_cb_lu_state_changed);
  sef_setcb_lu_state_isvalid(sef_cb_lu_state_isvalid_standard);

  /* Let SEF perform startup. */
  sef_startup();
}
389 
390 /*===========================================================================*
391  *				sef_cb_init_fresh			     *
392  *===========================================================================*/
393 static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
394 {
395 /* Initialize the virtual file server. */
396   int s, i;
397   struct fproc *rfp;
398   message mess;
399   struct rprocpub rprocpub[NR_BOOT_PROCS];
400 
401   self = NULL;
402   verbose = 0;
403 
404   /* Initialize proc endpoints to NONE */
405   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
406 	rfp->fp_endpoint = NONE;
407 	rfp->fp_pid = PID_FREE;
408   }
409 
410   /* Initialize the process table with help of the process manager messages.
411    * Expect one message for each system process with its slot number and pid.
412    * When no more processes follow, the magic process number NONE is sent.
413    * Then, stop and synchronize with the PM.
414    */
415   do {
416 	if ((s = sef_receive(PM_PROC_NR, &mess)) != OK)
417 		panic("VFS: couldn't receive from PM: %d", s);
418 
419 	if (mess.m_type != VFS_PM_INIT)
420 		panic("unexpected message from PM: %d", mess.m_type);
421 
422 	if (NONE == mess.VFS_PM_ENDPT) break;
423 
424 	rfp = &fproc[mess.VFS_PM_SLOT];
425 	rfp->fp_flags = FP_NOFLAGS;
426 	rfp->fp_pid = mess.VFS_PM_PID;
427 	rfp->fp_endpoint = mess.VFS_PM_ENDPT;
428 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
429 	rfp->fp_realuid = (uid_t) SYS_UID;
430 	rfp->fp_effuid = (uid_t) SYS_UID;
431 	rfp->fp_realgid = (gid_t) SYS_GID;
432 	rfp->fp_effgid = (gid_t) SYS_GID;
433 	rfp->fp_umask = ~0;
434   } while (TRUE);			/* continue until process NONE */
435   mess.m_type = OK;			/* tell PM that we succeeded */
436   s = ipc_send(PM_PROC_NR, &mess);		/* send synchronization message */
437 
438   system_hz = sys_hz();
439 
440   /* Subscribe to block and character driver events. */
441   s = ds_subscribe("drv\\.[bc]..\\..*", DSF_INITIAL | DSF_OVERWRITE);
442   if (s != OK) panic("VFS: can't subscribe to driver events (%d)", s);
443 
444   /* Initialize worker threads */
445   worker_init();
446 
447   /* Initialize global locks */
448   if (mthread_mutex_init(&bsf_lock, NULL) != 0)
449 	panic("VFS: couldn't initialize block special file lock");
450 
451   init_dmap();			/* Initialize device table. */
452   init_smap();			/* Initialize socket table. */
453 
454   /* Map all the services in the boot image. */
455   if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
456 			    (vir_bytes) rprocpub, sizeof(rprocpub))) != OK){
457 	panic("sys_safecopyfrom failed: %d", s);
458   }
459   for (i = 0; i < NR_BOOT_PROCS; i++) {
460 	if (rprocpub[i].in_use) {
461 		if ((s = map_service(&rprocpub[i])) != OK) {
462 			panic("VFS: unable to map service: %d", s);
463 		}
464 	}
465   }
466 
467   /* Initialize locks and initial values for all processes. */
468   for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
469 	if (mutex_init(&rfp->fp_lock, NULL) != 0)
470 		panic("unable to initialize fproc lock");
471 	rfp->fp_worker = NULL;
472 #if LOCK_DEBUG
473 	rfp->fp_vp_rdlocks = 0;
474 	rfp->fp_vmnt_rdlocks = 0;
475 #endif
476 
477 	/* Initialize process directories. mount_fs will set them to the
478 	 * correct values.
479 	 */
480 	for (i = 0; i < OPEN_MAX; i++)
481 		rfp->fp_filp[i] = NULL;
482 	rfp->fp_rd = NULL;
483 	rfp->fp_wd = NULL;
484   }
485 
486   init_vnodes();		/* init vnodes */
487   init_vmnts();			/* init vmnt structures */
488   init_select();		/* init select() structures */
489   init_filps();			/* Init filp structures */
490 
491   /* Mount PFS and initial file system root. */
492   worker_start(fproc_addr(VFS_PROC_NR), do_init_root, &mess /*unused*/,
493 	FALSE /*use_spare*/);
494 
495   return(OK);
496 }
497 
498 /*===========================================================================*
499  *			       do_init_root				     *
500  *===========================================================================*/
static void do_init_root(void)
{
/* Worker-thread job that mounts the pipe file server and the initial root
 * file system. External requests are blocked for the duration of the
 * initial mounting.
 */
  char *mount_type, *mount_label;
  int r;

  /* Disallow requests from e.g. init(8) while doing the initial mounting. */
  worker_allow(FALSE);

  /* Mount the pipe file server. */
  mount_pfs();

  /* Mount the root file system. */
  mount_type = "mfs";       /* FIXME: use boot image process name instead */
  mount_label = "fs_imgrd"; /* FIXME: obtain this from RS */

  r = mount_fs(DEV_IMGRD, "bootramdisk", "/", MFS_PROC_NR, 0, mount_type,
	mount_label);
  if (r != OK)
	panic("Failed to initialize root");

  /* All done with mounting, allow requests now. */
  worker_allow(TRUE);
}
524 
525 /*===========================================================================*
526  *				lock_proc				     *
527  *===========================================================================*/
528 void lock_proc(struct fproc *rfp)
529 {
530   int r;
531   struct worker_thread *org_self;
532 
533   r = mutex_trylock(&rfp->fp_lock);
534   if (r == 0) return;
535 
536   org_self = worker_suspend();
537 
538   if ((r = mutex_lock(&rfp->fp_lock)) != 0)
539 	panic("unable to lock fproc lock: %d", r);
540 
541   worker_resume(org_self);
542 }
543 
544 /*===========================================================================*
545  *				unlock_proc				     *
546  *===========================================================================*/
547 void unlock_proc(struct fproc *rfp)
548 {
549   int r;
550 
551   if ((r = mutex_unlock(&rfp->fp_lock)) != 0)
552 	panic("Failed to unlock: %d", r);
553 }
554 
555 /*===========================================================================*
556  *				thread_cleanup				     *
557  *===========================================================================*/
void thread_cleanup(void)
{
/* Perform cleanup actions for a worker thread. */

#if LOCK_DEBUG
  /* Verify that this thread released all locks it acquired. */
  check_filp_locks_by_me();
  check_vnode_locks_by_me(fp);
  check_vmnt_locks_by_me(fp);
#endif

  if (fp->fp_flags & FP_SRV_PROC) {
	struct vmnt *vmp;

	/* If this request was an FS callback (see handle_work()), clear the
	 * flag so the FS endpoint may issue a new callback again.
	 */
	if ((vmp = find_vmnt(fp->fp_endpoint)) != NULL) {
		vmp->m_flags &= ~VMNT_CALLBACK;
	}
  }
}
576 
577 /*===========================================================================*
578  *				get_work				     *
579  *===========================================================================*/
static int get_work(void)
{
  /* Normally wait for new input.  However, if 'reviving' is nonzero, a
   * suspended process must be awakened.  Return TRUE if there is a message to
   * process (usually newly received, but possibly a resumed request), or FALSE
   * if a thread for other activities has been spawned instead.
   */
  int r, proc_p;
  register struct fproc *rp;

  if (reviving != 0) {
	/* Find a suspended process. */
	for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++)
		if (rp->fp_pid != PID_FREE && (rp->fp_flags & FP_REVIVED))
			return unblock(rp); /* So main loop can process job */

	/* 'reviving' is nonzero but no process is marked FP_REVIVED: the
	 * revive bookkeeping is inconsistent, which is fatal.
	 */
	panic("VFS: get_work couldn't revive anyone");
  }

  for(;;) {
	/* Normal case.  No one to revive. Get a useful request. */
	if ((r = sef_receive(ANY, &m_in)) != OK) {
		panic("VFS: sef_receive error: %d", r);
	}

	/* Resolve the sender endpoint to an fproc slot; fp stays NULL for
	 * sources outside the process table (e.g. kernel tasks).
	 */
	proc_p = _ENDPOINT_P(m_in.m_source);
	if (proc_p < 0 || proc_p >= NR_PROCS) fp = NULL;
	else fp = &fproc[proc_p];

	/* Negative who_p is never used to access the fproc array. Negative
	 * numbers (kernel tasks) are treated in a special way.
	 */
	if (fp && fp->fp_endpoint == NONE) {
		printf("VFS: ignoring request from %d: NONE endpoint %d (%d)\n",
			m_in.m_source, who_p, m_in.m_type);
		continue;
	}

	/* Internal consistency check; our mental image of process numbers and
	 * endpoints must match with how the rest of the system thinks of them.
	 */
	if (fp && fp->fp_endpoint != who_e) {
		if (fproc[who_p].fp_endpoint == NONE)
			printf("slot unknown even\n");

		panic("VFS: receive endpoint inconsistent (source %d, who_p "
			"%d, stored ep %d, who_e %d).\n", m_in.m_source, who_p,
			fproc[who_p].fp_endpoint, who_e);
	}

	return TRUE;
  }
  /* NOTREACHED */
}
634 
635 /*===========================================================================*
636  *				reply					     *
637  *===========================================================================*/
void reply(message *m_out, endpoint_t whom, int result)
{
/* Send a reply to a user process.  If the send fails, just ignore it.
 * 'result' becomes the message type of the reply; the send is non-blocking
 * so the main loop can never hang on an unreceptive destination.
 */
  int r;

  m_out->m_type = result;
  r = ipc_sendnb(whom, m_out);
  if (r != OK) {
	printf("VFS: %d couldn't send reply %d to %d: %d\n", mthread_self(),
		result, whom, r);
	util_stacktrace();
  }
}
651 
652 /*===========================================================================*
653  *				replycode				     *
654  *===========================================================================*/
655 void replycode(endpoint_t whom, int result)
656 {
657 /* Send a reply to a user process.  If the send fails, just ignore it. */
658   message m_out;
659 
660   memset(&m_out, 0, sizeof(m_out));
661 
662   reply(&m_out, whom, result);
663 }
664 
665 /*===========================================================================*
666  *				service_pm_postponed			     *
667  *===========================================================================*/
void service_pm_postponed(void)
{
/* Worker-thread job that carries out a PM request which was postponed until
 * the target process became idle (see service_pm()). Performs the requested
 * operation and sends the corresponding reply message back to PM.
 */
  int r, term_signal;
  vir_bytes core_path;
  vir_bytes exec_path, stack_frame, pc, newsp, ps_str;
  size_t exec_path_len, stack_frame_len;
  endpoint_t proc_e;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch(job_call_nr) {
  case VFS_PM_EXEC:
	/* Perform the VFS side of an exec() on behalf of the target. */
	proc_e = job_m_in.VFS_PM_ENDPT;
	exec_path = (vir_bytes) job_m_in.VFS_PM_PATH;
	exec_path_len = (size_t) job_m_in.VFS_PM_PATH_LEN;
	stack_frame = (vir_bytes) job_m_in.VFS_PM_FRAME;
	stack_frame_len = (size_t) job_m_in.VFS_PM_FRAME_LEN;
	ps_str = (vir_bytes) job_m_in.VFS_PM_PS_STR;

	/* The worker must be running on behalf of the target process. */
	assert(proc_e == fp->fp_endpoint);

	r = pm_exec(exec_path, exec_path_len, stack_frame, stack_frame_len,
		&pc, &newsp, &ps_str);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_EXEC_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_PC = (void *) pc;
	m_out.VFS_PM_STATUS = r;
	m_out.VFS_PM_NEWSP = (void *) newsp;
	m_out.VFS_PM_NEWPS_STR = ps_str;

	break;

  case VFS_PM_EXIT:
	/* Clean up VFS state of an exiting process. */
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	pm_exit();

	/* Reply dummy status to PM for synchronization */
	m_out.m_type = VFS_PM_EXIT_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  case VFS_PM_DUMPCORE:
	proc_e = job_m_in.VFS_PM_ENDPT;
	term_signal = job_m_in.VFS_PM_TERM_SIG;
	core_path = (vir_bytes) job_m_in.VFS_PM_PATH;

	/* A zero signal used to indicate that a coredump should be generated
	 * without terminating the target process, but this was broken in so
	 * many ways that we no longer support this. Userland should implement
	 * this functionality itself, for example through ptrace(2).
	 */
	if (term_signal == 0)
		panic("no termination signal given for coredump!");

	assert(proc_e == fp->fp_endpoint);

	r = pm_dumpcore(term_signal, core_path);

	/* Reply status to PM */
	m_out.m_type = VFS_PM_CORE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;
	m_out.VFS_PM_STATUS = r;

	break;

  case VFS_PM_UNPAUSE:
	/* Cancel any blocking VFS call the target is suspended on. */
	proc_e = job_m_in.VFS_PM_ENDPT;

	assert(proc_e == fp->fp_endpoint);

	unpause();

	m_out.m_type = VFS_PM_UNPAUSE_REPLY;
	m_out.VFS_PM_ENDPT = proc_e;

	break;

  default:
	panic("Unhandled postponed PM call %d", job_m_in.m_type);
  }

  /* PM blocks on this reply; failing to deliver it is fatal. */
  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm_postponed: ipc_send failed: %d", r);
}
760 
761 /*===========================================================================*
762  *				service_pm				     *
763  *===========================================================================*/
static void service_pm(void)
{
/* Process a request from PM. This function is called from the main thread, and
 * may therefore not block. Any requests that may require blocking the calling
 * thread must be executed in a separate thread. Aside from VFS_PM_REBOOT, all
 * requests from PM involve another, target process: for example, PM tells VFS
 * that a process is performing a setuid() call. For some requests however,
 * that other process may not be idle, and in that case VFS must serialize the
 * PM request handling with any operation is it handling for that target
 * process. As it happens, the requests that may require blocking are also the
 * ones where the target process may not be idle. For both these reasons, such
 * requests are run in worker threads associated to the target process.
 */
  struct fproc *rfp;
  int r, slot;
  message m_out;

  memset(&m_out, 0, sizeof(m_out));

  switch (call_nr) {
  case VFS_PM_SETUID:
	{
		endpoint_t proc_e;
		uid_t euid, ruid;

		proc_e = m_in.VFS_PM_ENDPT;
		euid = m_in.VFS_PM_EID;
		ruid = m_in.VFS_PM_RID;

		pm_setuid(proc_e, euid, ruid);

		m_out.m_type = VFS_PM_SETUID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETGID:
	{
		endpoint_t proc_e;
		gid_t egid, rgid;

		proc_e = m_in.VFS_PM_ENDPT;
		egid = m_in.VFS_PM_EID;
		rgid = m_in.VFS_PM_RID;

		pm_setgid(proc_e, egid, rgid);

		m_out.m_type = VFS_PM_SETGID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_SETSID:
	{
		endpoint_t proc_e;

		proc_e = m_in.VFS_PM_ENDPT;
		pm_setsid(proc_e);

		m_out.m_type = VFS_PM_SETSID_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_EXEC:
  case VFS_PM_EXIT:
  case VFS_PM_DUMPCORE:
  case VFS_PM_UNPAUSE:
	/* These calls may block and must be serialized with any system call
	 * in progress for the target process; hand them to a worker thread
	 * associated with that process (see service_pm_postponed()).
	 */
	{
		endpoint_t proc_e = m_in.VFS_PM_ENDPT;

		if(isokendpt(proc_e, &slot) != OK) {
			printf("VFS: proc ep %d not ok\n", proc_e);
			return;
		}

		rfp = &fproc[slot];

		/* PM requests on behalf of a proc are handled after the
		 * system call that might be in progress for that proc has
		 * finished. If the proc is not busy, we start a new thread.
		 */
		worker_start(rfp, NULL, &m_in, FALSE /*use_spare*/);

		/* No reply here; service_pm_postponed() replies when done. */
		return;
	}
  case VFS_PM_FORK:
  case VFS_PM_SRV_FORK:
	{
		endpoint_t pproc_e, proc_e;
		pid_t child_pid;
		uid_t reuid;
		gid_t regid;

		pproc_e = m_in.VFS_PM_PENDPT;
		proc_e = m_in.VFS_PM_ENDPT;
		child_pid = m_in.VFS_PM_CPID;
		reuid = m_in.VFS_PM_REUID;
		regid = m_in.VFS_PM_REGID;

		pm_fork(pproc_e, proc_e, child_pid);
		m_out.m_type = VFS_PM_FORK_REPLY;

		/* A server fork additionally sets the child's credentials. */
		if (call_nr == VFS_PM_SRV_FORK) {
			m_out.m_type = VFS_PM_SRV_FORK_REPLY;
			pm_setuid(proc_e, reuid, reuid);
			pm_setgid(proc_e, regid, regid);
		}

		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;
  case VFS_PM_SETGROUPS:
	{
		endpoint_t proc_e;
		int group_no;
		gid_t *group_addr;

		proc_e = m_in.VFS_PM_ENDPT;
		group_no = m_in.VFS_PM_GROUP_NO;
		group_addr = (gid_t *) m_in.VFS_PM_GROUP_ADDR;

		pm_setgroups(proc_e, group_no, group_addr);

		m_out.m_type = VFS_PM_SETGROUPS_REPLY;
		m_out.VFS_PM_ENDPT = proc_e;
	}
	break;

  case VFS_PM_REBOOT:
	/* Reboot requests are not considered postponed PM work and are instead
	 * handled from a separate worker thread that is associated with PM's
	 * process. PM makes no regular VFS calls, and thus, from VFS's
	 * perspective, PM is always idle. Therefore, we can safely do this.
	 * We do assume that PM sends us only one VFS_PM_REBOOT message at
	 * once, or ever for that matter. :)
	 */
	worker_start(fproc_addr(PM_PROC_NR), pm_reboot, &m_in,
		FALSE /*use_spare*/);

	return;

    default:
	printf("VFS: don't know how to handle PM request %d\n", call_nr);

	return;
  }

  /* Send the reply for the synchronously handled cases above. */
  r = ipc_send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm: ipc_send failed: %d", r);
}
916 
917 
918 /*===========================================================================*
919  *				unblock					     *
920  *===========================================================================*/
static int
unblock(struct fproc *rfp)
{
/* Unblock a process that was previously blocked on a pipe or a lock.  This is
 * done by reconstructing the original request and continuing/repeating it.
 * This function returns TRUE when it has restored a request for execution, and
 * FALSE if the caller should continue looking for work to do.
 */
  int blocked_on;

  blocked_on = rfp->fp_blocked_on;

  /* Reconstruct the original request from the saved data. */
  memset(&m_in, 0, sizeof(m_in));
  m_in.m_source = rfp->fp_endpoint;
  switch (blocked_on) {
  case FP_BLOCKED_ON_PIPE:
	/* Rebuild the suspended read/write from the saved pipe state. */
	assert(rfp->fp_pipe.callnr == VFS_READ ||
	    rfp->fp_pipe.callnr == VFS_WRITE);
	m_in.m_type = rfp->fp_pipe.callnr;
	m_in.m_lc_vfs_readwrite.fd = rfp->fp_pipe.fd;
	m_in.m_lc_vfs_readwrite.buf = rfp->fp_pipe.buf;
	m_in.m_lc_vfs_readwrite.len = rfp->fp_pipe.nbytes;
	m_in.m_lc_vfs_readwrite.cum_io = rfp->fp_pipe.cum_io;
	break;
  case FP_BLOCKED_ON_FLOCK:
	/* Rebuild the blocking fcntl(F_SETLKW) from the saved lock state. */
	assert(rfp->fp_flock.cmd == F_SETLKW);
	m_in.m_type = VFS_FCNTL;
	m_in.m_lc_vfs_fcntl.fd = rfp->fp_flock.fd;
	m_in.m_lc_vfs_fcntl.cmd = rfp->fp_flock.cmd;
	m_in.m_lc_vfs_fcntl.arg_ptr = rfp->fp_flock.arg;
	break;
  default:
	panic("unblocking call blocked on %d ??", blocked_on);
  }

  rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;	/* no longer blocked */
  rfp->fp_flags &= ~FP_REVIVED;
  reviving--;
  assert(reviving >= 0);

  /* Pending pipe reads/writes cannot be repeated as is, and thus require a
   * special resumption procedure.
   */
  if (blocked_on == FP_BLOCKED_ON_PIPE) {
	worker_start(rfp, do_pending_pipe, &m_in, FALSE /*use_spare*/);
	return(FALSE);	/* Retrieve more work */
  }

  /* A lock request. Repeat the original request as though it just came in. */
  fp = rfp;
  return(TRUE);	/* We've unblocked a process */
}
974