xref: /minix/minix/kernel/system.c (revision 03ac74ed)
/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message
 * that is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is not sending
 *   send_diag_sig:	send a diagnostics signal to interested processes
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */

#include "kernel/system.h"
#include "kernel/vm.h"
#include "kernel/clock.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>

/* Definition of the call vector that defines the mapping of system calls
 * to handler functions. The vector is initialized in system_init() with
 * map(), which asserts that the call numbers are within range. Entries that
 * are never mapped stay NULL and are rejected at dispatch time.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);

#define map(call_nr, handler)					\
    {	int call_index = call_nr-KERNEL_CALL;			\
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
	call_vec[call_index] = (handler); }
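
/* For illustration (not part of the original file): map() expands to a small
 * statement block, so a line like "map(SYS_KILL, do_kill);" in system_init()
 * below is equivalent to the following sketch.
 */
#if 0
    {	int call_index = SYS_KILL - KERNEL_CALL;
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);
	call_vec[call_index] = (do_kill); }
#endif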

static void kernel_call_finish(struct proc * caller, message *msg, int result)
{
  if(result == VMSUSPEND) {
	  /* Special case: message has to be saved for handling
	   * until VM tells us it's allowed. VM has been notified
	   * and we must wait for its reply to restart the call.
	   */
	  assert(RTS_ISSET(caller, RTS_VMREQUEST));
	  assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
	  caller->p_vmrequest.saved.reqmsg = *msg;
	  caller->p_misc_flags |= MF_KCALL_RESUME;
  } else {
	  /*
	   * The call is finished. We may have been suspended because of VM,
	   * so clear the saved request message.
	   */
	  caller->p_vmrequest.saved.reqmsg.m_source = NONE;
	  if (result != EDONTREPLY) {
		  /* copy the result as a message to the original user buffer */
		  msg->m_source = SYSTEM;
		  msg->m_type = result;		/* report status of call */
#if DEBUG_IPC_HOOK
		  hook_ipc_msgkresult(msg, caller);
#endif
		  if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
			  printf("WARNING wrong user pointer 0x%08x from "
					  "process %s / %d\n",
					  caller->p_delivermsg_vir,
					  caller->p_name,
					  caller->p_endpoint);
			  cause_sig(proc_nr(caller), SIGSEGV);
		  }
	  }
  }
}
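
/* When kernel_call_finish() saves a request with VMSUSPEND, the call is
 * picked up again in kernel_call_resume() further down in this file, once VM
 * clears RTS_VMREQUEST; MF_KCALL_RESUME marks that retry.
 */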

static int kernel_call_dispatch(struct proc * caller, message *msg)
{
  int result = OK;
  int call_nr;

#if DEBUG_IPC_HOOK
  hook_ipc_msgkcall(msg, caller);
#endif
  call_nr = msg->m_type - KERNEL_CALL;

  /* See if the caller made a valid request and try to handle it. */
  if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {	/* check call number */
	  printf("SYSTEM: illegal request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = EBADREQUEST;			/* illegal message type */
  }
  else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
	  printf("SYSTEM: denied request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = ECALLDENIED;			/* call denied by call mask */
  } else {
	  /* handle the system call */
	  if (call_vec[call_nr])
		  result = (*call_vec[call_nr])(caller, msg);
	  else {
		  printf("Unused kernel call %d from %d\n",
				  call_nr, caller->p_endpoint);
		  result = EBADREQUEST;
	  }
  }

  return result;
}

/*===========================================================================*
 *				kernel_call				     *
 *===========================================================================*/
/*
 * This function checks the basic syscall parameters and, if they are
 * accepted, dispatches the call to the right handler.
 */
void kernel_call(message *m_user, struct proc * caller)
{
  int result = OK;
  message msg;

  caller->p_delivermsg_vir = (vir_bytes) m_user;
  /*
   * The LDT and CR3 of the caller process are already loaded, either because
   * the process has just trapped into the kernel, or because they were set
   * in switch_to_user() before we resumed execution of an interrupted
   * kernel call.
   */
  if (copy_msg_from_user(m_user, &msg) == 0) {
	  msg.m_source = caller->p_endpoint;
	  result = kernel_call_dispatch(caller, &msg);
  }
  else {
	  printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
			  m_user, caller->p_name, caller->p_endpoint);
	  cause_sig(proc_nr(caller), SIGSEGV);
	  return;
  }

  /* remember who invoked the kcall so we can bill it its time */
  kbill_kcall = caller;

  kernel_call_finish(caller, &msg, result);
}
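
/* A sketch of how a request reaches this function, assuming the standard
 * _kernel_call() stub from the MINIX system library (illustrative only; the
 * trap mechanics and message layout per call live outside this file):
 */
#if 0
  message m;
  memset(&m, 0, sizeof(m));	/* call-specific request fields omitted */
  m.m_type = SYS_TIMES;		/* KERNEL_CALL + a valid call index */
  _kernel_call(SYS_TIMES, &m);	/* traps into the kernel; kernel_call()
				 * copies 'm' in and the reply back out */
#endif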

/*===========================================================================*
 *				system_init				     *
 *===========================================================================*/
void system_init(void)
{
  register struct priv *sp;
  int i;

  /* Initialize IRQ handler hooks. Mark all hooks available. */
  for (i=0; i<NR_IRQ_HOOKS; i++) {
      irq_hooks[i].proc_nr_e = NONE;
  }

  /* Initialize all alarm timers for all processes. */
  for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
    tmr_inittimer(&(sp->s_alarm_timer));
  }

  /* Initialize the call vector to NULL, so that disabled or nonexistent
   * system calls are caught at dispatch time. Then explicitly map known
   * calls to their handler functions. This is done with a macro that asserts
   * that the call number is legal. The ordering is not important here.
   */
  for (i=0; i<NR_SYS_CALLS; i++) {
      call_vec[i] = NULL;
  }

  /* Process management. */
  map(SYS_FORK, do_fork); 		/* a process forked a new process */
  map(SYS_EXEC, do_exec);		/* update process after execute */
  map(SYS_CLEAR, do_clear);		/* clean up after process exit */
  map(SYS_EXIT, do_exit);		/* a system process wants to exit */
  map(SYS_PRIVCTL, do_privctl);		/* system privileges control */
  map(SYS_TRACE, do_trace);		/* request a trace operation */
  map(SYS_SETGRANT, do_setgrant);	/* get/set own parameters */
  map(SYS_RUNCTL, do_runctl);		/* set/clear stop flag of a process */
  map(SYS_UPDATE, do_update);		/* update a process into another */
  map(SYS_STATECTL, do_statectl);	/* let a process control its state */

  /* Signal handling. */
  map(SYS_KILL, do_kill); 		/* cause a process to be signaled */
  map(SYS_GETKSIG, do_getksig);		/* signal manager checks for signals */
  map(SYS_ENDKSIG, do_endksig);		/* signal manager finished signal */
  map(SYS_SIGSEND, do_sigsend);		/* start POSIX-style signal */
  map(SYS_SIGRETURN, do_sigreturn);	/* return from POSIX-style signal */

  /* Device I/O. */
  map(SYS_IRQCTL, do_irqctl);  		/* interrupt control operations */
#if defined(__i386__)
  map(SYS_DEVIO, do_devio);   		/* inb, inw, inl, outb, outw, outl */
  map(SYS_VDEVIO, do_vdevio);  		/* vector with devio requests */
#endif

  /* Memory management. */
  map(SYS_MEMSET, do_memset);		/* write char to memory area */
  map(SYS_VMCTL, do_vmctl);		/* various VM process settings */

  /* Copying. */
  map(SYS_UMAP, do_umap);		/* map virtual to physical address */
  map(SYS_UMAP_REMOTE, do_umap_remote);	/* do_umap for non-caller process */
  map(SYS_VUMAP, do_vumap);		/* vectored virtual to physical map */
  map(SYS_VIRCOPY, do_vircopy); 	/* use pure virtual addressing */
  map(SYS_PHYSCOPY, do_copy);	 	/* use physical addressing */
  map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
  map(SYS_SAFECOPYTO, do_safecopy_to);	/* copy with pre-granted permission */
  map(SYS_VSAFECOPY, do_vsafecopy);	/* vectored safecopy */

  /* safe memset */
  map(SYS_SAFEMEMSET, do_safememset);	/* safememset */

  /* Clock functionality. */
  map(SYS_TIMES, do_times);		/* get uptime and process times */
  map(SYS_SETALARM, do_setalarm);	/* schedule a synchronous alarm */
  map(SYS_STIME, do_stime);		/* set the boottime */
  map(SYS_SETTIME, do_settime);		/* set the system time (realtime) */
  map(SYS_VTIMER, do_vtimer);		/* set or retrieve a virtual timer */

  /* System control. */
  map(SYS_ABORT, do_abort);		/* abort MINIX */
  map(SYS_GETINFO, do_getinfo); 	/* request system information */
  map(SYS_DIAGCTL, do_diagctl);		/* diagnostics-related functionality */

  /* Profiling. */
  map(SYS_SPROF, do_sprofile);		/* start/stop statistical profiling */

  /* arm-specific. */
#if defined(__arm__)
  map(SYS_PADCONF, do_padconf);		/* configure pinmux */
#endif

  /* i386-specific. */
#if defined(__i386__)
  map(SYS_READBIOS, do_readbios);	/* read from BIOS locations */
  map(SYS_IOPENABLE, do_iopenable); 	/* Enable I/O */
  map(SYS_SDEVIO, do_sdevio);		/* phys_insb, _insw, _outsb, _outsw */
#endif

  /* Machine state switching. */
  map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
  map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */

  /* Scheduling */
  map(SYS_SCHEDULE, do_schedule);	/* reschedule a process */
  map(SYS_SCHEDCTL, do_schedctl);	/* change process scheduler */

}
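
/* Adding a new kernel call would amount to one more map() line in
 * system_init() above, plus a handler function and a KERNEL_CALL-based call
 * number. SYS_EXAMPLE and do_example are made-up names for illustration:
 */
#if 0
  map(SYS_EXAMPLE, do_example);		/* hypothetical new kernel call */
#endif
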
/*===========================================================================*
 *				get_priv				     *
 *===========================================================================*/
int get_priv(
  register struct proc *rc,		/* new (child) process pointer */
  int priv_id				/* privilege id */
)
{
/* Allocate a new privilege structure for a system process. Privilege ids
 * can be assigned either statically or dynamically.
 */
  register struct priv *sp;                 /* privilege structure */

  if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
      for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
          if (sp->s_proc_nr == NONE) break;
      if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
  }
  else {                                    /* allocate slot from id */
      if(!is_static_priv_id(priv_id)) {
          return EINVAL;                    /* invalid static priv id */
      }
      if(priv[priv_id].s_proc_nr != NONE) {
          return EBUSY;                     /* slot already in use */
      }
      sp = &priv[priv_id];
  }
  rc->p_priv = sp;			    /* assign new slot */
  rc->p_priv->s_proc_nr = proc_nr(rc);	    /* set association */

  return(OK);
}
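
/* A minimal usage sketch (hedged; cf. how do_privctl manages privileges):
 * give a process slot a dynamically allocated privilege structure.
 * 'some_proc_nr' is a made-up variable for illustration.
 */
#if 0
  struct proc *rp = proc_addr(some_proc_nr);
  int r = get_priv(rp, NULL_PRIV_ID);	/* NULL_PRIV_ID: any free dynamic slot */
  if (r != OK)
	return r;			/* e.g. ENOSPC: no free slot left */
#endif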

/*===========================================================================*
 *				set_sendto_bit				     *
 *===========================================================================*/
void set_sendto_bit(const struct proc *rp, int id)
{
/* Allow a process to send messages to the process(es) associated with the
 * system privilege structure with the given ID.
 */

  /* Disallow the process from sending to a privilege structure with no
   * associated process, and disallow the process from sending to itself.
   */
  if (id_to_nr(id) == NONE || priv_id(rp) == id) {
	unset_sys_bit(priv(rp)->s_ipc_to, id);
	return;
  }

  set_sys_bit(priv(rp)->s_ipc_to, id);

  /* The process that this process can now send to must be able to reply (or
   * vice versa). Therefore, its send mask should be updated as well. Ignore
   * receivers that don't support traps other than RECEIVE; they can't reply
   * or send messages anyway.
   */
  if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
      set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *				unset_sendto_bit			     *
 *===========================================================================*/
void unset_sendto_bit(const struct proc *rp, int id)
{
/* Prevent a process from sending to another process. Retain the send mask
 * symmetry by also unsetting the bit for the other direction.
 */

  unset_sys_bit(priv(rp)->s_ipc_to, id);

  unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *			      fill_sendto_mask				     *
 *===========================================================================*/
void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
{
  int i;

  for (i=0; i < NR_SYS_PROCS; i++) {
  	if (get_sys_bit(*map, i))
  		set_sendto_bit(rp, i);
  	else
  		unset_sendto_bit(rp, i);
  }
}

/*===========================================================================*
 *				send_sig				     *
 *===========================================================================*/
int send_sig(endpoint_t ep, int sig_nr)
{
/* Notify a system process about a signal. This is straightforward. Simply
 * set the signal that is to be delivered in the pending signals map and
 * send a notification with source SYSTEM.
 */
  register struct proc *rp;
  struct priv *priv;
  int proc_nr;

  if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
	return EINVAL;

  rp = proc_addr(proc_nr);
  priv = priv(rp);
  if(!priv) return ENOENT;
  sigaddset(&priv->s_sig_pending, sig_nr);
  mini_notify(proc_addr(SYSTEM), rp->p_endpoint);

  return OK;
}

/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(proc_nr_t proc_nr, int sig_nr)
{
/* A system process wants to send signal 'sig_nr' to process 'proc_nr'.
 * Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * is blocked while the signal manager has not finished all signals for it.
 * Race conditions between calls to this function and the system calls that
 * process pending kernel signals cannot exist. Signal related functions are
 * only called when a user process causes a CPU exception and from the kernel
 * process level, which runs to completion.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;
  int s;

  /* Lookup signal manager. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the target is its own signal manager, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
               rp->p_endpoint, sig_nr);
       }
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
           panic("send_sig failed");
       return;
  }

  s = sigismember(&rp->p_pending, sig_nr);
  /* Check if the signal is already pending. If not, process it. */
  if (!s) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
              panic("send_sig failed");
      }
  }
}
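
/* The other half of this handshake happens in the signal manager (normally
 * PM). A hedged sketch of that side, using the libsys wrappers for the
 * SYS_GETKSIG/SYS_ENDKSIG calls mapped above (manager-side code, not part of
 * this file):
 */
#if 0
  endpoint_t ep;
  sigset_t sigset;

  while (sys_getksig(&ep, &sigset) == OK && ep != NONE) {
	/* ... deliver the signals in 'sigset' to process 'ep' ... */
	sys_endksig(ep);	/* done; the kernel may unblock the process */
  }
#endif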

/*===========================================================================*
 *				sig_delay_done				     *
 *===========================================================================*/
void sig_delay_done(struct proc *rp)
{
/* A process is now known not to send any direct messages.
 * Tell PM that the stop delay has ended by sending a signal to the process.
 * Used for actual signal delivery.
 */

  rp->p_misc_flags &= ~MF_SIG_DELAY;

  cause_sig(proc_nr(rp), SIGSNDELAY);
}

/*===========================================================================*
 *				send_diag_sig				     *
 *===========================================================================*/
void send_diag_sig(void)
{
/* Send a SIGKMESS signal to all processes that have registered to receive
 * updates about new diagnostics messages.
 */
  struct priv *privp;
  endpoint_t ep;

  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
	if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
		ep = proc_addr(privp->s_proc_nr)->p_endpoint;
		send_sig(ep, SIGKMESS);
	}
  }
}

/*===========================================================================*
 *			         clear_memreq				     *
 *===========================================================================*/
static void clear_memreq(struct proc *rp)
{
  struct proc **rpp;

  if (!RTS_ISSET(rp, RTS_VMREQUEST))
	return; /* nothing to do */

  for (rpp = &vmrequest; *rpp != NULL;
     rpp = &(*rpp)->p_vmrequest.nextrequestor) {
	if (*rpp == rp) {
		*rpp = rp->p_vmrequest.nextrequestor;
		break;
	}
  }

  RTS_UNSET(rp, RTS_VMREQUEST);
}

/*===========================================================================*
 *			         clear_ipc				     *
 *===========================================================================*/
static void clear_ipc(
  register struct proc *rc	/* slot of process to clean up */
)
{
/* Clear IPC data for a given process slot. */
  struct proc **xpp;			/* iterate over caller queue */

  if (RTS_ISSET(rc, RTS_SENDING)) {
      int target_proc;

      okendpt(rc->p_sendto_e, &target_proc);
      xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
      while (*xpp) {		/* check entire queue */
          if (*xpp == rc) {			/* process is on the queue */
              *xpp = (*xpp)->p_q_link;		/* replace by next process */
#if DEBUG_ENABLE_IPC_WARNINGS
	      printf("endpoint %d / %s removed from queue at %d\n",
	          rc->p_endpoint, rc->p_name, rc->p_sendto_e);
#endif
              break;				/* can only be queued once */
          }
          xpp = &(*xpp)->p_q_link;		/* proceed to next queued */
      }
      RTS_UNSET(rc, RTS_SENDING);
  }
  RTS_UNSET(rc, RTS_RECEIVING);
}
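
/* Both clear_memreq() and clear_ipc() above use the classic pointer-to-
 * pointer removal idiom: the cursor points at the link field that refers to
 * the current element, so unlinking the head needs no special case. A
 * self-contained sketch of the same idiom (illustrative, not kernel code):
 */
#if 0
struct node { struct node *next; };

static void unlink_node(struct node **head, struct node *target)
{
	struct node **xpp;

	for (xpp = head; *xpp != NULL; xpp = &(*xpp)->next) {
		if (*xpp == target) {
			*xpp = target->next;	/* splice out of the chain */
			break;
		}
	}
}
#endif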

/*===========================================================================*
 *			         clear_endpoint				     *
 *===========================================================================*/
void clear_endpoint(struct proc * rc)
{
/* Clean up the slot of the process given as 'rc'. */
  if(isemptyp(rc)) panic("clear_proc: empty process: %d", rc->p_endpoint);

#if DEBUG_IPC_HOOK
  hook_ipc_clear(rc);
#endif

  /* Make sure that the exiting process is no longer scheduled. */
  RTS_SET(rc, RTS_NO_ENDPOINT);
  if (priv(rc)->s_flags & SYS_PROC)
  {
	priv(rc)->s_asynsize= 0;
  }

  /* If the process happens to be queued trying to send a
   * message, then it must be removed from the message queues.
   */
  clear_ipc(rc);

  /* Likewise, if another process was sending or receiving a message to or
   * from the exiting process, it must be alerted that the process is no
   * longer alive. Check all processes.
   */
  clear_ipc_refs(rc, EDEADSRCDST);

  /* Finally, if the process was blocked on a VM request, remove it from the
   * queue of processes waiting to be processed by VM.
   */
  clear_memreq(rc);
}

/*===========================================================================*
 *			       clear_ipc_refs				     *
 *===========================================================================*/
void clear_ipc_refs(
  register struct proc *rc,		/* slot of process to clean up */
  int caller_ret			/* code to return on callers */
)
{
/* Clear IPC references for a given process slot. */
  struct proc *rp;			/* iterate over process table */
  int src_id;

  /* Tell processes that sent asynchronous messages to 'rc' that the messages
   * are not going to be delivered.
   */
  while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
      cancel_async(proc_addr(id_to_nr(src_id)), rc);

  for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
      if(isemptyp(rp))
	continue;

      /* Unset pending notification bits. */
      unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);

      /* Unset pending asynchronous messages */
      unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);

      /* Check if process depends on given process. */
      if (P_BLOCKEDON(rp) == rc->p_endpoint) {
          rp->p_reg.retreg = caller_ret;	/* return requested code */
	  clear_ipc(rp);
      }
  }
}

/*===========================================================================*
 *                              kernel_call_resume                           *
 *===========================================================================*/
void kernel_call_resume(struct proc *caller)
{
	int result;

	assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
	assert(!RTS_ISSET(caller, RTS_VMREQUEST));

	assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);

	/*
	printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
			caller->p_name, caller->p_endpoint,
			caller->p_rts_flags, caller->p_misc_flags);
	 */

	/* Re-execute the kernel call, with MF_KCALL_RESUME still set so
	 * the call knows this is a retry.
	 */
	result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
	/* The call has now been re-executed, so clear the flag; it may be set
	 * again by kernel_call_finish() if the call suspends once more.
	 */
	caller->p_misc_flags &= ~MF_KCALL_RESUME;
	kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
}

/*===========================================================================*
 *                               sched_proc                                  *
 *===========================================================================*/
int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
{
	/* Make sure the values given are within the allowed range. */
	if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
		return(EINVAL);

	if (quantum < 1 && quantum != -1)
		return(EINVAL);

#ifdef CONFIG_SMP
	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
		return(EINVAL);
	if (cpu != -1 && !(cpu_is_ready(cpu)))
		return EBADCPU;
#endif

	/* In some cases, we might be rescheduling a runnable process. In such
	 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
	 * flag before the generic unset to dequeue/enqueue the process.
	 */

	/* FIXME this preempts the process; do we really want to do that? */

	/* FIXME this is a problem for SMP if the process currently runs on a
	 * different CPU */
	if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
		if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
			smp_schedule_migrate_proc(p, cpu);
		}
#endif

		RTS_SET(p, RTS_NO_QUANTUM);
	}
	if (priority != -1)
		p->p_priority = priority;
	if (quantum != -1) {
		p->p_quantum_size_ms = quantum;
		p->p_cpu_time_left = ms_2_cpu_time(quantum);
	}
#ifdef CONFIG_SMP
	if (cpu != -1)
		p->p_cpu = cpu;
#endif

	if (niced)
		p->p_misc_flags |= MF_NICED;
	else
		p->p_misc_flags &= ~MF_NICED;

	/* Clear the scheduling bit and enqueue the process */
	RTS_UNSET(p, RTS_NO_QUANTUM);

	return OK;
}
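
/* Usage sketch (cf. do_schedule, which unpacks a scheduler's request message
 * and ends up here; the concrete values below are made up). A parameter of
 * -1 leaves the corresponding field unchanged, as the checks above show:
 */
#if 0
  sched_proc(p, 7 /*priority*/, 200 /*quantum, ms*/, -1 /*cpu*/, 0 /*niced*/);
  sched_proc(p, -1, -1, -1, 0);	/* keep priority, quantum and cpu as they are */
#endif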

/*===========================================================================*
 *				add_ipc_filter				     *
 *===========================================================================*/
int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
	size_t length)
{
	int num_elements, r;
	ipc_filter_t *ipcf, **ipcfp;

	/* Validate arguments. */
	if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
		return EINVAL;

	if (length % sizeof(ipc_filter_el_t) != 0)
		return EINVAL;

	num_elements = length / sizeof(ipc_filter_el_t);
	if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
		return E2BIG;

	/* Allocate a new IPC filter slot. */
	IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
	if (ipcf == NULL)
		return ENOMEM;

	/* Fill details. */
	ipcf->num_elements = num_elements;
	ipcf->next = NULL;
	r = data_copy(rp->p_endpoint, address,
		KERNEL, (vir_bytes)ipcf->elements, length);
	if (r == OK)
		r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
	if (r != OK) {
		IPCF_POOL_FREE_SLOT(ipcf);
		return r;
	}

	/* Add the new filter at the end of the IPC filter chain. */
	for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
	    ipcfp = &(*ipcfp)->next)
		;
	*ipcfp = ipcf;

	return OK;
}
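
/* What a caller-supplied filter element might look like: one whitelist
 * element that admits only notifications from SYSTEM. This assumes the
 * element layout and match flags from <minix/ipc_filter.h>; how a process
 * ships the element array to the kernel is outside this file, so treat the
 * sketch as illustrative:
 */
#if 0
  ipc_filter_el_t el;

  el.flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
  el.m_source = SYSTEM;
  el.m_type = NOTIFY_MESSAGE;
  /* the element array and its size end up in add_ipc_filter() above as
   * 'address' and 'length' */
#endif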

/*===========================================================================*
 *				clear_ipc_filters			     *
 *===========================================================================*/
void clear_ipc_filters(struct proc *rp)
{
	ipc_filter_t *curr_ipcf, *ipcf;

	ipcf = priv(rp)->s_ipcf;
	while (ipcf != NULL) {
		curr_ipcf = ipcf;
		ipcf = ipcf->next;
		IPCF_POOL_FREE_SLOT(curr_ipcf);
	}

	priv(rp)->s_ipcf = NULL;

	/* VM is a special case here: since the cleared IPC filter may have
	 * blocked memory handling requests, we may now have to tell VM that
	 * there are "new" requests pending.
	 */
	if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
		if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
			panic("send_sig failed");
}

/*===========================================================================*
 *				check_ipc_filter			     *
 *===========================================================================*/
int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
{
	ipc_filter_el_t *ipcf_el;
	int i, num_elements, flags;

	if (ipcf == NULL)
		return OK;

	num_elements = ipcf->num_elements;
	flags = 0;
	for (i = 0; i < num_elements; i++) {
		ipcf_el = &ipcf->elements[i];
		if (!IPCF_EL_CHECK(ipcf_el))
			return EINVAL;
		flags |= ipcf_el->flags;
	}

	if (fill_flags)
		ipcf->flags = flags;
	else if (ipcf->flags != flags)
		return EINVAL;
	return OK;
}

/*===========================================================================*
 *				allow_ipc_filtered_msg			     *
 *===========================================================================*/
int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
	vir_bytes m_src_v, message *m_src_p)
{
	int i, r, num_elements, get_mtype, allow;
	ipc_filter_t *ipcf;
	ipc_filter_el_t *ipcf_el;
	message m_buff;

	ipcf = priv(rp)->s_ipcf;
	if (ipcf == NULL)
		return TRUE; /* no IPC filters, always allow */

	if (m_src_p == NULL) {
		assert(m_src_v != 0);

		/* Should we copy in the message type? */
		get_mtype = FALSE;
		do {
#if DEBUG_DUMPIPCF
			if (TRUE) {
#else
			if (ipcf->flags & IPCF_MATCH_M_TYPE) {
#endif
				get_mtype = TRUE;
				break;
			}
			ipcf = ipcf->next;
		} while (ipcf);
		ipcf = priv(rp)->s_ipcf; /* reset to start */

		/* If so, copy it in from the process. */
		if (get_mtype) {
			r = data_copy(src_e,
			    m_src_v + offsetof(message, m_type), KERNEL,
			    (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
			if (r != OK) {
				/* allow for now, this will fail later anyway */
#if DEBUG_DUMPIPCF
				printf("KERNEL: allow_ipc_filtered_msg: data "
				    "copy error %d, allowing message...\n", r);
#endif
				return TRUE;
			}
		}
		m_src_p = &m_buff;
	}

	m_src_p->m_source = src_e;

	/* See if the message is allowed. */
	allow = (ipcf->type == IPCF_BLACKLIST);
	do {
		if (allow != (ipcf->type == IPCF_WHITELIST)) {
			num_elements = ipcf->num_elements;
			for (i = 0; i < num_elements; i++) {
				ipcf_el = &ipcf->elements[i];
				if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
					allow = (ipcf->type == IPCF_WHITELIST);
					break;
				}
			}
		}
		ipcf = ipcf->next;
	} while (ipcf);

#if DEBUG_DUMPIPCF
	printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
	    TRUE /*printparams*/);
#endif

	return allow;
}

/*===========================================================================*
 *			  allow_ipc_filtered_memreq			     *
 *===========================================================================*/
int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
{
	/* Determine whether VM should receive a request to handle memory
	 * that is the result of process 'src_rp' trying to access currently
	 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
	 * be given the request, FALSE otherwise.
	 */

	struct proc *vmp;
	message m_buf;

	vmp = proc_addr(VM_PROC_NR);

	/* If VM has no filter in place, all requests should go through. */
	if (priv(vmp)->s_ipcf == NULL)
		return TRUE;

	/* VM obtains memory requests in response to a SIGKMEM signal, which
	 * is a notification sent from SYSTEM. Thus, if VM blocks such
	 * notifications, it also should not get any memory requests. Of
	 * course, VM should not be asking for requests in that case either,
	 * but the extra check doesn't hurt.
	 */
	m_buf.m_type = NOTIFY_MESSAGE;
	if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
		return FALSE;

	/* A more refined policy may be implemented here, for example to
	 * ensure that both the source and the destination (if different)
	 * are in the group of processes that VM wants to talk to. Since VM
	 * is basically not able to handle any memory requests during an
	 * update, we will not get here, and none of that is needed.
	 */
	return TRUE;
}

/*===========================================================================*
 *                             priv_add_irq                                  *
 *===========================================================================*/
int priv_add_irq(struct proc *rp, int irq)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IRQ;	/* Check IRQ */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_irq; i++) {
		if (priv->s_irq_tab[i] == irq)
			return OK;
	}

	i = priv->s_nr_irq;
	if (i >= NR_IRQ) {
		printf("do_privctl: %d already has %d irq's.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_irq_tab[i] = irq;
	priv->s_nr_irq++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_io                                   *
 *===========================================================================*/
int priv_add_io(struct proc *rp, struct io_range *ior)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IO_PORT;	/* Check I/O accesses */

	for (i = 0; i < priv->s_nr_io_range; i++) {
		if (priv->s_io_tab[i].ior_base == ior->ior_base &&
			priv->s_io_tab[i].ior_limit == ior->ior_limit)
			return OK;
	}

	i = priv->s_nr_io_range;
	if (i >= NR_IO_RANGE) {
		printf("do_privctl: %d already has %d i/o ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}

	priv->s_io_tab[i] = *ior;
	priv->s_nr_io_range++;
	return OK;
}
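
/* Usage sketch (cf. do_privctl, which receives such grant requests from RS;
 * the port numbers below are made-up example values for a serial-port-style
 * range):
 */
#if 0
  struct io_range ior;

  ior.ior_base = 0x3f8;		/* first port in the allowed range */
  ior.ior_limit = 0x3ff;	/* last port in the allowed range */
  priv_add_io(rp, &ior);
#endif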

/*===========================================================================*
 *                             priv_add_mem                                  *
 *===========================================================================*/
int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_MEM;	/* Check memory mappings */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_mem_range; i++) {
		if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
			priv->s_mem_tab[i].mr_limit == memr->mr_limit)
			return OK;
	}

	i = priv->s_nr_mem_range;
	if (i >= NR_MEM_RANGE) {
		printf("do_privctl: %d already has %d mem ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_mem_tab[i] = *memr;
	priv->s_nr_mem_range++;
	return OK;
}
