/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is not sending
 *   send_diag_sig:	send a diagnostics signal to interested processes
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */

#include "kernel/kernel.h"
#include "kernel/system.h"
#include "kernel/vm.h"
#include "kernel/clock.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>

/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector is initialized in system_init() with
 * map(), which asserts that each system call number is within the valid
 * range. Unused entries are left NULL and are rejected with EBADREQUEST
 * at dispatch time.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);

#define map(call_nr, handler)					\
    {	int call_index = call_nr-KERNEL_CALL;			\
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
	call_vec[call_index] = (handler); }
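/* For example, map(SYS_KILL, do_kill) below installs do_kill at
 * call_vec[SYS_KILL - KERNEL_CALL], after asserting that the index is in
 * range.
 */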

static void kernel_call_finish(struct proc * caller, message *msg, int result)
{
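  /* Finish a kernel call: either save the request message so the call can
   * be resumed once VM has serviced the fault (VMSUSPEND), or deliver the
   * result back to the caller.
   */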
  if(result == VMSUSPEND) {
	  /* Special case: message has to be saved for handling
	   * until VM tells us it's allowed. VM has been notified
	   * and we must wait for its reply to restart the call.
	   */
	  assert(RTS_ISSET(caller, RTS_VMREQUEST));
	  assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
	  caller->p_vmrequest.saved.reqmsg = *msg;
	  caller->p_misc_flags |= MF_KCALL_RESUME;
  } else {
	  /*
	   * call is finished, we could have been suspended because of VM,
	   * remove the request message
	   */
	  caller->p_vmrequest.saved.reqmsg.m_source = NONE;
	  if (result != EDONTREPLY) {
		  /* copy the result as a message to the original user buffer */
		  msg->m_source = SYSTEM;
		  msg->m_type = result;		/* report status of call */
#if DEBUG_IPC_HOOK
		  hook_ipc_msgkresult(msg, caller);
#endif
		  if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
			  printf("WARNING wrong user pointer 0x%08x from "
					  "process %s / %d\n",
					  caller->p_delivermsg_vir,
					  caller->p_name,
					  caller->p_endpoint);
			  cause_sig(proc_nr(caller), SIGSEGV);
		  }
	  }
  }
}

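/* Look up the handler for a kernel call and run it, after checking that the
 * call number is valid and that the caller's call mask permits the call.
 */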
static int kernel_call_dispatch(struct proc * caller, message *msg)
{
  int result = OK;
  int call_nr;

#if DEBUG_IPC_HOOK
  hook_ipc_msgkcall(msg, caller);
#endif
  call_nr = msg->m_type - KERNEL_CALL;

  /* See if the caller made a valid request and try to handle it. */
  if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {	/* check call number */
	  printf("SYSTEM: illegal request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = EBADREQUEST;			/* illegal message type */
  }
  else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
	  printf("SYSTEM: denied request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = ECALLDENIED;			/* call denied by call mask */
  } else {
	  /* handle the system call */
	  if (call_vec[call_nr])
		  result = (*call_vec[call_nr])(caller, msg);
	  else {
		  printf("Unused kernel call %d from %d\n",
				  call_nr, caller->p_endpoint);
		  result = EBADREQUEST;
	  }
  }

  return result;
}

/*===========================================================================*
 *				kernel_call				     *
 *===========================================================================*/
/*
 * This function checks the basic syscall parameters and, if accepted,
 * dispatches its handling to the right handler.
 */
void kernel_call(message *m_user, struct proc * caller)
{
  int result = OK;
  message msg;

  caller->p_delivermsg_vir = (vir_bytes) m_user;
  /*
   * The LDT and CR3 of the caller process are already loaded, either because
   * the process has just trapped into the kernel, or because they were set
   * in switch_to_user() before we resumed execution of an interrupted
   * kernel call.
   */
  if (copy_msg_from_user(m_user, &msg) == 0) {
	  msg.m_source = caller->p_endpoint;
	  result = kernel_call_dispatch(caller, &msg);
  }
  else {
	  printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
			  m_user, caller->p_name, caller->p_endpoint);
	  cause_sig(proc_nr(caller), SIGSEGV);
	  return;
  }

  /* remember who invoked the kcall so we can bill it its time */
  kbill_kcall = caller;

  kernel_call_finish(caller, &msg, result);
}

/*===========================================================================*
 *				system_init				     *
 *===========================================================================*/
void system_init(void)
{
  register struct priv *sp;
  int i;

  /* Initialize IRQ handler hooks. Mark all hooks available. */
  for (i=0; i<NR_IRQ_HOOKS; i++) {
      irq_hooks[i].proc_nr_e = NONE;
  }

  /* Initialize all alarm timers for all processes. */
  for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
    tmr_inittimer(&(sp->s_alarm_timer));
  }

  /* Initialize the call vector to a safe default. Some system calls may be
   * disabled or nonexistent; their entries stay NULL and are rejected at
   * dispatch time. Then explicitly map known calls to their handler
   * functions. This is done with a macro that asserts the call number is
   * within range. The ordering is not important here.
   */
  for (i=0; i<NR_SYS_CALLS; i++) {
      call_vec[i] = NULL;
  }

  /* Process management. */
  map(SYS_FORK, do_fork); 		/* a process forked a new process */
  map(SYS_EXEC, do_exec);		/* update process after execute */
  map(SYS_CLEAR, do_clear);		/* clean up after process exit */
  map(SYS_EXIT, do_exit);		/* a system process wants to exit */
  map(SYS_PRIVCTL, do_privctl);		/* system privileges control */
  map(SYS_TRACE, do_trace);		/* request a trace operation */
  map(SYS_SETGRANT, do_setgrant);	/* get/set own parameters */
  map(SYS_RUNCTL, do_runctl);		/* set/clear stop flag of a process */
  map(SYS_UPDATE, do_update);		/* update a process into another */
  map(SYS_STATECTL, do_statectl);	/* let a process control its state */

  /* Signal handling. */
  map(SYS_KILL, do_kill); 		/* cause a process to be signaled */
  map(SYS_GETKSIG, do_getksig);		/* signal manager checks for signals */
  map(SYS_ENDKSIG, do_endksig);		/* signal manager finished signal */
  map(SYS_SIGSEND, do_sigsend);		/* start POSIX-style signal */
  map(SYS_SIGRETURN, do_sigreturn);	/* return from POSIX-style signal */

  /* Device I/O. */
  map(SYS_IRQCTL, do_irqctl);  		/* interrupt control operations */
#if defined(__i386__)
  map(SYS_DEVIO, do_devio);   		/* inb, inw, inl, outb, outw, outl */
  map(SYS_VDEVIO, do_vdevio);  		/* vector with devio requests */
#endif

  /* Memory management. */
  map(SYS_MEMSET, do_memset);		/* write char to memory area */
  map(SYS_VMCTL, do_vmctl);		/* various VM process settings */

  /* Copying. */
  map(SYS_UMAP, do_umap);		/* map virtual to physical address */
  map(SYS_UMAP_REMOTE, do_umap_remote);	/* do_umap for non-caller process */
  map(SYS_VUMAP, do_vumap);		/* vectored virtual to physical map */
  map(SYS_VIRCOPY, do_vircopy); 	/* use pure virtual addressing */
  map(SYS_PHYSCOPY, do_copy);	 	/* use physical addressing */
  map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
  map(SYS_SAFECOPYTO, do_safecopy_to);	/* copy with pre-granted permission */
  map(SYS_VSAFECOPY, do_vsafecopy);	/* vectored safecopy */

  /* safe memset */
  map(SYS_SAFEMEMSET, do_safememset);	/* safememset */

  /* Clock functionality. */
  map(SYS_TIMES, do_times);		/* get uptime and process times */
  map(SYS_SETALARM, do_setalarm);	/* schedule a synchronous alarm */
  map(SYS_STIME, do_stime);		/* set the boottime */
  map(SYS_SETTIME, do_settime);		/* set the system time (realtime) */
  map(SYS_VTIMER, do_vtimer);		/* set or retrieve a virtual timer */

  /* System control. */
  map(SYS_ABORT, do_abort);		/* abort MINIX */
  map(SYS_GETINFO, do_getinfo); 	/* request system information */
  map(SYS_DIAGCTL, do_diagctl);		/* diagnostics-related functionality */

  /* Profiling. */
  map(SYS_SPROF, do_sprofile);         /* start/stop statistical profiling */

  /* arm-specific. */
#if defined(__arm__)
  map(SYS_PADCONF, do_padconf);		/* configure pinmux */
#endif

  /* i386-specific. */
#if defined(__i386__)
  map(SYS_READBIOS, do_readbios);	/* read from BIOS locations */
  map(SYS_IOPENABLE, do_iopenable); 	/* Enable I/O */
  map(SYS_SDEVIO, do_sdevio);		/* phys_insb, _insw, _outsb, _outsw */
#endif

  /* Machine state switching. */
  map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
  map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */

  /* Scheduling */
  map(SYS_SCHEDULE, do_schedule);	/* reschedule a process */
  map(SYS_SCHEDCTL, do_schedctl);	/* change process scheduler */

}
/*===========================================================================*
 *				get_priv				     *
 *===========================================================================*/
int get_priv(
  register struct proc *rc,		/* new (child) process pointer */
  int priv_id				/* privilege id */
)
{
/* Allocate a new privilege structure for a system process. Privilege ids
 * can be assigned either statically or dynamically.
 */
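/* For example, a caller passing NULL_PRIV_ID gets the first free dynamic
 * slot, while a valid static id selects priv[priv_id] directly.
 */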
  register struct priv *sp;                 /* privilege structure */

  if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
      for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
          if (sp->s_proc_nr == NONE) break;
      if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
  }
  else {                                    /* allocate slot from id */
      if(!is_static_priv_id(priv_id)) {
          return EINVAL;                    /* invalid static priv id */
      }
      if(priv[priv_id].s_proc_nr != NONE) {
          return EBUSY;                     /* slot already in use */
      }
      sp = &priv[priv_id];
  }
  rc->p_priv = sp;			    /* assign new slot */
  rc->p_priv->s_proc_nr = proc_nr(rc);	    /* set association */

  return(OK);
}

/*===========================================================================*
 *				set_sendto_bit				     *
 *===========================================================================*/
void set_sendto_bit(const struct proc *rp, int id)
{
/* Allow a process to send messages to the process(es) associated with the
 * system privilege structure with the given ID.
 */

  /* Disallow the process from sending to a process privilege structure with no
   * associated process, and disallow the process from sending to itself.
   */
  if (id_to_nr(id) == NONE || priv_id(rp) == id) {
	unset_sys_bit(priv(rp)->s_ipc_to, id);
	return;
  }

  set_sys_bit(priv(rp)->s_ipc_to, id);

  /* The process that this process can now send to must be able to reply (or
   * vice versa). Therefore, its send mask should be updated as well. Ignore
   * receivers that don't support traps other than RECEIVE; they can't reply
   * or send messages anyway.
   */
  if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
      set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *				unset_sendto_bit			     *
 *===========================================================================*/
void unset_sendto_bit(const struct proc *rp, int id)
{
/* Prevent a process from sending to another process. Retain the send mask
 * symmetry by also unsetting the bit for the other direction.
 */

  unset_sys_bit(priv(rp)->s_ipc_to, id);

  unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *			      fill_sendto_mask				     *
 *===========================================================================*/
void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
{
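/* Fill in the target mask of a given process: for each privilege id, set or
 * clear the send bit according to the corresponding bit in 'map'.
 */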
  int i;

  for (i=0; i < NR_SYS_PROCS; i++) {
  	if (get_sys_bit(*map, i))
  		set_sendto_bit(rp, i);
  	else
  		unset_sendto_bit(rp, i);
  }
}

/*===========================================================================*
 *				send_sig				     *
 *===========================================================================*/
int send_sig(endpoint_t ep, int sig_nr)
{
/* Notify a system process about a signal. This is straightforward. Simply
 * set the signal that is to be delivered in the pending signals map and
 * send a notification with source SYSTEM.
 */
  register struct proc *rp;
  struct priv *priv;
  int proc_nr;

  if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
	return EINVAL;

  rp = proc_addr(proc_nr);
  priv = priv(rp);
  if(!priv) return ENOENT;
  sigaddset(&priv->s_sig_pending, sig_nr);
  mini_notify(proc_addr(SYSTEM), rp->p_endpoint);

  return OK;
}

/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(
  proc_nr_t proc_nr,		/* process to be signalled */
  int sig_nr			/* signal to be sent */
)
{
/* A system process wants to send a signal to a process.  Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * is blocked while the signal manager has not finished all signals for it.
 * Race conditions between calls to this function and the system calls that
 * process pending kernel signals cannot exist. Signal related functions are
 * only called when a user process causes a CPU exception and from the kernel
 * process level, which runs to completion.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;
  int s;

  /* Lookup signal manager. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the target is its own signal manager, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
               rp->p_endpoint, sig_nr);
       }
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
           panic("send_sig failed");
       return;
  }

  s = sigismember(&rp->p_pending, sig_nr);
  /* Check if the signal is already pending. Process it otherwise. */
  if (!s) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
              panic("send_sig failed");
      }
  }
}

/*===========================================================================*
 *				sig_delay_done				     *
 *===========================================================================*/
void sig_delay_done(struct proc *rp)
{
/* A process is now known not to send any direct messages.
 * Tell PM that the stop delay has ended by sending a signal to the process.
 * Used for actual signal delivery.
 */

  rp->p_misc_flags &= ~MF_SIG_DELAY;

  cause_sig(proc_nr(rp), SIGSNDELAY);
}

/*===========================================================================*
 *				send_diag_sig				     *
 *===========================================================================*/
void send_diag_sig(void)
{
/* Send a SIGKMESS signal to all processes interested in receiving updates
 * about new diagnostics messages.
 */
  struct priv *privp;
  endpoint_t ep;

  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
	if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
		ep = proc_addr(privp->s_proc_nr)->p_endpoint;
		send_sig(ep, SIGKMESS);
	}
  }
}

/*===========================================================================*
 *			         clear_memreq				     *
 *===========================================================================*/
static void clear_memreq(struct proc *rp)
{
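/* If the process is blocked on a VM request, remove it from the chain of
 * processes waiting to be serviced by VM and clear its VM request flag.
 */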
  struct proc **rpp;

  if (!RTS_ISSET(rp, RTS_VMREQUEST))
	return; /* nothing to do */

  for (rpp = &vmrequest; *rpp != NULL;
     rpp = &(*rpp)->p_vmrequest.nextrequestor) {
	if (*rpp == rp) {
		*rpp = rp->p_vmrequest.nextrequestor;
		break;
	}
  }

  RTS_UNSET(rp, RTS_VMREQUEST);
}

/*===========================================================================*
 *			         clear_ipc				     *
 *===========================================================================*/
static void clear_ipc(
  register struct proc *rc	/* slot of process to clean up */
)
{
/* Clear IPC data for a given process slot. */
  struct proc **xpp;			/* iterate over caller queue */

  if (RTS_ISSET(rc, RTS_SENDING)) {
      int target_proc;

      okendpt(rc->p_sendto_e, &target_proc);
      xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
      while (*xpp) {		/* check entire queue */
          if (*xpp == rc) {			/* process is on the queue */
              *xpp = (*xpp)->p_q_link;		/* replace by next process */
#if DEBUG_ENABLE_IPC_WARNINGS
	      printf("endpoint %d / %s removed from queue at %d\n",
	          rc->p_endpoint, rc->p_name, rc->p_sendto_e);
#endif
              break;				/* can only be queued once */
          }
          xpp = &(*xpp)->p_q_link;		/* proceed to next queued */
      }
      RTS_UNSET(rc, RTS_SENDING);
  }
  RTS_UNSET(rc, RTS_RECEIVING);
}

/*===========================================================================*
 *			         clear_endpoint				     *
 *===========================================================================*/
void clear_endpoint(
  register struct proc *rc		/* slot of process to clean up */
)
{
  if(isemptyp(rc)) panic("clear_endpoint: empty process: %d", rc->p_endpoint);

#if DEBUG_IPC_HOOK
  hook_ipc_clear(rc);
#endif

  /* Make sure that the exiting process is no longer scheduled. */
  RTS_SET(rc, RTS_NO_ENDPOINT);
  if (priv(rc)->s_flags & SYS_PROC)
  {
	priv(rc)->s_asynsize = 0;
  }

  /* If the process happens to be queued trying to send a
   * message, then it must be removed from the message queues.
   */
  clear_ipc(rc);

  /* Likewise, if another process was sending or receiving a message to or
   * from the exiting process, it must be alerted that the process is no
   * longer alive. Check all processes.
   */
  clear_ipc_refs(rc, EDEADSRCDST);

  /* Finally, if the process was blocked on a VM request, remove it from the
   * queue of processes waiting to be processed by VM.
   */
  clear_memreq(rc);
}

/*===========================================================================*
 *			       clear_ipc_refs				     *
 *===========================================================================*/
void clear_ipc_refs(
  register struct proc *rc,		/* slot of process to clean up */
  int caller_ret			/* code to return on callers */
)
{
/* Clear IPC references for a given process slot. */
  struct proc *rp;			/* iterate over process table */
  int src_id;

  /* Tell processes that sent asynchronous messages to 'rc' that they are not
   * going to be delivered. */
  while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
      cancel_async(proc_addr(id_to_nr(src_id)), rc);

  for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
      if(isemptyp(rp))
	continue;

      /* Unset pending notification bits. */
      unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);

      /* Unset pending asynchronous messages */
      unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);

      /* Check if process depends on given process. */
      if (P_BLOCKEDON(rp) == rc->p_endpoint) {
          rp->p_reg.retreg = caller_ret;	/* return requested code */
	  clear_ipc(rp);
      }
  }
}

/*===========================================================================*
 *                              kernel_call_resume                           *
 *===========================================================================*/
void kernel_call_resume(struct proc *caller)
{
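	/* Resume a kernel call that was suspended with VMSUSPEND: the
	 * request message was saved in p_vmrequest, and VM has now told us
	 * the call may be retried.
	 */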
	int result;

	assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
	assert(!RTS_ISSET(caller, RTS_VMREQUEST));

	assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);

	/*
	printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
			caller->p_name, caller->p_endpoint,
			caller->p_rts_flags, caller->p_misc_flags);
	 */

	/* re-execute the kernel call, with MF_KCALL_RESUME still set so
	 * the call knows this is a retry.
	 */
	result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
	/*
	 * we are resuming the kernel call so we have to remove this flag so it
	 * can be set again
	 */
	caller->p_misc_flags &= ~MF_KCALL_RESUME;
	kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
}

/*===========================================================================*
 *                               sched_proc                                  *
 *===========================================================================*/
int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
{
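	/* Change the scheduling parameters of a process. A value of -1 for
	 * priority, quantum, or cpu leaves that parameter unchanged.
	 */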
	/* Make sure the values given are within the allowed range. */
	if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
		return(EINVAL);

	if (quantum < 1 && quantum != -1)
		return(EINVAL);

#ifdef CONFIG_SMP
	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
		return(EINVAL);
	if (cpu != -1 && !(cpu_is_ready(cpu)))
		return EBADCPU;
#endif

	/* In some cases, we might be rescheduling a runnable process. In such
	 * a case (i.e. if we are updating the priority), we set the NO_QUANTUM
	 * flag before the generic unset to dequeue/enqueue the process.
	 */

	/* FIXME this preempts the process, do we really want to do that? */

	/* FIXME this is a problem for SMP if the process currently runs on a
	 * different CPU */
	if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
		if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
			smp_schedule_migrate_proc(p, cpu);
		}
#endif

		RTS_SET(p, RTS_NO_QUANTUM);
	}

	if (priority != -1)
		p->p_priority = priority;
	if (quantum != -1) {
		p->p_quantum_size_ms = quantum;
		p->p_cpu_time_left = ms_2_cpu_time(quantum);
	}
#ifdef CONFIG_SMP
	if (cpu != -1)
		p->p_cpu = cpu;
#endif

	if (niced)
		p->p_misc_flags |= MF_NICED;
	else
		p->p_misc_flags &= ~MF_NICED;

	/* Clear the scheduling bit and enqueue the process */
	RTS_UNSET(p, RTS_NO_QUANTUM);

	return OK;
}

/*===========================================================================*
 *				add_ipc_filter				     *
 *===========================================================================*/
int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
	size_t length)
{
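	/* Attach a new IPC filter (blacklist or whitelist) to a process. The
	 * filter elements are copied in from the caller's address space,
	 * validated, and appended to the end of the process's filter chain.
	 */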
	int num_elements, r;
	ipc_filter_t *ipcf, **ipcfp;

	/* Validate arguments. */
	if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
		return EINVAL;

	if (length % sizeof(ipc_filter_el_t) != 0)
		return EINVAL;

	num_elements = length / sizeof(ipc_filter_el_t);
	if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
		return E2BIG;

	/* Allocate a new IPC filter slot. */
	IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
	if (ipcf == NULL)
		return ENOMEM;

	/* Fill details. */
	ipcf->num_elements = num_elements;
	ipcf->next = NULL;
	r = data_copy(rp->p_endpoint, address,
		KERNEL, (vir_bytes)ipcf->elements, length);
	if (r == OK)
		r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
	if (r != OK) {
		IPCF_POOL_FREE_SLOT(ipcf);
		return r;
	}

	/* Add the new filter at the end of the IPC filter chain. */
	for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
	    ipcfp = &(*ipcfp)->next)
		;
	*ipcfp = ipcf;

	return OK;
}

/*===========================================================================*
 *				clear_ipc_filters			     *
 *===========================================================================*/
void clear_ipc_filters(struct proc *rp)
{
	ipc_filter_t *curr_ipcf, *ipcf;

	ipcf = priv(rp)->s_ipcf;
	while (ipcf != NULL) {
		curr_ipcf = ipcf;
		ipcf = ipcf->next;
		IPCF_POOL_FREE_SLOT(curr_ipcf);
	}

	priv(rp)->s_ipcf = NULL;

	/* VM is a special case here: since the cleared IPC filter may have
	 * blocked memory handling requests, we may now have to tell VM that
	 * there are "new" requests pending.
	 */
	if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
		if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
			panic("send_sig failed");
}

/*===========================================================================*
 *				check_ipc_filter			     *
 *===========================================================================*/
int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
{
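	/* Validate an IPC filter: check that every element is well formed,
	 * and either compute and store the combined element flags (if
	 * fill_flags is set) or verify them against the stored flags.
	 */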
	ipc_filter_el_t *ipcf_el;
	int i, num_elements, flags;

	if (ipcf == NULL)
		return OK;

	num_elements = ipcf->num_elements;
	flags = 0;
	for (i = 0; i < num_elements; i++) {
		ipcf_el = &ipcf->elements[i];
		if (!IPCF_EL_CHECK(ipcf_el))
			return EINVAL;
		flags |= ipcf_el->flags;
	}

	if (fill_flags)
		ipcf->flags = flags;
	else if (ipcf->flags != flags)
		return EINVAL;
	return OK;
}

/*===========================================================================*
 *				allow_ipc_filtered_msg			     *
 *===========================================================================*/
int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
	vir_bytes m_src_v, message *m_src_p)
{
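	/* Determine whether a message from 'src_e' may be delivered to 'rp'
	 * under 'rp's IPC filters. The message is given either by a kernel
	 * pointer (m_src_p) or by a virtual address in the sender (m_src_v).
	 */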
	int i, r, num_elements, get_mtype, allow;
	ipc_filter_t *ipcf;
	ipc_filter_el_t *ipcf_el;
	message m_buff;

	ipcf = priv(rp)->s_ipcf;
	if (ipcf == NULL)
		return TRUE; /* no IPC filters, always allow */

	if (m_src_p == NULL) {
		assert(m_src_v != 0);

		/* Should we copy in the message type? */
		get_mtype = FALSE;
		do {
#if DEBUG_DUMPIPCF
			if (TRUE) {
#else
			if (ipcf->flags & IPCF_MATCH_M_TYPE) {
#endif
				get_mtype = TRUE;
				break;
			}
			ipcf = ipcf->next;
		} while (ipcf);
		ipcf = priv(rp)->s_ipcf; /* reset to start */

		/* If so, copy it in from the process. */
		if (get_mtype) {
			r = data_copy(src_e,
			    m_src_v + offsetof(message, m_type), KERNEL,
			    (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
			if (r != OK) {
				/* allow for now, this will fail later anyway */
#if DEBUG_DUMPIPCF
				printf("KERNEL: allow_ipc_filtered_msg: data "
				    "copy error %d, allowing message...\n", r);
#endif
				return TRUE;
			}
		}
		m_src_p = &m_buff;
	}

	m_src_p->m_source = src_e;

	/* See if the message is allowed. */
	allow = (ipcf->type == IPCF_BLACKLIST);
	do {
		if (allow != (ipcf->type == IPCF_WHITELIST)) {
			num_elements = ipcf->num_elements;
			for (i = 0; i < num_elements; i++) {
				ipcf_el = &ipcf->elements[i];
				if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
					allow = (ipcf->type == IPCF_WHITELIST);
					break;
				}
			}
		}
		ipcf = ipcf->next;
	} while (ipcf);

#if DEBUG_DUMPIPCF
	printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
	    TRUE /*printparams*/);
#endif

	return allow;
}

/*===========================================================================*
 *			  allow_ipc_filtered_memreq			     *
 *===========================================================================*/
int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
{
	/* Determine whether VM should receive a request to handle memory
	 * that is the result of process 'src_rp' trying to access currently
	 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
	 * be given the request, FALSE otherwise.
	 */

	struct proc *vmp;
	message m_buf;
	int allow_src, allow_dst;

	vmp = proc_addr(VM_PROC_NR);

	/* If VM has no filter in place, all requests should go through. */
	if (priv(vmp)->s_ipcf == NULL)
		return TRUE;

	/* VM obtains memory requests in response to a SIGKMEM signal, which
	 * is a notification sent from SYSTEM. Thus, if VM blocks such
	 * notifications, it also should not get any memory requests. Of
	 * course, VM should not be asking for requests in that case either,
	 * but the extra check doesn't hurt.
	 */
	m_buf.m_type = NOTIFY_MESSAGE;
	if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
		return FALSE;

	/* A more refined policy may be implemented here, for example to
	 * ensure that both the source and the destination (if different)
	 * are in the group of processes that VM wants to talk to. Since VM
	 * is basically not able to handle any memory requests during an
	 * update, we will not get here, and none of that is needed.
	 */
	return TRUE;
}

/*===========================================================================*
 *                             priv_add_irq                                  *
 *===========================================================================*/
int priv_add_irq(struct proc *rp, int irq)
{
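	/* Grant a process permission to use the given IRQ line, turning on
	 * IRQ checking (CHECK_IRQ) for the process if it was not on yet.
	 */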
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IRQ;	/* Check IRQ */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_irq; i++) {
		if (priv->s_irq_tab[i] == irq)
			return OK;
	}

	i = priv->s_nr_irq;
	if (i >= NR_IRQ) {
		printf("do_privctl: %d already has %d irq's.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_irq_tab[i] = irq;
	priv->s_nr_irq++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_io                                   *
 *===========================================================================*/
int priv_add_io(struct proc *rp, struct io_range *ior)
{
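	/* Grant a process access to the given I/O port range, turning on I/O
	 * port checking (CHECK_IO_PORT) for the process if it was not on yet.
	 */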
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IO_PORT;	/* Check I/O accesses */

	for (i = 0; i < priv->s_nr_io_range; i++) {
		if (priv->s_io_tab[i].ior_base == ior->ior_base &&
			priv->s_io_tab[i].ior_limit == ior->ior_limit)
			return OK;
	}

	i = priv->s_nr_io_range;
	if (i >= NR_IO_RANGE) {
		printf("do_privctl: %d already has %d i/o ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}

	priv->s_io_tab[i] = *ior;
	priv->s_nr_io_range++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_mem                                  *
 *===========================================================================*/
int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
{
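	/* Grant a process access to the given memory range, turning on memory
	 * mapping checks (CHECK_MEM) for the process if they were not on yet.
	 */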
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_MEM;	/* Check memory mappings */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_mem_range; i++) {
		if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
			priv->s_mem_tab[i].mr_limit == memr->mr_limit)
			return OK;
	}

	i = priv->s_nr_mem_range;
	if (i >= NR_MEM_RANGE) {
		printf("do_privctl: %d already has %d mem ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_mem_tab[i] = *memr;
	priv->s_nr_mem_range++;
	return OK;
}
998 
999