/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is not sending
 *   send_diag_sig:	send a diagnostics signal to interested processes
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */

#include "kernel/kernel.h"
#include "kernel/system.h"
#include "kernel/vm.h"
#include "kernel/clock.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>

/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector is initialized in system_init() with
 * map(), which asserts that each call number is within range. Unused
 * entries are left NULL and are caught in kernel_call_dispatch().
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);

#define map(call_nr, handler)					\
    {	int call_index = call_nr-KERNEL_CALL;			\
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
	call_vec[call_index] = (handler); }

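/* For illustration only: a mapping such as map(SYS_GETINFO, do_getinfo)
 * expands roughly to
 *
 *	{ int call_index = SYS_GETINFO - KERNEL_CALL;
 *	  assert(call_index >= 0 && call_index < NR_SYS_CALLS);
 *	  call_vec[call_index] = (do_getinfo); }
 *
 * so call numbers are translated to vector indices and range-checked with a
 * run-time assert() during system_init().
 */
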
static void kernel_call_finish(struct proc * caller, message *msg, int result)
{
  if(result == VMSUSPEND) {
	  /* Special case: message has to be saved for handling
	   * until VM tells us it's allowed. VM has been notified
	   * and we must wait for its reply to restart the call.
	   */
	  assert(RTS_ISSET(caller, RTS_VMREQUEST));
	  assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
	  caller->p_vmrequest.saved.reqmsg = *msg;
	  caller->p_misc_flags |= MF_KCALL_RESUME;
  } else {
	  /*
	   * The call is finished; we could have been suspended because of VM,
	   * so remove the request message.
	   */
	  caller->p_vmrequest.saved.reqmsg.m_source = NONE;
	  if (result != EDONTREPLY) {
		  /* copy the result as a message to the original user buffer */
		  msg->m_source = SYSTEM;
		  msg->m_type = result;		/* report status of call */
#if DEBUG_IPC_HOOK
		  hook_ipc_msgkresult(msg, caller);
#endif
		  if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
			  printf("WARNING wrong user pointer 0x%08x from "
					  "process %s / %d\n",
					  caller->p_delivermsg_vir,
					  caller->p_name,
					  caller->p_endpoint);
			  cause_sig(proc_nr(caller), SIGSEGV);
		  }
	  }
  }
}

static int kernel_call_dispatch(struct proc * caller, message *msg)
{
  int result = OK;
  int call_nr;

#if DEBUG_IPC_HOOK
  hook_ipc_msgkcall(msg, caller);
#endif
  call_nr = msg->m_type - KERNEL_CALL;

  /* See if the caller made a valid request and try to handle it. */
  if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {	/* check call number */
	  printf("SYSTEM: illegal request %d from %d.\n",
			  call_nr,msg->m_source);
	  result = EBADREQUEST;			/* illegal message type */
  }
  else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
	  printf("SYSTEM: denied request %d from %d.\n",
			  call_nr,msg->m_source);
	  result = ECALLDENIED;			/* call denied by call mask */
  } else {
	  /* handle the system call */
	  if (call_vec[call_nr])
		  result = (*call_vec[call_nr])(caller, msg);
	  else {
		  printf("Unused kernel call %d from %d\n",
				  call_nr, caller->p_endpoint);
		  result = EBADREQUEST;
	  }
  }

  return result;
}

/*===========================================================================*
 *				kernel_call				     *
 *===========================================================================*/
/*
 * This function checks the basic syscall parameters and, if they are
 * accepted, dispatches handling to the right handler.
 */
void kernel_call(message *m_user, struct proc * caller)
{
  int result = OK;
  message msg;

  caller->p_delivermsg_vir = (vir_bytes) m_user;
  /*
   * The ldt and cr3 of the caller process are loaded because it has just
   * trapped into the kernel, or they were already set in switch_to_user()
   * before we resume execution of an interrupted kernel call.
   */
  if (copy_msg_from_user(m_user, &msg) == 0) {
	  msg.m_source = caller->p_endpoint;
	  result = kernel_call_dispatch(caller, &msg);
  }
  else {
	  printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
			  m_user, caller->p_name, caller->p_endpoint);
	  cause_sig(proc_nr(caller), SIGSEGV);
	  return;
  }

  /* remember who invoked the kcall so we can bill it its time */
  kbill_kcall = caller;

  kernel_call_finish(caller, &msg, result);
}
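
/* For context, a minimal sketch of how a system service ends up in
 * kernel_call(): the libsys wrappers build a request message and trap into
 * the kernel through the _kernel_call() stub (names outside this file are
 * from libsys and may differ between revisions):
 *
 *	message m;
 *	int r;
 *	memset(&m, 0, sizeof(m));
 *	... fill in the payload fields expected by the handler ...
 *	r = _kernel_call(SYS_TIMES, &m);
 *
 * The user-space pointer to 'm' is what arrives here as m_user and is read
 * by copy_msg_from_user() above.
 */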

/*===========================================================================*
 *				system_init				     *
 *===========================================================================*/
void system_init(void)
{
  register struct priv *sp;
  int i;

  /* Initialize IRQ handler hooks. Mark all hooks available. */
  for (i=0; i<NR_IRQ_HOOKS; i++) {
      irq_hooks[i].proc_nr_e = NONE;
  }

  /* Initialize all alarm timers for all processes. */
  for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
    tmr_inittimer(&(sp->s_alarm_timer));
  }

  /* Initialize the call vector with empty entries; some system calls may be
   * disabled or nonexistent, and unmapped entries are caught in
   * kernel_call_dispatch(). Then explicitly map known calls to their handler
   * functions. This is done with a macro that asserts at run time that no
   * illegal call number is used. The ordering is not important here.
   */
  for (i=0; i<NR_SYS_CALLS; i++) {
      call_vec[i] = NULL;
  }

  /* Process management. */
  map(SYS_FORK, do_fork); 		/* a process forked a new process */
  map(SYS_EXEC, do_exec);		/* update process after execute */
  map(SYS_CLEAR, do_clear);		/* clean up after process exit */
  map(SYS_EXIT, do_exit);		/* a system process wants to exit */
  map(SYS_PRIVCTL, do_privctl);		/* system privileges control */
  map(SYS_TRACE, do_trace);		/* request a trace operation */
  map(SYS_SETGRANT, do_setgrant);	/* get/set own parameters */
  map(SYS_RUNCTL, do_runctl);		/* set/clear stop flag of a process */
  map(SYS_UPDATE, do_update);		/* update a process into another */
  map(SYS_STATECTL, do_statectl);	/* let a process control its state */

  /* Signal handling. */
  map(SYS_KILL, do_kill); 		/* cause a process to be signaled */
  map(SYS_GETKSIG, do_getksig);		/* signal manager checks for signals */
  map(SYS_ENDKSIG, do_endksig);		/* signal manager finished signal */
  map(SYS_SIGSEND, do_sigsend);		/* start POSIX-style signal */
  map(SYS_SIGRETURN, do_sigreturn);	/* return from POSIX-style signal */

  /* Device I/O. */
  map(SYS_IRQCTL, do_irqctl);  		/* interrupt control operations */
#if defined(__i386__)
  map(SYS_DEVIO, do_devio);   		/* inb, inw, inl, outb, outw, outl */
  map(SYS_VDEVIO, do_vdevio);  		/* vector with devio requests */
#endif

  /* Memory management. */
  map(SYS_MEMSET, do_memset);		/* write char to memory area */
  map(SYS_VMCTL, do_vmctl);		/* various VM process settings */

  /* Copying. */
  map(SYS_UMAP, do_umap);		/* map virtual to physical address */
  map(SYS_UMAP_REMOTE, do_umap_remote);	/* do_umap for non-caller process */
  map(SYS_VUMAP, do_vumap);		/* vectored virtual to physical map */
  map(SYS_VIRCOPY, do_vircopy); 	/* use pure virtual addressing */
  map(SYS_PHYSCOPY, do_copy);	 	/* use physical addressing */
  map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
  map(SYS_SAFECOPYTO, do_safecopy_to);	/* copy with pre-granted permission */
  map(SYS_VSAFECOPY, do_vsafecopy);	/* vectored safecopy */

  /* Safe memset. */
  map(SYS_SAFEMEMSET, do_safememset);	/* memset with pre-granted permission */

  /* Clock functionality. */
  map(SYS_TIMES, do_times);		/* get uptime and process times */
  map(SYS_SETALARM, do_setalarm);	/* schedule a synchronous alarm */
  map(SYS_STIME, do_stime);		/* set the boottime */
  map(SYS_SETTIME, do_settime);		/* set the system time (realtime) */
  map(SYS_VTIMER, do_vtimer);		/* set or retrieve a virtual timer */

  /* System control. */
  map(SYS_ABORT, do_abort);		/* abort MINIX */
  map(SYS_GETINFO, do_getinfo); 	/* request system information */
  map(SYS_DIAGCTL, do_diagctl);		/* diagnostics-related functionality */

  /* Profiling. */
  map(SYS_SPROF, do_sprofile);		/* start/stop statistical profiling */

  /* arm-specific. */
#if defined(__arm__)
  map(SYS_PADCONF, do_padconf);		/* configure pinmux */
#endif

  /* i386-specific. */
#if defined(__i386__)
  map(SYS_READBIOS, do_readbios);	/* read from BIOS locations */
  map(SYS_IOPENABLE, do_iopenable); 	/* enable I/O */
  map(SYS_SDEVIO, do_sdevio);		/* phys_insb, _insw, _outsb, _outsw */
#endif

  /* Machine state switching. */
  map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
  map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */

  /* Scheduling. */
  map(SYS_SCHEDULE, do_schedule);	/* reschedule a process */
  map(SYS_SCHEDCTL, do_schedctl);	/* change process scheduler */

}
/*===========================================================================*
 *				get_priv				     *
 *===========================================================================*/
int get_priv(
  register struct proc *rc,		/* new (child) process pointer */
  int priv_id				/* privilege id */
)
{
/* Allocate a new privilege structure for a system process. Privilege ids
 * can be assigned either statically or dynamically.
 */
  register struct priv *sp;                 /* privilege structure */

  if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
      for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
          if (sp->s_proc_nr == NONE) break;
      if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
  }
  else {                                    /* allocate slot from id */
      if(!is_static_priv_id(priv_id)) {
          return EINVAL;                    /* invalid static priv id */
      }
      if(priv[priv_id].s_proc_nr != NONE) {
          return EBUSY;                     /* slot already in use */
      }
      sp = &priv[priv_id];
  }
  rc->p_priv = sp;			    /* assign new slot */
  rc->p_priv->s_proc_nr = proc_nr(rc);	    /* set association */

  return(OK);
}
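
/* Usage sketch (hypothetical caller, not part of this file): a dynamic slot
 * is requested with NULL_PRIV_ID, a static one with a fixed privilege id:
 *
 *	if ((r = get_priv(rp, NULL_PRIV_ID)) != OK)	// first free dynamic slot
 *		return r;
 *	r = get_priv(rp, some_static_id);		// hypothetical static id
 *
 * On success the binding is two-way: rp->p_priv points at the slot, and the
 * slot's s_proc_nr records the process number.
 */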

/*===========================================================================*
 *				set_sendto_bit				     *
 *===========================================================================*/
void set_sendto_bit(const struct proc *rp, int id)
{
/* Allow a process to send messages to the process(es) associated with the
 * system privilege structure with the given ID.
 */

  /* Disallow the process from sending to a process privilege structure with no
   * associated process, and disallow the process from sending to itself.
   */
  if (id_to_nr(id) == NONE || priv_id(rp) == id) {
	unset_sys_bit(priv(rp)->s_ipc_to, id);
	return;
  }

  set_sys_bit(priv(rp)->s_ipc_to, id);

  /* The process that this process can now send to must be able to reply (or
   * vice versa). Therefore, its send mask should be updated as well. Ignore
   * receivers that don't support traps other than RECEIVE; they can't reply
   * or send messages anyway.
   */
  if (priv_addr(id)->s_trap_mask & ~((1 << RECEIVE)))
      set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *				unset_sendto_bit			     *
 *===========================================================================*/
void unset_sendto_bit(const struct proc *rp, int id)
{
/* Prevent a process from sending to another process. Retain the send mask
 * symmetry by also unsetting the bit for the other direction.
 */

  unset_sys_bit(priv(rp)->s_ipc_to, id);

  unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *			      fill_sendto_mask				     *
 *===========================================================================*/
void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
{
  int i;

  for (i=0; i < NR_SYS_PROCS; i++) {
  	if (get_sys_bit(*map, i))
  		set_sendto_bit(rp, i);
  	else
  		unset_sendto_bit(rp, i);
  }
}
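
/* Usage sketch (hypothetical policy, not part of this file): the caller
 * builds a sys_map_t with one bit per privilege id and hands it over;
 * fill_sendto_mask() then applies it symmetrically via (un)set_sendto_bit():
 *
 *	sys_map_t map;
 *	int id;
 *	memset(&map, 0, sizeof(map));
 *	for (id = 0; id < NR_SYS_PROCS; id++)
 *		if (may_send_to(id))		// hypothetical policy check
 *			set_sys_bit(map, id);
 *	fill_sendto_mask(rp, &map);
 */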

/*===========================================================================*
 *				send_sig				     *
 *===========================================================================*/
int send_sig(endpoint_t ep, int sig_nr)
{
/* Notify a system process about a signal. This is straightforward. Simply
 * set the signal that is to be delivered in the pending signals map and
 * send a notification with source SYSTEM.
 */
  register struct proc *rp;
  struct priv *priv;
  int proc_nr;

  if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
	return EINVAL;

  rp = proc_addr(proc_nr);
  priv = priv(rp);
  if(!priv) return ENOENT;
  sigaddset(&priv->s_sig_pending, sig_nr);
  mini_notify(proc_addr(SYSTEM), rp->p_endpoint);

  return OK;
}

/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(
  proc_nr_t proc_nr,		/* process to be signalled */
  int sig_nr			/* signal to be sent */
)
{
/* A system process wants to send a signal to a process.  Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * is blocked while the signal manager has not finished all signals for it.
 * Race conditions between calls to this function and the system calls that
 * process pending kernel signals cannot exist. Signal related functions are
 * only called when a user process causes a CPU exception and from the kernel
 * process level, which runs to completion.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;
  int s;

  /* Lookup signal manager. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the process is its own signal manager, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
	   	rp->p_endpoint, sig_nr);
       }
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
       	panic("send_sig failed");
       return;
  }

  s = sigismember(&rp->p_pending, sig_nr);
  /* Check if the signal is already pending. Process it otherwise. */
  if (!s) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
	  	panic("send_sig failed");
      }
  }
}

/*===========================================================================*
 *				sig_delay_done				     *
 *===========================================================================*/
void sig_delay_done(struct proc *rp)
{
/* A process is now known not to be sending any direct messages.
 * Tell PM that the stop delay has ended by sending a signal to the process.
 * Used for actual signal delivery.
 */

  rp->p_misc_flags &= ~MF_SIG_DELAY;

  cause_sig(proc_nr(rp), SIGSNDELAY);
}

/*===========================================================================*
 *				send_diag_sig				     *
 *===========================================================================*/
void send_diag_sig(void)
{
/* Send a SIGKMESS signal to all processes interested in receiving updates
 * about new diagnostics messages.
 */
  struct priv *privp;
  endpoint_t ep;

  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
	if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
		ep = proc_addr(privp->s_proc_nr)->p_endpoint;
		send_sig(ep, SIGKMESS);
	}
  }
}

/*===========================================================================*
 *			         clear_memreq				     *
 *===========================================================================*/
static void clear_memreq(struct proc *rp)
{
  struct proc **rpp;

  if (!RTS_ISSET(rp, RTS_VMREQUEST))
	return; /* nothing to do */

  for (rpp = &vmrequest; *rpp != NULL;
     rpp = &(*rpp)->p_vmrequest.nextrequestor) {
	if (*rpp == rp) {
		*rpp = rp->p_vmrequest.nextrequestor;
		break;
	}
  }

  RTS_UNSET(rp, RTS_VMREQUEST);
}

/*===========================================================================*
 *			         clear_ipc				     *
 *===========================================================================*/
static void clear_ipc(
  register struct proc *rc	/* slot of process to clean up */
)
{
/* Clear IPC data for a given process slot. */
  struct proc **xpp;			/* iterate over caller queue */

  if (RTS_ISSET(rc, RTS_SENDING)) {
      int target_proc;

      okendpt(rc->p_sendto_e, &target_proc);
      xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
      while (*xpp) {		/* check entire queue */
          if (*xpp == rc) {			/* process is on the queue */
              *xpp = (*xpp)->p_q_link;		/* replace by next process */
#if DEBUG_ENABLE_IPC_WARNINGS
	      printf("endpoint %d / %s removed from queue at %d\n",
	          rc->p_endpoint, rc->p_name, rc->p_sendto_e);
#endif
              break;				/* can only be queued once */
          }
          xpp = &(*xpp)->p_q_link;		/* proceed to next queued */
      }
      RTS_UNSET(rc, RTS_SENDING);
  }
  RTS_UNSET(rc, RTS_RECEIVING);
}

/*===========================================================================*
 *			         clear_endpoint				     *
 *===========================================================================*/
void clear_endpoint(
  register struct proc *rc		/* slot of process to clean up */
)
{
  if(isemptyp(rc)) panic("clear_endpoint: empty process: %d", rc->p_endpoint);

#if DEBUG_IPC_HOOK
  hook_ipc_clear(rc);
#endif

  /* Make sure that the exiting process is no longer scheduled. */
  RTS_SET(rc, RTS_NO_ENDPOINT);
  if (priv(rc)->s_flags & SYS_PROC)
  {
	priv(rc)->s_asynsize= 0;
  }

  /* If the process happens to be queued trying to send a
   * message, then it must be removed from the message queues.
   */
  clear_ipc(rc);

  /* Likewise, if another process was sending or receiving a message to or
   * from the exiting process, it must be alerted that the process is no
   * longer alive. Check all processes.
   */
  clear_ipc_refs(rc, EDEADSRCDST);

  /* Finally, if the process was blocked on a VM request, remove it from the
   * queue of processes waiting to be processed by VM.
   */
  clear_memreq(rc);
}

/*===========================================================================*
 *			       clear_ipc_refs				     *
 *===========================================================================*/
void clear_ipc_refs(
  register struct proc *rc,		/* slot of process to clean up */
  int caller_ret			/* code to return to blocked callers */
)
{
/* Clear IPC references for a given process slot. */
  struct proc *rp;			/* iterate over process table */
  int src_id;

  /* Tell processes that sent asynchronous messages to 'rc' that these
   * messages are not going to be delivered. */
  while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
      cancel_async(proc_addr(id_to_nr(src_id)), rc);

  for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
      if(isemptyp(rp))
	continue;

      /* Unset pending notification bits. */
      unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);

      /* Unset pending asynchronous messages. */
      unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);

      /* Check if process depends on given process. */
      if (P_BLOCKEDON(rp) == rc->p_endpoint) {
          rp->p_reg.retreg = caller_ret;	/* return requested code */
	  clear_ipc(rp);
      }
  }
}

/*===========================================================================*
 *                              kernel_call_resume                           *
 *===========================================================================*/
void kernel_call_resume(struct proc *caller)
{
	int result;

	assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
	assert(!RTS_ISSET(caller, RTS_VMREQUEST));

	assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);

	/*
	printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
			caller->p_name, caller->p_endpoint,
			caller->p_rts_flags, caller->p_misc_flags);
	 */

	/* Re-execute the kernel call, with MF_KCALL_RESUME still set so
	 * the call knows this is a retry.
	 */
	result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
	/*
	 * We are resuming the kernel call, so we have to remove this flag so
	 * it can be set again.
	 */
	caller->p_misc_flags &= ~MF_KCALL_RESUME;
	kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
}

/*===========================================================================*
 *                               sched_proc                                  *
 *===========================================================================*/
int sched_proc(struct proc *p,
			int priority,
			int quantum,
			int cpu)
{
	/* Make sure the values given are within the allowed range. */
	if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
		return(EINVAL);

	if (quantum < 1 && quantum != -1)
		return(EINVAL);

#ifdef CONFIG_SMP
	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
		return(EINVAL);
	if (cpu != -1 && !(cpu_is_ready(cpu)))
		return EBADCPU;
#endif

	/* In some cases, we might be rescheduling a runnable process. In such
	 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
	 * flag before the generic unset to dequeue/enqueue the process.
	 */

	/* FIXME this preempts the process, do we really want to do that? */

	/* FIXME this is a problem for SMP if the process currently runs on a
	 * different CPU */
	if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
		if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
			smp_schedule_migrate_proc(p, cpu);
		}
#endif

		RTS_SET(p, RTS_NO_QUANTUM);
	}

	if (priority != -1)
		p->p_priority = priority;
	if (quantum != -1) {
		p->p_quantum_size_ms = quantum;
		p->p_cpu_time_left = ms_2_cpu_time(quantum);
	}
#ifdef CONFIG_SMP
	if (cpu != -1)
		p->p_cpu = cpu;
#endif

	/* Clear the scheduling bit and enqueue the process. */
	RTS_UNSET(p, RTS_NO_QUANTUM);

	return OK;
}
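
/* Usage sketch (hypothetical values, not part of this file): -1 means "leave
 * unchanged", so bumping only the priority of a process on its current CPU is
 *
 *	r = sched_proc(p, new_priority, -1, -1);	// keep quantum and cpu
 *
 * while the do_schedule() handler mapped above forwards the values it
 * received from the user-space scheduler.
 */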

/*===========================================================================*
 *				add_ipc_filter				     *
 *===========================================================================*/
int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
	size_t length)
{
	int num_elements, r;
	ipc_filter_t *ipcf, **ipcfp;

	/* Validate arguments. */
	if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
		return EINVAL;

	if (length % sizeof(ipc_filter_el_t) != 0)
		return EINVAL;

	num_elements = length / sizeof(ipc_filter_el_t);
	if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
		return E2BIG;

	/* Allocate a new IPC filter slot. */
	IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
	if (ipcf == NULL)
		return ENOMEM;

	/* Fill details. */
	ipcf->num_elements = num_elements;
	ipcf->next = NULL;
	r = data_copy(rp->p_endpoint, address,
		KERNEL, (vir_bytes)ipcf->elements, length);
	if (r == OK)
		r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
	if (r != OK) {
		IPCF_POOL_FREE_SLOT(ipcf);
		return r;
	}

	/* Add the new filter at the end of the IPC filter chain. */
	for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
	    ipcfp = &(*ipcfp)->next)
		;
	*ipcfp = ipcf;

	return OK;
}
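
/* For illustration, a whitelist element that admits only one message type
 * from one sender might look like this (a sketch; the element layout comes
 * from <minix/ipc_filter.h> and may differ between revisions):
 *
 *	ipc_filter_el_t el;
 *	el.flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
 *	el.m_source = some_endpoint;		// hypothetical sender
 *	el.m_type = SOME_REQUEST_TYPE;		// hypothetical message type
 *
 * The caller passes an array of such elements and its size in bytes as the
 * address/length pair; length must be a whole number of elements, with at
 * most IPCF_MAX_ELEMENTS entries, as checked above.
 */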

/*===========================================================================*
 *				clear_ipc_filters			     *
 *===========================================================================*/
void clear_ipc_filters(struct proc *rp)
{
	ipc_filter_t *curr_ipcf, *ipcf;

	ipcf = priv(rp)->s_ipcf;
	while (ipcf != NULL) {
		curr_ipcf = ipcf;
		ipcf = ipcf->next;
		IPCF_POOL_FREE_SLOT(curr_ipcf);
	}

	priv(rp)->s_ipcf = NULL;

	/* VM is a special case here: since the cleared IPC filter may have
	 * blocked memory handling requests, we may now have to tell VM that
	 * there are "new" requests pending.
	 */
	if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
		if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
			panic("send_sig failed");
}

/*===========================================================================*
 *				check_ipc_filter			     *
 *===========================================================================*/
int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
{
	ipc_filter_el_t *ipcf_el;
	int i, num_elements, flags;

	if (ipcf == NULL)
		return OK;

	num_elements = ipcf->num_elements;
	flags = 0;
	for (i = 0; i < num_elements; i++) {
		ipcf_el = &ipcf->elements[i];
		if (!IPCF_EL_CHECK(ipcf_el))
			return EINVAL;
		flags |= ipcf_el->flags;
	}

	if (fill_flags)
		ipcf->flags = flags;
	else if (ipcf->flags != flags)
		return EINVAL;
	return OK;
}

/*===========================================================================*
 *				allow_ipc_filtered_msg			     *
 *===========================================================================*/
int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
	vir_bytes m_src_v, message *m_src_p)
{
	int i, r, num_elements, get_mtype, allow;
	ipc_filter_t *ipcf;
	ipc_filter_el_t *ipcf_el;
	message m_buff;

	ipcf = priv(rp)->s_ipcf;
	if (ipcf == NULL)
		return TRUE; /* no IPC filters, always allow */

	if (m_src_p == NULL) {
		assert(m_src_v != 0);

		/* Should we copy in the message type? */
		get_mtype = FALSE;
		do {
#if DEBUG_DUMPIPCF
			if (TRUE) {
#else
			if (ipcf->flags & IPCF_MATCH_M_TYPE) {
#endif
				get_mtype = TRUE;
				break;
			}
			ipcf = ipcf->next;
		} while (ipcf);
		ipcf = priv(rp)->s_ipcf; /* reset to start */

		/* If so, copy it in from the process. */
		if (get_mtype) {
			r = data_copy(src_e,
			    m_src_v + offsetof(message, m_type), KERNEL,
			    (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
			if (r != OK) {
				/* allow for now, this will fail later anyway */
#if DEBUG_DUMPIPCF
				printf("KERNEL: allow_ipc_filtered_msg: data "
				    "copy error %d, allowing message...\n", r);
#endif
				return TRUE;
			}
		}
		m_src_p = &m_buff;
	}

	m_src_p->m_source = src_e;

	/* See if the message is allowed. */
	allow = (ipcf->type == IPCF_BLACKLIST);
	do {
		if (allow != (ipcf->type == IPCF_WHITELIST)) {
			num_elements = ipcf->num_elements;
			for (i = 0; i < num_elements; i++) {
				ipcf_el = &ipcf->elements[i];
				if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
					allow = (ipcf->type == IPCF_WHITELIST);
					break;
				}
			}
		}
		ipcf = ipcf->next;
	} while (ipcf);

#if DEBUG_DUMPIPCF
	printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
	    TRUE /*printparams*/);
#endif

	return allow;
}

/*===========================================================================*
 *			  allow_ipc_filtered_memreq			     *
 *===========================================================================*/
int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
{
	/* Determine whether VM should receive a request to handle memory
	 * that is the result of process 'src_rp' trying to access currently
	 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
	 * be given the request, FALSE otherwise.
	 */

	struct proc *vmp;
	message m_buf;
	int allow_src, allow_dst;

	vmp = proc_addr(VM_PROC_NR);

	/* If VM has no filter in place, all requests should go through. */
	if (priv(vmp)->s_ipcf == NULL)
		return TRUE;

	/* VM obtains memory requests in response to a SIGKMEM signal, which
	 * is a notification sent from SYSTEM. Thus, if VM blocks such
	 * notifications, it also should not get any memory requests. Of
	 * course, VM should not be asking for requests in that case either,
	 * but the extra check doesn't hurt.
	 */
	m_buf.m_type = NOTIFY_MESSAGE;
	if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
		return FALSE;

	/* A more refined policy may be implemented here, for example to
	 * ensure that both the source and the destination (if different)
	 * are in the group of processes that VM wants to talk to. Since VM
	 * is basically not able to handle any memory requests during an
	 * update, we will not get here, and none of that is needed.
	 */
	return TRUE;
}

/*===========================================================================*
 *                             priv_add_irq                                  *
 *===========================================================================*/
int priv_add_irq(struct proc *rp, int irq)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IRQ;	/* Check IRQ */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_irq; i++) {
		if (priv->s_irq_tab[i] == irq)
			return OK;
	}

	i = priv->s_nr_irq;
	if (i >= NR_IRQ) {
		printf("do_privctl: %d already has %d IRQs.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_irq_tab[i] = irq;
	priv->s_nr_irq++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_io                                   *
 *===========================================================================*/
int priv_add_io(struct proc *rp, struct io_range *ior)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IO_PORT;	/* Check I/O accesses */

	for (i = 0; i < priv->s_nr_io_range; i++) {
		if (priv->s_io_tab[i].ior_base == ior->ior_base &&
			priv->s_io_tab[i].ior_limit == ior->ior_limit)
			return OK;
	}

	i = priv->s_nr_io_range;
	if (i >= NR_IO_RANGE) {
		printf("do_privctl: %d already has %d i/o ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}

	priv->s_io_tab[i] = *ior;
	priv->s_nr_io_range++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_mem                                  *
 *===========================================================================*/
int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_MEM;	/* Check memory mappings */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_mem_range; i++) {
		if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
			priv->s_mem_tab[i].mr_limit == memr->mr_limit)
			return OK;
	}

	i = priv->s_nr_mem_range;
	if (i >= NR_MEM_RANGE) {
		printf("do_privctl: %d already has %d mem ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_mem_tab[i] = *memr;
	priv->s_nr_mem_range++;
	return OK;
}