/* This task handles the interface between the kernel and user-level servers.
 * System services can be accessed by doing a system call. System calls are
 * transformed into request messages, which are handled by this task. By
 * convention, a sys_call() is transformed into a SYS_CALL request message that
 * is handled in a function named do_call().
 *
 * A private call vector is used to map all system calls to the functions that
 * handle them. The actual handler functions are contained in separate files
 * to keep this file clean. The call vector is used in the system task's main
 * loop to handle all incoming requests.
 *
 * In addition to the main sys_task() entry point, which starts the main loop,
 * there are several other minor entry points:
 *   get_priv:		assign privilege structure to user or system process
 *   set_sendto_bit:	allow a process to send messages to a new target
 *   unset_sendto_bit:	disallow a process from sending messages to a target
 *   fill_sendto_mask:	fill the target mask of a given process
 *   send_sig:		send a signal directly to a system process
 *   cause_sig:		take action to cause a signal to occur via a signal mgr
 *   sig_delay_done:	tell PM that a process is not sending
 *   send_diag_sig:	send a diagnostics signal to interested processes
 *   get_randomness:	accumulate randomness in a buffer
 *   clear_endpoint:	remove a process' ability to send and receive messages
 *   sched_proc:	schedule a process
 *
 * Changes:
 *   Nov 22, 2009   get_priv supports static priv ids  (Cristiano Giuffrida)
 *   Aug 04, 2005   check if system call is allowed  (Jorrit N. Herder)
 *   Jul 20, 2005   send signal to services with message  (Jorrit N. Herder)
 *   Jan 15, 2005   new, generalized virtual copy function  (Jorrit N. Herder)
 *   Oct 10, 2004   dispatch system calls from call vector  (Jorrit N. Herder)
 *   Sep 30, 2004   source code documentation updated  (Jorrit N. Herder)
 */
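
/* Illustrative sketch (not kernel code): a system process typically reaches
 * this file through libsys, which packs the request into a message and traps
 * into the kernel with _kernel_call(). Roughly, and glossing over the library
 * internals:
 *
 *	message m;
 *	memset(&m, 0, sizeof(m));
 *	m.m_sigcalls.endpt = proc_ep;		(call-specific fields)
 *	m.m_sigcalls.sig = SIGTERM;
 *	r = _kernel_call(SYS_KILL, &m);		(traps into kernel_call())
 *
 * The message field names above are assumptions for illustration; each
 * sys_*() stub in libsys defines its own message layout. The kernel side
 * below copies the message in, dispatches it through call_vec[], and copies
 * the result back to the caller's buffer.
 */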

#include "kernel/kernel.h"
#include "kernel/system.h"
#include "kernel/vm.h"
#include "kernel/clock.h"
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
#include <signal.h>
#include <unistd.h>
#include <minix/endpoint.h>
#include <minix/safecopies.h>

/* Declaration of the call vector that defines the mapping of system calls
 * to handler functions. The vector is initialized in system_init() with
 * map(), which asserts that each system call number is within range. Entries
 * that are never mapped stay NULL and are rejected at dispatch time.
 */
static int (*call_vec[NR_SYS_CALLS])(struct proc * caller, message *m_ptr);

#define map(call_nr, handler)					\
    {	int call_index = (call_nr) - KERNEL_CALL;		\
	assert(call_index >= 0 && call_index < NR_SYS_CALLS);	\
	call_vec[call_index] = (handler);			\
    }

static void kernel_call_finish(struct proc * caller, message *msg, int result)
{
  if(result == VMSUSPEND) {
	  /* Special case: message has to be saved for handling
	   * until VM tells us it's allowed. VM has been notified
	   * and we must wait for its reply to restart the call.
	   */
	  assert(RTS_ISSET(caller, RTS_VMREQUEST));
	  assert(caller->p_vmrequest.type == VMSTYPE_KERNELCALL);
	  caller->p_vmrequest.saved.reqmsg = *msg;
	  caller->p_misc_flags |= MF_KCALL_RESUME;
  } else {
	  /*
	   * call is finished, we could have been suspended because of VM,
	   * remove the request message
	   */
	  caller->p_vmrequest.saved.reqmsg.m_source = NONE;
	  if (result != EDONTREPLY) {
		  /* copy the result as a message to the original user buffer */
		  msg->m_source = SYSTEM;
		  msg->m_type = result;		/* report status of call */
#if DEBUG_IPC_HOOK
		  hook_ipc_msgkresult(msg, caller);
#endif
		  if (copy_msg_to_user(msg, (message *)caller->p_delivermsg_vir)) {
			  printf("WARNING wrong user pointer 0x%08x from "
					  "process %s / %d\n",
					  caller->p_delivermsg_vir,
					  caller->p_name,
					  caller->p_endpoint);
			  cause_sig(proc_nr(caller), SIGSEGV);
		  }
	  }
  }
}
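
/* Lifecycle sketch of a suspended kernel call, assembled purely from the
 * code in this file (no new interfaces assumed):
 *
 *	kernel_call(m_user, caller)
 *	  -> kernel_call_dispatch()	handler returns VMSUSPEND
 *	  -> kernel_call_finish()	saves *msg, sets MF_KCALL_RESUME
 *	  ... VM resolves the memory request and replies ...
 *	  -> kernel_call_resume(caller)	re-dispatches the saved message
 *	  -> kernel_call_finish()	delivers the final result
 */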

static int kernel_call_dispatch(struct proc * caller, message *msg)
{
  int result = OK;
  int call_nr;

#if DEBUG_IPC_HOOK
  hook_ipc_msgkcall(msg, caller);
#endif
  call_nr = msg->m_type - KERNEL_CALL;

  /* See if the caller made a valid request and try to handle it. */
  if (call_nr < 0 || call_nr >= NR_SYS_CALLS) {	/* check call number */
	  printf("SYSTEM: illegal request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = EBADREQUEST;			/* illegal message type */
  }
  else if (!GET_BIT(priv(caller)->s_k_call_mask, call_nr)) {
	  printf("SYSTEM: denied request %d from %d.\n",
			  call_nr, msg->m_source);
	  result = ECALLDENIED;			/* call denied by call mask */
  } else {
	  /* handle the system call */
	  if (call_vec[call_nr])
		  result = (*call_vec[call_nr])(caller, msg);
	  else {
		  printf("Unused kernel call %d from %d\n",
				  call_nr, caller->p_endpoint);
		  result = EBADREQUEST;
	  }
  }

  return result;
}

/*===========================================================================*
 *				kernel_call				     *
 *===========================================================================*/
/*
 * This function checks the basic kernel call parameters and, if accepted,
 * dispatches the call to the right handler.
 */
void kernel_call(message *m_user, struct proc * caller)
{
  int result = OK;
  message msg;

  caller->p_delivermsg_vir = (vir_bytes) m_user;
  /*
   * The ldt and cr3 of the caller process are already loaded, either because
   * the process has just trapped into the kernel, or because they were set in
   * switch_to_user() before we resumed execution of an interrupted kernel
   * call.
   */
  if (copy_msg_from_user(m_user, &msg) == 0) {
	  msg.m_source = caller->p_endpoint;
	  result = kernel_call_dispatch(caller, &msg);
  }
  else {
	  printf("WARNING wrong user pointer 0x%08x from process %s / %d\n",
			  m_user, caller->p_name, caller->p_endpoint);
	  cause_sig(proc_nr(caller), SIGSEGV);
	  return;
  }

  /* remember who invoked the kcall so we can bill it its time */
  kbill_kcall = caller;

  kernel_call_finish(caller, &msg, result);
}

/*===========================================================================*
 *				system_init				     *
 *===========================================================================*/
void system_init(void)
{
  register struct priv *sp;
  int i;

  /* Initialize IRQ handler hooks. Mark all hooks available. */
  for (i=0; i<NR_IRQ_HOOKS; i++) {
      irq_hooks[i].proc_nr_e = NONE;
  }

  /* Initialize all alarm timers for all processes. */
  for (sp=BEG_PRIV_ADDR; sp < END_PRIV_ADDR; sp++) {
    tmr_inittimer(&(sp->s_alarm_timer));
  }

  /* Initialize the call vector to NULL for all entries; some system calls
   * may be disabled or nonexistent, and unmapped calls are rejected at
   * dispatch time. Then explicitly map known calls to their handler
   * functions. This is done with a macro that asserts that the call number
   * is within range. The ordering is not important here.
   */
  for (i=0; i<NR_SYS_CALLS; i++) {
      call_vec[i] = NULL;
  }

  /* Process management. */
  map(SYS_FORK, do_fork); 		/* a process forked a new process */
  map(SYS_EXEC, do_exec);		/* update process after execute */
  map(SYS_CLEAR, do_clear);		/* clean up after process exit */
  map(SYS_EXIT, do_exit);		/* a system process wants to exit */
  map(SYS_PRIVCTL, do_privctl);		/* system privileges control */
  map(SYS_TRACE, do_trace);		/* request a trace operation */
  map(SYS_SETGRANT, do_setgrant);	/* get/set own parameters */
  map(SYS_RUNCTL, do_runctl);		/* set/clear stop flag of a process */
  map(SYS_UPDATE, do_update);		/* update a process into another */
  map(SYS_STATECTL, do_statectl);	/* let a process control its state */

  /* Signal handling. */
  map(SYS_KILL, do_kill); 		/* cause a process to be signaled */
  map(SYS_GETKSIG, do_getksig);		/* signal manager checks for signals */
  map(SYS_ENDKSIG, do_endksig);		/* signal manager finished signal */
  map(SYS_SIGSEND, do_sigsend);		/* start POSIX-style signal */
  map(SYS_SIGRETURN, do_sigreturn);	/* return from POSIX-style signal */

  /* Device I/O. */
  map(SYS_IRQCTL, do_irqctl);  		/* interrupt control operations */
#if defined(__i386__)
  map(SYS_DEVIO, do_devio);   		/* inb, inw, inl, outb, outw, outl */
  map(SYS_VDEVIO, do_vdevio);  		/* vector with devio requests */
#endif

  /* Memory management. */
  map(SYS_MEMSET, do_memset);		/* write char to memory area */
  map(SYS_VMCTL, do_vmctl);		/* various VM process settings */

  /* Copying. */
  map(SYS_UMAP, do_umap);		/* map virtual to physical address */
  map(SYS_UMAP_REMOTE, do_umap_remote);	/* do_umap for non-caller process */
  map(SYS_VUMAP, do_vumap);		/* vectored virtual to physical map */
  map(SYS_VIRCOPY, do_vircopy); 	/* use pure virtual addressing */
  map(SYS_PHYSCOPY, do_copy);	 	/* use physical addressing */
  map(SYS_SAFECOPYFROM, do_safecopy_from);/* copy with pre-granted permission */
  map(SYS_SAFECOPYTO, do_safecopy_to);	/* copy with pre-granted permission */
  map(SYS_VSAFECOPY, do_vsafecopy);	/* vectored safecopy */

  /* safe memset */
  map(SYS_SAFEMEMSET, do_safememset);	/* safememset */

  /* Clock functionality. */
  map(SYS_TIMES, do_times);		/* get uptime and process times */
  map(SYS_SETALARM, do_setalarm);	/* schedule a synchronous alarm */
  map(SYS_STIME, do_stime);		/* set the boottime */
  map(SYS_SETTIME, do_settime);		/* set the system time (realtime) */
  map(SYS_VTIMER, do_vtimer);		/* set or retrieve a virtual timer */

  /* System control. */
  map(SYS_ABORT, do_abort);		/* abort MINIX */
  map(SYS_GETINFO, do_getinfo); 	/* request system information */
  map(SYS_DIAGCTL, do_diagctl);		/* diagnostics-related functionality */

  /* Profiling. */
  map(SYS_SPROF, do_sprofile);		/* start/stop statistical profiling */

  /* arm-specific. */
#if defined(__arm__)
  map(SYS_PADCONF, do_padconf);		/* configure pinmux */
#endif

  /* i386-specific. */
#if defined(__i386__)
  map(SYS_READBIOS, do_readbios);	/* read from BIOS locations */
  map(SYS_IOPENABLE, do_iopenable); 	/* Enable I/O */
  map(SYS_SDEVIO, do_sdevio);		/* phys_insb, _insw, _outsb, _outsw */
#endif

  /* Machine state switching. */
  map(SYS_SETMCONTEXT, do_setmcontext); /* set machine context */
  map(SYS_GETMCONTEXT, do_getmcontext); /* get machine context */

  /* Scheduling */
  map(SYS_SCHEDULE, do_schedule);	/* reschedule a process */
  map(SYS_SCHEDCTL, do_schedctl);	/* change process scheduler */

}
/*===========================================================================*
 *				get_priv				     *
 *===========================================================================*/
int get_priv(
  register struct proc *rc,		/* new (child) process pointer */
  int priv_id				/* privilege id */
)
{
/* Allocate a new privilege structure for a system process. Privilege ids
 * can be assigned either statically or dynamically.
 */
  register struct priv *sp;                 /* privilege structure */

  if(priv_id == NULL_PRIV_ID) {             /* allocate slot dynamically */
      for (sp = BEG_DYN_PRIV_ADDR; sp < END_DYN_PRIV_ADDR; ++sp)
          if (sp->s_proc_nr == NONE) break;
      if (sp >= END_DYN_PRIV_ADDR) return(ENOSPC);
  }
  else {                                    /* allocate slot from id */
      if(!is_static_priv_id(priv_id)) {
          return EINVAL;                    /* invalid static priv id */
      }
      if(priv[priv_id].s_proc_nr != NONE) {
          return EBUSY;                     /* slot already in use */
      }
      sp = &priv[priv_id];
  }
  rc->p_priv = sp;			    /* assign new slot */
  rc->p_priv->s_proc_nr = proc_nr(rc);	    /* set association */

  return(OK);
}
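
/* Usage sketch (hypothetical caller, e.g. do_privctl() turning a user
 * process into a system process); NULL_PRIV_ID requests a dynamic slot,
 * any other id must be a valid static slot:
 *
 *	if ((i = get_priv(rp, NULL_PRIV_ID)) != OK)
 *		return i;
 *
 * A dynamic request fails with ENOSPC when all dynamic slots are taken; a
 * static request fails with EINVAL for a bad id or EBUSY for a slot that is
 * already in use, as checked above.
 */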

/*===========================================================================*
 *				set_sendto_bit				     *
 *===========================================================================*/
void set_sendto_bit(const struct proc *rp, int id)
{
/* Allow a process to send messages to the process(es) associated with the
 * system privilege structure with the given ID.
 */

  /* Disallow the process from sending to a privilege structure with no
   * associated process, and disallow the process from sending to itself.
   */
  if (id_to_nr(id) == NONE || priv_id(rp) == id) {
	unset_sys_bit(priv(rp)->s_ipc_to, id);
	return;
  }

  set_sys_bit(priv(rp)->s_ipc_to, id);

  /* The process that this process can now send to must be able to reply (or
   * vice versa). Therefore, its send mask should be updated as well. Ignore
   * receivers that don't support traps other than RECEIVE; they can't reply
   * or send messages anyway.
   */
  if (priv_addr(id)->s_trap_mask & ~(1 << RECEIVE))
      set_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}
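
/* Worked example of the symmetry rule above: granting, say, PM a send bit
 * for VFS also sets the reverse bit, so that VFS can send its reply. The
 * reverse bit is skipped when the target's s_trap_mask only allows RECEIVE,
 * since such a process can never send anything back. (PM and VFS are just
 * illustrative endpoints here.)
 */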

/*===========================================================================*
 *				unset_sendto_bit			     *
 *===========================================================================*/
void unset_sendto_bit(const struct proc *rp, int id)
{
/* Prevent a process from sending to another process. Retain the send mask
 * symmetry by also unsetting the bit for the other direction.
 */

  unset_sys_bit(priv(rp)->s_ipc_to, id);

  unset_sys_bit(priv_addr(id)->s_ipc_to, priv_id(rp));
}

/*===========================================================================*
 *			      fill_sendto_mask				     *
 *===========================================================================*/
void fill_sendto_mask(const struct proc *rp, sys_map_t *map)
{
  int i;

  for (i=0; i < NR_SYS_PROCS; i++) {
  	if (get_sys_bit(*map, i))
  		set_sendto_bit(rp, i);
  	else
  		unset_sendto_bit(rp, i);
  }
}
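
/* Usage sketch, assuming a caller that wants 'rp' to talk only to the
 * privilege slots marked in 'allowed' (the pm_rp/vfs_rp names are
 * illustrative):
 *
 *	sys_map_t allowed;
 *	memset(&allowed, 0, sizeof(allowed));
 *	set_sys_bit(allowed, priv_id(pm_rp));
 *	set_sys_bit(allowed, priv_id(vfs_rp));
 *	fill_sendto_mask(rp, &allowed);
 *
 * Every bit in the map is applied, so slots not set in 'allowed' are
 * explicitly revoked via unset_sendto_bit().
 */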

/*===========================================================================*
 *				send_sig				     *
 *===========================================================================*/
int send_sig(endpoint_t ep, int sig_nr)
{
/* Notify a system process about a signal. This is straightforward. Simply
 * set the signal that is to be delivered in the pending signals map and
 * send a notification with source SYSTEM.
 */
  register struct proc *rp;
  struct priv *priv;
  int proc_nr;

  if(!isokendpt(ep, &proc_nr) || isemptyn(proc_nr))
	return EINVAL;

  rp = proc_addr(proc_nr);
  priv = priv(rp);
  if(!priv) return ENOENT;
  sigaddset(&priv->s_sig_pending, sig_nr);
  mini_notify(proc_addr(SYSTEM), rp->p_endpoint);

  return OK;
}

/*===========================================================================*
 *				cause_sig				     *
 *===========================================================================*/
void cause_sig(proc_nr_t proc_nr, int sig_nr)
{
/* A system process wants to send signal 'sig_nr' to process 'proc_nr'.
 * Examples are:
 *  - HARDWARE wanting to cause a SIGSEGV after a CPU exception
 *  - TTY wanting to cause SIGINT upon getting a DEL
 *  - FS wanting to cause SIGPIPE for a broken pipe
 * Signals are handled by sending a message to the signal manager assigned to
 * the process. This function handles the signals and makes sure the signal
 * manager gets them by sending a notification. The process being signaled
 * is blocked while the signal manager has not finished all signals for it.
 * Race conditions between calls to this function and the system calls that
 * process pending kernel signals cannot exist. Signal related functions are
 * only called when a user process causes a CPU exception and from the kernel
 * process level, which runs to completion.
 */
  register struct proc *rp, *sig_mgr_rp;
  endpoint_t sig_mgr;
  int sig_mgr_proc_nr;
  int s;

  /* Lookup signal manager. */
  rp = proc_addr(proc_nr);
  sig_mgr = priv(rp)->s_sig_mgr;
  if(sig_mgr == SELF) sig_mgr = rp->p_endpoint;

  /* If the target is its own signal manager, send the signal directly. */
  if(rp->p_endpoint == sig_mgr) {
       if(SIGS_IS_LETHAL(sig_nr)) {
           /* If the signal is lethal, see if a backup signal manager exists. */
           sig_mgr = priv(rp)->s_bak_sig_mgr;
           if(sig_mgr != NONE && isokendpt(sig_mgr, &sig_mgr_proc_nr)) {
               priv(rp)->s_sig_mgr = sig_mgr;
               priv(rp)->s_bak_sig_mgr = NONE;
               sig_mgr_rp = proc_addr(sig_mgr_proc_nr);
               RTS_UNSET(sig_mgr_rp, RTS_NO_PRIV);
               cause_sig(proc_nr, sig_nr); /* try again with the new sig mgr. */
               return;
           }
           /* We are out of luck. Time to panic. */
           proc_stacktrace(rp);
           panic("cause_sig: sig manager %d gets lethal signal %d for itself",
	   	rp->p_endpoint, sig_nr);
       }
       sigaddset(&priv(rp)->s_sig_pending, sig_nr);
       if(OK != send_sig(rp->p_endpoint, SIGKSIGSM))
       	panic("send_sig failed");
       return;
  }

  /* Check if the signal is already pending. Process it otherwise. */
  s = sigismember(&rp->p_pending, sig_nr);
  if (!s) {
      sigaddset(&rp->p_pending, sig_nr);
      if (! (RTS_ISSET(rp, RTS_SIGNALED))) {		/* other pending */
	  RTS_SET(rp, RTS_SIGNALED | RTS_SIG_PENDING);
          if(OK != send_sig(sig_mgr, SIGKSIG))
	  	panic("send_sig failed");
      }
  }
}
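
/* Delivery sequence sketch, based only on the logic above: for an ordinary
 * process, cause_sig() marks the signal in p_pending, sets RTS_SIGNALED and
 * RTS_SIG_PENDING (blocking the process), and notifies the signal manager
 * with SIGKSIG via send_sig(). The manager then retrieves and acknowledges
 * the pending signals through the SYS_GETKSIG and SYS_ENDKSIG kernel calls
 * mapped in system_init() above. A system process that is its own signal
 * manager instead gets SIGKSIGSM directly, with the backup manager as the
 * fallback for lethal signals.
 */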

/*===========================================================================*
 *				sig_delay_done				     *
 *===========================================================================*/
void sig_delay_done(struct proc *rp)
{
/* A process is now known not to be sending any direct messages.
 * Tell PM that the stop delay has ended, by sending a signal to the process.
 * Used for actual signal delivery.
 */

  rp->p_misc_flags &= ~MF_SIG_DELAY;

  cause_sig(proc_nr(rp), SIGSNDELAY);
}

/*===========================================================================*
 *				send_diag_sig				     *
 *===========================================================================*/
void send_diag_sig(void)
{
/* Send a SIGKMESS signal to all processes interested in receiving updates
 * about new diagnostics messages.
 */
  struct priv *privp;
  endpoint_t ep;

  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; privp++) {
	if (privp->s_proc_nr != NONE && privp->s_diag_sig == TRUE) {
		ep = proc_addr(privp->s_proc_nr)->p_endpoint;
		send_sig(ep, SIGKMESS);
	}
  }
}

/*===========================================================================*
 *			         clear_memreq				     *
 *===========================================================================*/
static void clear_memreq(struct proc *rp)
{
  struct proc **rpp;

  if (!RTS_ISSET(rp, RTS_VMREQUEST))
	return; /* nothing to do */

  for (rpp = &vmrequest; *rpp != NULL;
     rpp = &(*rpp)->p_vmrequest.nextrequestor) {
	if (*rpp == rp) {
		*rpp = rp->p_vmrequest.nextrequestor;
		break;
	}
  }

  RTS_UNSET(rp, RTS_VMREQUEST);
}

/*===========================================================================*
 *			         clear_ipc				     *
 *===========================================================================*/
static void clear_ipc(
  register struct proc *rc	/* slot of process to clean up */
)
{
/* Clear IPC data for a given process slot. */
  struct proc **xpp;			/* iterate over caller queue */

  if (RTS_ISSET(rc, RTS_SENDING)) {
      int target_proc;

      okendpt(rc->p_sendto_e, &target_proc);
      xpp = &proc_addr(target_proc)->p_caller_q; /* destination's queue */
      while (*xpp) {		/* check entire queue */
          if (*xpp == rc) {			/* process is on the queue */
              *xpp = (*xpp)->p_q_link;		/* replace by next process */
#if DEBUG_ENABLE_IPC_WARNINGS
	      printf("endpoint %d / %s removed from queue at %d\n",
	          rc->p_endpoint, rc->p_name, rc->p_sendto_e);
#endif
              break;				/* can only be queued once */
          }
          xpp = &(*xpp)->p_q_link;		/* proceed to next queued */
      }
      RTS_UNSET(rc, RTS_SENDING);
  }
  RTS_UNSET(rc, RTS_RECEIVING);
}

/*===========================================================================*
 *			         clear_endpoint				     *
 *===========================================================================*/
void clear_endpoint(struct proc * rc)
{
/* Clean up the slot of the process given as 'rc'. */
  if(isemptyp(rc)) panic("clear_endpoint: empty process: %d", rc->p_endpoint);

#if DEBUG_IPC_HOOK
  hook_ipc_clear(rc);
#endif

  /* Make sure that the exiting process is no longer scheduled. */
  RTS_SET(rc, RTS_NO_ENDPOINT);
  if (priv(rc)->s_flags & SYS_PROC)
  {
	priv(rc)->s_asynsize = 0;
  }

  /* If the process happens to be queued trying to send a
   * message, then it must be removed from the message queues.
   */
  clear_ipc(rc);

  /* Likewise, if another process was sending or receiving a message to or
   * from the exiting process, it must be alerted that the process is no
   * longer alive. Check all processes.
   */
  clear_ipc_refs(rc, EDEADSRCDST);

  /* Finally, if the process was blocked on a VM request, remove it from the
   * queue of processes waiting to be processed by VM.
   */
  clear_memreq(rc);
}

/*===========================================================================*
 *			       clear_ipc_refs				     *
 *===========================================================================*/
void clear_ipc_refs(
  register struct proc *rc,		/* slot of process to clean up */
  int caller_ret			/* code to return on callers */
)
{
/* Clear IPC references for a given process slot. */
  struct proc *rp;			/* iterate over process table */
  int src_id;

  /* Tell processes that sent asynchronous messages to 'rc' that those
   * messages are not going to be delivered. */
  while ((src_id = has_pending_asend(rc, ANY)) != NULL_PRIV_ID)
      cancel_async(proc_addr(id_to_nr(src_id)), rc);

  for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; rp++) {
      if(isemptyp(rp))
	continue;

      /* Unset pending notification bits. */
      unset_sys_bit(priv(rp)->s_notify_pending, priv(rc)->s_id);

      /* Unset pending asynchronous messages */
      unset_sys_bit(priv(rp)->s_asyn_pending, priv(rc)->s_id);

      /* Check if process depends on given process. */
      if (P_BLOCKEDON(rp) == rc->p_endpoint) {
          rp->p_reg.retreg = caller_ret;	/* return requested code */
	  clear_ipc(rp);
      }
  }
}

/*===========================================================================*
 *                              kernel_call_resume                           *
 *===========================================================================*/
void kernel_call_resume(struct proc *caller)
{
	int result;

	assert(!RTS_ISSET(caller, RTS_SLOT_FREE));
	assert(!RTS_ISSET(caller, RTS_VMREQUEST));

	assert(caller->p_vmrequest.saved.reqmsg.m_source == caller->p_endpoint);

	/*
	printf("KERNEL_CALL restart from %s / %d rts 0x%08x misc 0x%08x\n",
			caller->p_name, caller->p_endpoint,
			caller->p_rts_flags, caller->p_misc_flags);
	 */

	/* Re-execute the kernel call, with MF_KCALL_RESUME still set so
	 * the call knows this is a retry.
	 */
	result = kernel_call_dispatch(caller, &caller->p_vmrequest.saved.reqmsg);
	/*
	 * We are resuming the kernel call, so we have to remove this flag so
	 * it can be set again.
	 */
	caller->p_misc_flags &= ~MF_KCALL_RESUME;
	kernel_call_finish(caller, &caller->p_vmrequest.saved.reqmsg, result);
}

/*===========================================================================*
 *                               sched_proc                                  *
 *===========================================================================*/
int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
{
	/* Make sure the values given are within the allowed range. */
	if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
		return(EINVAL);

	if (quantum < 1 && quantum != -1)
		return(EINVAL);

#ifdef CONFIG_SMP
	if ((cpu < 0 && cpu != -1) || (cpu > 0 && (unsigned) cpu >= ncpus))
		return(EINVAL);
	if (cpu != -1 && !(cpu_is_ready(cpu)))
		return EBADCPU;
#endif

	/* In some cases, we might be rescheduling a runnable process. In such
	 * a case (i.e. if we are updating the priority) we set the NO_QUANTUM
	 * flag before the generic unset to dequeue/enqueue the process
	 */

	/* FIXME this preempts the process, do we really want to do that ? */

	/* FIXME this is a problem for SMP if the process currently runs on a
	 * different CPU */
	if (proc_is_runnable(p)) {
#ifdef CONFIG_SMP
		if (p->p_cpu != cpuid && cpu != -1 && cpu != p->p_cpu) {
			smp_schedule_migrate_proc(p, cpu);
		}
#endif

		RTS_SET(p, RTS_NO_QUANTUM);
	}

	if (priority != -1)
		p->p_priority = priority;
	if (quantum != -1) {
		p->p_quantum_size_ms = quantum;
		p->p_cpu_time_left = ms_2_cpu_time(quantum);
	}
#ifdef CONFIG_SMP
	if (cpu != -1)
		p->p_cpu = cpu;
#endif

	if (niced)
		p->p_misc_flags |= MF_NICED;
	else
		p->p_misc_flags &= ~MF_NICED;

	/* Clear the scheduling bit and enqueue the process */
	RTS_UNSET(p, RTS_NO_QUANTUM);

	return OK;
}
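
/* Usage sketch: a scheduling policy (normally reached via the SYS_SCHEDULE
 * call mapped above) might lower a process to priority 7 with a 200 ms
 * quantum while leaving its CPU assignment alone, by passing -1 for the
 * fields it does not want to change:
 *
 *	if ((r = sched_proc(rp, 7, 200, -1, FALSE)) != OK)
 *		return r;
 *
 * The concrete priority and quantum values here are made up for the example;
 * the -1 convention and the range checks are those implemented above.
 */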

/*===========================================================================*
 *				add_ipc_filter				     *
 *===========================================================================*/
int add_ipc_filter(struct proc *rp, int type, vir_bytes address,
	size_t length)
{
	int num_elements, r;
	ipc_filter_t *ipcf, **ipcfp;

	/* Validate arguments. */
	if (type != IPCF_BLACKLIST && type != IPCF_WHITELIST)
		return EINVAL;

	if (length % sizeof(ipc_filter_el_t) != 0)
		return EINVAL;

	num_elements = length / sizeof(ipc_filter_el_t);
	if (num_elements <= 0 || num_elements > IPCF_MAX_ELEMENTS)
		return E2BIG;

	/* Allocate a new IPC filter slot. */
	IPCF_POOL_ALLOCATE_SLOT(type, &ipcf);
	if (ipcf == NULL)
		return ENOMEM;

	/* Fill details. */
	ipcf->num_elements = num_elements;
	ipcf->next = NULL;
	r = data_copy(rp->p_endpoint, address,
		KERNEL, (vir_bytes)ipcf->elements, length);
	if (r == OK)
		r = check_ipc_filter(ipcf, TRUE /*fill_flags*/);
	if (r != OK) {
		IPCF_POOL_FREE_SLOT(ipcf);
		return r;
	}

	/* Add the new filter at the end of the IPC filter chain. */
	for (ipcfp = &priv(rp)->s_ipcf; *ipcfp != NULL;
	    ipcfp = &(*ipcfp)->next)
		;
	*ipcfp = ipcf;

	return OK;
}
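
/* Usage sketch: a service installs a filter by passing an array of filter
 * elements, typically reached via the SYS_STATECTL call mapped above.
 * Assuming the element layout from <minix/ipc_filter.h> (a flags word plus
 * m_source/m_type match values; these field names are assumptions for
 * illustration):
 *
 *	ipc_filter_el_t els[1];
 *	els[0].flags = IPCF_MATCH_M_SOURCE | IPCF_MATCH_M_TYPE;
 *	els[0].m_source = VM_PROC_NR;
 *	els[0].m_type = NOTIFY_MESSAGE;
 *	r = add_ipc_filter(rp, IPCF_WHITELIST, (vir_bytes)els, sizeof(els));
 *
 * This would leave 'rp' accepting only notifications from VM. The checks
 * above only rely on the element size and IPCF_EL_CHECK().
 */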

/*===========================================================================*
 *				clear_ipc_filters			     *
 *===========================================================================*/
void clear_ipc_filters(struct proc *rp)
{
	ipc_filter_t *curr_ipcf, *ipcf;

	ipcf = priv(rp)->s_ipcf;
	while (ipcf != NULL) {
		curr_ipcf = ipcf;
		ipcf = ipcf->next;
		IPCF_POOL_FREE_SLOT(curr_ipcf);
	}

	priv(rp)->s_ipcf = NULL;

	/* VM is a special case here: since the cleared IPC filter may have
	 * blocked memory handling requests, we may now have to tell VM that
	 * there are "new" requests pending.
	 */
	if (rp->p_endpoint == VM_PROC_NR && vmrequest != NULL)
		if (send_sig(VM_PROC_NR, SIGKMEM) != OK)
			panic("send_sig failed");
}

/*===========================================================================*
 *				check_ipc_filter			     *
 *===========================================================================*/
int check_ipc_filter(ipc_filter_t *ipcf, int fill_flags)
{
	ipc_filter_el_t *ipcf_el;
	int i, num_elements, flags;

	if (ipcf == NULL)
		return OK;

	num_elements = ipcf->num_elements;
	flags = 0;
	for (i = 0; i < num_elements; i++) {
		ipcf_el = &ipcf->elements[i];
		if (!IPCF_EL_CHECK(ipcf_el))
			return EINVAL;
		flags |= ipcf_el->flags;
	}

	if (fill_flags)
		ipcf->flags = flags;
	else if (ipcf->flags != flags)
		return EINVAL;
	return OK;
}

/*===========================================================================*
 *				allow_ipc_filtered_msg			     *
 *===========================================================================*/
int allow_ipc_filtered_msg(struct proc *rp, endpoint_t src_e,
	vir_bytes m_src_v, message *m_src_p)
{
	int i, r, num_elements, get_mtype, allow;
	ipc_filter_t *ipcf;
	ipc_filter_el_t *ipcf_el;
	message m_buff;

	ipcf = priv(rp)->s_ipcf;
	if (ipcf == NULL)
		return TRUE; /* no IPC filters, always allow */

	if (m_src_p == NULL) {
		assert(m_src_v != 0);

		/* Should we copy in the message type? */
		get_mtype = FALSE;
		do {
#if DEBUG_DUMPIPCF
			if (TRUE) {
#else
			if (ipcf->flags & IPCF_MATCH_M_TYPE) {
#endif
				get_mtype = TRUE;
				break;
			}
			ipcf = ipcf->next;
		} while (ipcf);
		ipcf = priv(rp)->s_ipcf; /* reset to start */

		/* If so, copy it in from the process. */
		if (get_mtype) {
			r = data_copy(src_e,
			    m_src_v + offsetof(message, m_type), KERNEL,
			    (vir_bytes)&m_buff.m_type, sizeof(m_buff.m_type));
			if (r != OK) {
				/* allow for now, this will fail later anyway */
#if DEBUG_DUMPIPCF
				printf("KERNEL: allow_ipc_filtered_msg: data "
				    "copy error %d, allowing message...\n", r);
#endif
				return TRUE;
			}
		}
		m_src_p = &m_buff;
	}

	m_src_p->m_source = src_e;

	/* See if the message is allowed. */
	allow = (ipcf->type == IPCF_BLACKLIST);
	do {
		if (allow != (ipcf->type == IPCF_WHITELIST)) {
			num_elements = ipcf->num_elements;
			for (i = 0; i < num_elements; i++) {
				ipcf_el = &ipcf->elements[i];
				if (IPCF_EL_MATCH(ipcf_el, m_src_p)) {
					allow = (ipcf->type == IPCF_WHITELIST);
					break;
				}
			}
		}
		ipcf = ipcf->next;
	} while (ipcf);

#if DEBUG_DUMPIPCF
	printmsg(m_src_p, proc_addr(_ENDPOINT_P(src_e)), rp, allow ? '+' : '-',
	    TRUE /*printparams*/);
#endif

	return allow;
}
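
/* Worked example of the evaluation above: with a single WHITELIST filter
 * whose only element matches m_source == PM_PROC_NR, 'allow' starts FALSE,
 * flips to TRUE only if the message source is PM, and stays FALSE otherwise.
 * With a BLACKLIST filter the logic is mirrored: 'allow' starts TRUE and a
 * matching element flips it to FALSE. Chained filters are evaluated in
 * order, each one only able to flip the decision left by the previous ones.
 */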

/*===========================================================================*
 *			  allow_ipc_filtered_memreq			     *
 *===========================================================================*/
int allow_ipc_filtered_memreq(struct proc *src_rp, struct proc *dst_rp)
{
	/* Determine whether VM should receive a request to handle memory
	 * that is the result of process 'src_rp' trying to access currently
	 * unavailable memory in process 'dst_rp'. Return TRUE if VM should
	 * be given the request, FALSE otherwise.
	 */

	struct proc *vmp;
	message m_buf;

	vmp = proc_addr(VM_PROC_NR);

	/* If VM has no filter in place, all requests should go through. */
	if (priv(vmp)->s_ipcf == NULL)
		return TRUE;

	/* VM obtains memory requests in response to a SIGKMEM signal, which
	 * is a notification sent from SYSTEM. Thus, if VM blocks such
	 * notifications, it also should not get any memory requests. Of
	 * course, VM should not be asking for requests in that case either,
	 * but the extra check doesn't hurt.
	 */
	m_buf.m_type = NOTIFY_MESSAGE;
	if (!allow_ipc_filtered_msg(vmp, SYSTEM, 0, &m_buf))
		return FALSE;

	/* A more refined policy may be implemented here, for example to
	 * ensure that both the source and the destination (if different)
	 * are in the group of processes that VM wants to talk to. Since VM
	 * is basically not able to handle any memory requests during an
	 * update, we will not get here, and none of that is needed.
	 */
	return TRUE;
}

/*===========================================================================*
 *                             priv_add_irq                                  *
 *===========================================================================*/
int priv_add_irq(struct proc *rp, int irq)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IRQ;	/* Check IRQ */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_irq; i++) {
		if (priv->s_irq_tab[i] == irq)
			return OK;
	}

	i = priv->s_nr_irq;
	if (i >= NR_IRQ) {
		printf("do_privctl: %d already has %d IRQs.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_irq_tab[i] = irq;
	priv->s_nr_irq++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_io                                   *
 *===========================================================================*/
int priv_add_io(struct proc *rp, struct io_range *ior)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_IO_PORT;	/* Check I/O accesses */

	for (i = 0; i < priv->s_nr_io_range; i++) {
		if (priv->s_io_tab[i].ior_base == ior->ior_base &&
			priv->s_io_tab[i].ior_limit == ior->ior_limit)
			return OK;
	}

	i = priv->s_nr_io_range;
	if (i >= NR_IO_RANGE) {
		printf("do_privctl: %d already has %d I/O ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}

	priv->s_io_tab[i] = *ior;
	priv->s_nr_io_range++;
	return OK;
}

/*===========================================================================*
 *                             priv_add_mem                                  *
 *===========================================================================*/
int priv_add_mem(struct proc *rp, struct minix_mem_range *memr)
{
	struct priv *priv = priv(rp);
	int i;

	priv->s_flags |= CHECK_MEM;	/* Check memory mappings */

	/* When restarting a driver, check if it already has the permission */
	for (i = 0; i < priv->s_nr_mem_range; i++) {
		if (priv->s_mem_tab[i].mr_base == memr->mr_base &&
			priv->s_mem_tab[i].mr_limit == memr->mr_limit)
			return OK;
	}

	i = priv->s_nr_mem_range;
	if (i >= NR_MEM_RANGE) {
		printf("do_privctl: %d already has %d mem ranges.\n",
			rp->p_endpoint, i);
		return ENOMEM;
	}
	priv->s_mem_tab[i] = *memr;
	priv->s_nr_mem_range++;
	return OK;
}
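
/* Usage sketch for the three grant functions above: RS typically forwards a
 * driver's resources one by one through the SYS_PRIVCTL call mapped above,
 * whose handler then calls into here, conceptually:
 *
 *	struct io_range ior;
 *	ior.ior_base = 0x3f8;			(made-up example range)
 *	ior.ior_limit = 0x3ff;
 *	if ((r = priv_add_io(rp, &ior)) != OK)
 *		return r;
 *	if ((r = priv_add_irq(rp, 4)) != OK)	(made-up IRQ line)
 *		return r;
 *
 * Each function is idempotent for a range or IRQ that was already granted,
 * which keeps driver restarts simple, and each fails with ENOMEM when the
 * per-process table is full.
 */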