1 /* This file contains essentially all of the process and message handling.
2  * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
3  * There is one entry point from the outside:
4  *
5  *   sys_call: 	      a system call, i.e., the kernel is trapped with an INT
6  *
7  * Changes:
8  *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
9  *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
10  *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
11  *   May 24, 2005     new notification system call  (Jorrit N. Herder)
12  *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
13  *
14  * The code here is critical to make everything work and is important for the
15  * overall performance of the system. A large fraction of the code deals with
16  * list manipulation. To make this both easy to understand and fast to execute,
17  * pointer pointers are used throughout the code. Pointer pointers prevent
18  * special cases for the head or tail of a linked list.
19  *
20  *  node_t *queue, *new_node;	// assume these as global variables
21  *  node_t **xpp = &queue; 	// get pointer pointer to head of queue
22  *  while (*xpp != NULL) 	// find last pointer of the linked list
23  *      xpp = &(*xpp)->next;	// get pointer to next pointer
24  *  *xpp = new_node;		// now replace the end (the NULL pointer)
25  *  new_node->next = NULL;	// and mark the new end of the list
26  *
27  * For example, when adding a new node to the end of the list, one normally
28  * makes an exception for an empty list and looks up the end of the list for
29  * nonempty lists. As shown above, this is not required with pointer pointers.
30  */
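/* The same idiom also removes a node without special-casing the head. A
 * minimal sketch (illustration only, not part of the kernel), assuming the
 * node_t from the example above additionally carries an int key:
 *
 *  node_t **xpp = &queue;		// start at the head pointer itself
 *  while (*xpp != NULL) {
 *      if ((*xpp)->key == target) {	// found the node to unlink
 *          *xpp = (*xpp)->next;	// bypass it; works for the head too
 *          break;
 *      }
 *      xpp = &(*xpp)->next;		// advance to the next "next" pointer
 *  }
 *
 * mini_receive() below uses exactly this shape to unlink a satisfied sender
 * from a caller queue ("*xpp = sender->p_q_link").
 */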
31 
32 #include <minix/com.h>
33 #include <minix/ipcconst.h>
34 #include <stddef.h>
35 #include <signal.h>
36 #include <assert.h>
37 #include <string.h>
38 
39 #include "kernel/kernel.h"
40 #include "vm.h"
41 #include "clock.h"
42 #include "spinlock.h"
43 #include "arch_proto.h"
44 
45 #include <minix/syslib.h>
46 
47 /* Scheduling and message passing functions */
48 static void idle(void);
49 /**
50  * Made public for use in clock.c (for user-space scheduling)
51 static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
52 	*m_ptr, int flags);
53 */
54 static int mini_receive(struct proc *caller_ptr, endpoint_t src,
55 	message *m_buff_usr, int flags);
56 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
57 	size);
58 static int deadlock(int function, register struct proc *caller,
59 	endpoint_t src_dst_e);
60 static int try_async(struct proc *caller_ptr);
61 static int try_one(endpoint_t receive_e, struct proc *src_ptr,
62 	struct proc *dst_ptr);
63 static struct proc * pick_proc(void);
64 static void enqueue_head(struct proc *rp);
65 
66 /* all idles share the same idle_priv structure */
67 static struct priv idle_priv;
68 
69 static void set_idle_name(char * name, int n)
70 {
71         int i, c;
72         int p_z = 0;    /* becomes nonzero once a digit has been emitted */
73 
74         if (n > 999)
75                 n = 999;
76 
77         name[0] = 'i';
78         name[1] = 'd';
79         name[2] = 'l';
80         name[3] = 'e';
81 
82         for (i = 4, c = 100; c > 0; c /= 10) {
83                 int digit;
84 
85                 digit = n / c;
86                 n -= digit * c;
87 
88                 if (p_z || digit != 0 || c == 1) {
89                         p_z = 1;
90                         name[i++] = '0' + digit;
91                 }
92         }
93 
94         name[i] = '\0';
95 
96 }
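/* For example, set_idle_name(name, 0) produces "idle0", set_idle_name(name,
 * 42) produces "idle42", and any n greater than 999 is clamped to "idle999".
 */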
97 
98 
99 #define PICK_ANY	1
100 #define PICK_HIGHERONLY	2
101 
102 #define BuildNotifyMessage(m_ptr, src, dst_ptr) do {		\
103 	memset((m_ptr), 0, sizeof(*(m_ptr)));				\
104 	(m_ptr)->m_type = NOTIFY_MESSAGE;				\
105 	(m_ptr)->m_notify.timestamp = get_monotonic();		\
106 	switch (src) {							\
107 	case HARDWARE:							\
108 		(m_ptr)->m_notify.interrupts =			\
109 			priv(dst_ptr)->s_int_pending;			\
110 		priv(dst_ptr)->s_int_pending = 0;			\
111 		break;							\
112 	case SYSTEM:							\
113 		memcpy(&(m_ptr)->m_notify.sigset,			\
114 			&priv(dst_ptr)->s_sig_pending,			\
115 			sizeof(sigset_t));				\
116 		sigemptyset(&priv(dst_ptr)->s_sig_pending);		\
117 		break;							\
118 	} } while(0)
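/* The resulting message is entirely kernel-built: m_type is NOTIFY_MESSAGE,
 * the timestamp holds the current monotonic clock, and the payload snapshots
 * (and clears) either the destination's pending-interrupt bits (pseudo-source
 * HARDWARE) or its pending-signal set (pseudo-source SYSTEM). The caller is
 * expected to fill in m_source afterwards, as mini_receive() and mini_notify()
 * below do.
 */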
119 
120 static message m_notify_buff = { 0, NOTIFY_MESSAGE };
121 
122 void proc_init(void)
123 {
124 	struct proc * rp;
125 	struct priv *sp;
126 	int i;
127 
128 	/* Clear the process table. Announce each slot as empty and set up
129 	 * mappings for proc_addr() and proc_nr() macros. Do the same for the
130 	 * table with privilege structures for the system processes.
131 	 */
132 	for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
133 		rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
134 		rp->p_magic = PMAGIC;
135 		rp->p_nr = i;			/* proc number from ptr */
136 		rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
137 		rp->p_scheduler = NULL;		/* no user space scheduler */
138 		rp->p_priority = 0;		/* no priority */
139 		rp->p_quantum_size_ms = 0;	/* no quantum size */
140 
141 		/* arch-specific initialization */
142 		arch_proc_reset(rp);
143 	}
144 	for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
145 		sp->s_proc_nr = NONE;		/* initialize as free */
146 		sp->s_id = (sys_id_t) i;	/* priv structure index */
147 		ppriv_addr[i] = sp;		/* priv ptr from number */
148 		sp->s_sig_mgr = NONE;		/* clear signal managers */
149 		sp->s_bak_sig_mgr = NONE;
150 	}
151 
152 	idle_priv.s_flags = IDL_F;
153 	/* initialize IDLE structures for every CPU */
154 	for (i = 0; i < CONFIG_MAX_CPUS; i++) {
155 		struct proc * ip = get_cpu_var_ptr(i, idle_proc);
156 		ip->p_endpoint = IDLE;
157 		ip->p_priv = &idle_priv;
158 		/* must not let idle ever get scheduled */
159 		ip->p_rts_flags |= RTS_PROC_STOP;
160 		set_idle_name(ip->p_name, i);
161 	}
162 }
163 
164 static void switch_address_space_idle(void)
165 {
166 #ifdef CONFIG_SMP
167 	/*
168 	 * Currently we bet that VM is always alive and its pages are available,
169 	 * so that when the CPU wakes up the kernel is mapped and no surprises
170 	 * happen. This is only a problem if more than one CPU is available.
171 	 */
172 	switch_address_space(proc_addr(VM_PROC_NR));
173 #endif
174 }
175 
176 /*===========================================================================*
177  *				idle					     *
178  *===========================================================================*/
179 static void idle(void)
180 {
181 	struct proc * p;
182 
183 	/* This function is called whenever there is no work to do.
184 	 * Halt the CPU, and measure how many timestamp counter ticks are
185 	 * spent not doing anything. This allows test setups to measure
186 	 * the CPU utilization of certain workloads with high precision.
187 	 */
188 
189 	p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
190 	if (priv(p)->s_flags & BILLABLE)
191 		get_cpulocal_var(bill_ptr) = p;
192 
193 	switch_address_space_idle();
194 
195 #ifdef CONFIG_SMP
196 	get_cpulocal_var(cpu_is_idle) = 1;
197 	/* we don't need to keep time on APs as it is handled on the BSP */
198 	if (cpuid != bsp_cpu_id)
199 		stop_local_timer();
200 	else
201 #endif
202 	{
203 		/*
204 		 * If the timer has expired while in the kernel, we must
205 		 * rearm it before we go to sleep.
206 		 */
207 		restart_local_timer();
208 	}
209 
210 	/* start accounting for the idle time */
211 	context_stop(proc_addr(KERNEL));
212 #if !SPROFILE
213 	halt_cpu();
214 #else
215 	if (!sprofiling)
216 		halt_cpu();
217 	else {
218 		volatile int * v;
219 
220 		v = get_cpulocal_var_ptr(idle_interrupted);
221 		interrupts_enable();
222 		while (!*v)
223 			arch_pause();
224 		interrupts_disable();
225 		*v = 0;
226 	}
227 #endif
228 	/*
229 	 * Accounting for the idle task does not end here; the kernel keeps
230 	 * handling work for quite a while before execution gets back here!
231 	 */
232 }
233 
234 /*===========================================================================*
235  *                              vm_suspend                                *
236  *===========================================================================*/
237 void vm_suspend(struct proc *caller, const struct proc *target,
238         const vir_bytes linaddr, const vir_bytes len, const int type,
239         const int writeflag)
240 {
241         /* This range is not OK for this process. Set parameters
242          * of the request and notify VM about the pending request.
243          */
244         assert(!RTS_ISSET(caller, RTS_VMREQUEST));
245         assert(!RTS_ISSET(target, RTS_VMREQUEST));
246 
247         RTS_SET(caller, RTS_VMREQUEST);
248 
249         caller->p_vmrequest.req_type = VMPTYPE_CHECK;
250         caller->p_vmrequest.target = target->p_endpoint;
251         caller->p_vmrequest.params.check.start = linaddr;
252         caller->p_vmrequest.params.check.length = len;
253         caller->p_vmrequest.params.check.writeflag = writeflag;
254         caller->p_vmrequest.type = type;
255 
256         /* Queue caller on the vmrequest chain; signal VM only if it was empty. */
257         if(!(caller->p_vmrequest.nextrequestor = vmrequest))
258                 if(OK != send_sig(VM_PROC_NR, SIGKMEM))
259                         panic("send_sig failed");
260         vmrequest = caller;
261 }
262 
263 /*===========================================================================*
264  *                              delivermsg                                *
265  *===========================================================================*/
266 static void delivermsg(struct proc *rp)
267 {
268         assert(!RTS_ISSET(rp, RTS_VMREQUEST));
269         assert(rp->p_misc_flags & MF_DELIVERMSG);
270         assert(rp->p_delivermsg.m_source != NONE);
271 
272         if (copy_msg_to_user(&rp->p_delivermsg,
273                                 (message *) rp->p_delivermsg_vir)) {
274                 if(rp->p_misc_flags & MF_MSGFAILED) {
275                         /* 2nd consecutive failure means this won't succeed */
276                         printf("WARNING wrong user pointer 0x%08lx from "
277                                 "process %s / %d\n",
278                                 rp->p_delivermsg_vir,
279                                 rp->p_name,
280                                 rp->p_endpoint);
281                         cause_sig(rp->p_nr, SIGSEGV);
282                 } else {
283                         /* 1st failure means we have to ask VM to handle it */
284                         vm_suspend(rp, rp, rp->p_delivermsg_vir,
285                                 sizeof(message), VMSTYPE_DELIVERMSG, 1);
286                         rp->p_misc_flags |= MF_MSGFAILED;
287                 }
288         } else {
289                 /* Indicate message has been delivered; address is 'used'. */
290                 rp->p_delivermsg.m_source = NONE;
291                 rp->p_misc_flags &= ~(MF_DELIVERMSG|MF_MSGFAILED);
292 
293                 if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
294                         rp->p_reg.retreg = OK;
295                 }
296         }
297 }
298 
299 /*===========================================================================*
300  *				switch_to_user				     *
301  *===========================================================================*/
302 void switch_to_user(void)
303 {
304 	/* This function is called an instant before proc_ptr is
305 	 * to be scheduled again.
306 	 */
307 	struct proc * p;
308 #ifdef CONFIG_SMP
309 	int tlb_must_refresh = 0;
310 #endif
311 
312 	p = get_cpulocal_var(proc_ptr);
313 	/*
314 	 * if the current process is still runnable, check the misc flags and
315 	 * let it run unless it becomes unrunnable in the meantime
316 	 */
317 	if (proc_is_runnable(p))
318 		goto check_misc_flags;
319 	/*
320 	 * if a process becomes unrunnable while handling the misc flags, we
321 	 * need to pick a new one here and start from scratch. The same applies
322 	 * if the current process wasn't runnable in the first place
323 	 */
324 not_runnable_pick_new:
325 	if (proc_is_preempted(p)) {
326 		p->p_rts_flags &= ~RTS_PREEMPTED;
327 		if (proc_is_runnable(p)) {
328 			if (p->p_cpu_time_left)
329 				enqueue_head(p);
330 			else
331 				enqueue(p);
332 		}
333 	}
334 
335 	/*
336 	 * if we have no process to run, set IDLE as the current process for
337 	 * time accounting and put the cpu in an idle state. After the next
338 	 * timer interrupt the execution resumes here and we can pick another
339 	 * process. If there is still nothing runnable we "schedule" IDLE again
340 	 */
341 	while (!(p = pick_proc())) {
342 		idle();
343 	}
344 
345 	/* update the global variable */
346 	get_cpulocal_var(proc_ptr) = p;
347 
348 #ifdef CONFIG_SMP
349 	if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
350 		tlb_must_refresh = 1;
351 #endif
352 	switch_address_space(p);
353 
354 check_misc_flags:
355 
356 	assert(p);
357 	assert(proc_is_runnable(p));
358 	while (p->p_misc_flags &
359 		(MF_KCALL_RESUME | MF_DELIVERMSG |
360 		 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {
361 
362 		assert(proc_is_runnable(p));
363 		if (p->p_misc_flags & MF_KCALL_RESUME) {
364 			kernel_call_resume(p);
365 		}
366 		else if (p->p_misc_flags & MF_DELIVERMSG) {
367 			TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
368 				p->p_name, p->p_endpoint););
369 			delivermsg(p);
370 		}
371 		else if (p->p_misc_flags & MF_SC_DEFER) {
372 			/* Perform the system call that we deferred earlier. */
373 
374 			assert (!(p->p_misc_flags & MF_SC_ACTIVE));
375 
376 			arch_do_syscall(p);
377 
378 			/* If the process is stopped for signal delivery, and
379 			 * not blocked sending a message after the system call,
380 			 * inform PM.
381 			 */
382 			if ((p->p_misc_flags & MF_SIG_DELAY) &&
383 					!RTS_ISSET(p, RTS_SENDING))
384 				sig_delay_done(p);
385 		}
386 		else if (p->p_misc_flags & MF_SC_TRACE) {
387 			/* Trigger a system call leave event if this was a
388 			 * system call. We must do this after processing the
389 			 * other flags above, both for tracing correctness and
390 			 * to be able to use 'break'.
391 			 */
392 			if (!(p->p_misc_flags & MF_SC_ACTIVE))
393 				break;
394 
395 			p->p_misc_flags &=
396 				~(MF_SC_TRACE | MF_SC_ACTIVE);
397 
398 			/* Signal the "leave system call" event.
399 			 * Block the process.
400 			 */
401 			cause_sig(proc_nr(p), SIGTRAP);
402 		}
403 		else if (p->p_misc_flags & MF_SC_ACTIVE) {
404 			/* If MF_SC_ACTIVE was set, remove it now:
405 			 * we're leaving the system call.
406 			 */
407 			p->p_misc_flags &= ~MF_SC_ACTIVE;
408 
409 			break;
410 		}
411 
412 		/*
413 		 * the selected process might not be runnable anymore. We have
414 		 * to check it and schedule another one
415 		 */
416 		if (!proc_is_runnable(p))
417 			goto not_runnable_pick_new;
418 	}
419 	/*
420 	 * check the quantum left before it runs again. We must do it only here
421 	 * as we are sure that a possible out-of-quantum message to the
422 	 * scheduler will not collide with the regular ipc
423 	 */
424 	if (!p->p_cpu_time_left)
425 		proc_no_time(p);
426 	/*
427 	 * After handling the misc flags the selected process might not be
428 	 * runnable anymore. We have to check it and schedule another one
429 	 */
430 	if (!proc_is_runnable(p))
431 		goto not_runnable_pick_new;
432 
433 	TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
434 				"pc 0x%08x\n",
435 		cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
436 #if DEBUG_TRACE
437 	p->p_schedules++;
438 #endif
439 
440 	p = arch_finish_switch_to_user();
441 	assert(p->p_cpu_time_left);
442 
443 	context_stop(proc_addr(KERNEL));
444 
445 	/* If the process isn't the owner of FPU, enable the FPU exception */
446 	if (get_cpulocal_var(fpu_owner) != p)
447 		enable_fpu_exception();
448 	else
449 		disable_fpu_exception();
450 
451 	/* If MF_CONTEXT_SET is set, don't clobber process state within
452 	 * the kernel. The next kernel entry is OK again though.
453 	 */
454 	p->p_misc_flags &= ~MF_CONTEXT_SET;
455 
456 #if defined(__i386__)
457   	assert(p->p_seg.p_cr3 != 0);
458 #elif defined(__arm__)
459 	assert(p->p_seg.p_ttbr != 0);
460 #endif
461 #ifdef CONFIG_SMP
462 	if (p->p_misc_flags & MF_FLUSH_TLB) {
463 		if (tlb_must_refresh)
464 			refresh_tlb();
465 		p->p_misc_flags &= ~MF_FLUSH_TLB;
466 	}
467 #endif
468 
469 	restart_local_timer();
470 
471 	/*
472 	 * restore_user_context() carries out the actual mode switch from kernel
473 	 * to userspace. This function does not return
474 	 */
475 	restore_user_context(p);
476 	NOT_REACHABLE;
477 }
478 
479 /*
480  * handler for all synchronous IPC calls
481  */
482 static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
483 			int call_nr,	/* system call number and flags */
484 			endpoint_t src_dst_e,	/* src or dst of the call */
485 			message *m_ptr)	/* users pointer to a message */
486 {
487   int result;					/* the system call's result */
488   int src_dst_p;				/* Process slot number */
489   char *callname;
490 
491   /* Check destination. RECEIVE is the only call that accepts ANY (in addition
492    * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
493    * endpoint to correspond to a process. In addition, it is necessary to check
494    * whether a process is allowed to send to a given destination.
495    */
496   assert(call_nr != SENDA);
497 
498   /* Only allow non-negative call_nr values less than 32 */
499   if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
500       || !(callname = ipc_call_names[call_nr])) {
501 #if DEBUG_ENABLE_IPC_WARNINGS
502       printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
503           call_nr, proc_nr(caller_ptr), src_dst_e);
504 #endif
505 	return(ETRAPDENIED);		/* trap denied by mask or kernel */
506   }
507 
508   if (src_dst_e == ANY)
509   {
510 	if (call_nr != RECEIVE)
511 	{
512 #if 0
513 		printf("sys_call: %s by %d with bad endpoint %d\n",
514 			callname,
515 			proc_nr(caller_ptr), src_dst_e);
516 #endif
517 		return EINVAL;
518 	}
519 	src_dst_p = (int) src_dst_e;
520   }
521   else
522   {
523 	/* Require a valid source and/or destination process. */
524 	if(!isokendpt(src_dst_e, &src_dst_p)) {
525 #if 0
526 		printf("sys_call: %s by %d with bad endpoint %d\n",
527 			callname,
528 			proc_nr(caller_ptr), src_dst_e);
529 #endif
530 		return EDEADSRCDST;
531 	}
532 
533 	/* If the call is to send to a process, i.e., for SEND, SENDNB,
534 	 * SENDREC or NOTIFY, verify that the caller is allowed to send to
535 	 * the given destination.
536 	 */
537 	if (call_nr != RECEIVE)
538 	{
539 		if (!may_send_to(caller_ptr, src_dst_p)) {
540 #if DEBUG_ENABLE_IPC_WARNINGS
541 			printf(
542 			"sys_call: ipc mask denied %s from %d to %d\n",
543 				callname,
544 				caller_ptr->p_endpoint, src_dst_e);
545 #endif
546 			return(ECALLDENIED);	/* call denied by ipc mask */
547 		}
548 	}
549   }
550 
551   /* Check if the process has privileges for the requested call. Calls to the
552    * kernel may only be SENDREC, because tasks always reply and may not block
553    * if the caller doesn't do receive().
554    */
555   if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
556 #if DEBUG_ENABLE_IPC_WARNINGS
557       printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
558           callname, proc_nr(caller_ptr), src_dst_p);
559 #endif
560 	return(ETRAPDENIED);		/* trap denied by mask or kernel */
561   }
562 
563   if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
564 #if DEBUG_ENABLE_IPC_WARNINGS
565       printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
566            callname, proc_nr(caller_ptr), src_dst_e);
567 #endif
568 	return(ETRAPDENIED);		/* trap denied by mask or kernel */
569   }
570 
571   switch(call_nr) {
572   case SENDREC:
573 	/* A flag is set so that notifications cannot interrupt SENDREC. */
574 	caller_ptr->p_misc_flags |= MF_REPLY_PEND;
575 	/* fall through */
576   case SEND:
577 	result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
578 	if (call_nr == SEND || result != OK)
579 		break;				/* done, or SEND failed */
580 	/* fall through for SENDREC */
581   case RECEIVE:
582 	if (call_nr == RECEIVE) {
583 		caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
584 		IPC_STATUS_CLEAR(caller_ptr);  /* clear IPC status code */
585 	}
586 	result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
587 	break;
588   case NOTIFY:
589 	result = mini_notify(caller_ptr, src_dst_e);
590 	break;
591   case SENDNB:
592         result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
593         break;
594   default:
595 	result = EBADCALL;			/* illegal system call */
596   }
597 
598   /* Now, return the result of the system call to the caller. */
599   return(result);
600 }
601 
602 int do_ipc(reg_t r1, reg_t r2, reg_t r3)
603 {
604   struct proc *const caller_ptr = get_cpulocal_var(proc_ptr);	/* get pointer to caller */
605   int call_nr = (int) r1;
606 
607   assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));
608 
609   /* bill kernel time to this process. */
610   kbill_ipc = caller_ptr;
611 
612   /* If this process is subject to system call tracing, handle that first. */
613   if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
614 	/* Are we tracing this process, and is it the first sys_call entry? */
615 	if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
616 							MF_SC_TRACE) {
617 		/* We must notify the tracer before processing the actual
618 		 * system call. If we don't, the tracer could not obtain the
619 		 * input message. Postpone the entire system call.
620 		 */
621 		caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
622 		assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
623 		caller_ptr->p_misc_flags |= MF_SC_DEFER;
624 		caller_ptr->p_defer.r1 = r1;
625 		caller_ptr->p_defer.r2 = r2;
626 		caller_ptr->p_defer.r3 = r3;
627 
628 		/* Signal the "enter system call" event. Block the process. */
629 		cause_sig(proc_nr(caller_ptr), SIGTRAP);
630 
631 		/* Preserve the return register's value. */
632 		return caller_ptr->p_reg.retreg;
633 	}
634 
635 	/* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
636 	caller_ptr->p_misc_flags &= ~MF_SC_DEFER;
637 
638 	assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));
639 
640 	/* Set a flag to allow reliable tracing of leaving the system call. */
641 	caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
642   }
643 
644   if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
645 	panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
646 		caller_ptr->p_name, caller_ptr->p_endpoint);
647   }
648 
649   /* Now check if the call is known and try to perform the request. The only
650    * system calls that exist in MINIX are sending and receiving messages.
651    *   - SENDREC: combines SEND and RECEIVE in a single system call
652    *   - SEND:    sender blocks until its message has been delivered
653    *   - RECEIVE: receiver blocks until an acceptable message has arrived
654    *   - NOTIFY:  asynchronous call; deliver notification or mark pending
655    *   - SENDA:   list of asynchronous send requests
656    */
657   switch(call_nr) {
658   	case SENDREC:
659   	case SEND:
660   	case RECEIVE:
661   	case NOTIFY:
662   	case SENDNB:
663   	{
664   	    /* Process accounting for scheduling */
665 	    caller_ptr->p_accounting.ipc_sync++;
666 
667   	    return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
668 			    (message *) r3);
669   	}
670   	case SENDA:
671   	{
672  	    /*
673   	     * Get and check the size of the argument in bytes as it is a
674   	     * table
675   	     */
676   	    size_t msg_size = (size_t) r2;
677 
678   	    /* Process accounting for scheduling */
679 	    caller_ptr->p_accounting.ipc_async++;
680 
681   	    /* Limit size to something reasonable. An arbitrary choice is 16
682   	     * times the number of process table entries.
683   	     */
684   	    if (msg_size > 16*(NR_TASKS + NR_PROCS))
685 	        return EDOM;
686   	    return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
687   	}
688   	case MINIX_KERNINFO:
689 	{
690 		/* It might not be initialized yet. */
691 	  	if(!minix_kerninfo_user) {
692 			return EBADCALL;
693 		}
694 
695   		arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
696   		return OK;
697 	}
698   	default:
699 	return EBADCALL;		/* illegal system call */
700   }
701 }
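/* For illustration only: a sketch of how a user process ends up in do_ipc().
 * The names below are the userland IPC stubs (ipc_sendrec() and friends,
 * declared in <minix/ipc.h>); MY_REQUEST and fs_ep are hypothetical, and
 * exact stub signatures may differ per MINIX version.
 *
 *  message m;
 *  memset(&m, 0, sizeof(m));
 *  m.m_type = MY_REQUEST;		// hypothetical request code
 *  if (ipc_sendrec(fs_ep, &m) != OK)	// traps into do_ipc(SENDREC, ...)
 *      panic("request failed");
 *  // on return, m holds the reply delivered via delivermsg()
 */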
702 
703 /*===========================================================================*
704  *				deadlock				     *
705  *===========================================================================*/
706 static int deadlock(
707   int function,					/* trap number */
708   register struct proc *cp,			/* pointer to caller */
709   endpoint_t src_dst_e)				/* src or dst process */
710 {
711 /* Check for deadlock. This can happen if 'cp' and 'src_dst_e' have
712  * a cyclic dependency of blocking send and receive calls. The only cyclic
713  * dependency that is not fatal is if the caller and target directly SEND(REC)
714  * and RECEIVE to each other. If a deadlock is found, the group size is
715  * returned. Otherwise zero is returned.
716  */
717   register struct proc *xp;			/* process pointer */
718   int group_size = 1;				/* start with only caller */
719 #if DEBUG_ENABLE_IPC_WARNINGS
720   static struct proc *processes[NR_PROCS + NR_TASKS];
721   processes[0] = cp;
722 #endif
723 
724   while (src_dst_e != ANY) { 			/* loop while it is a process */
725       int src_dst_slot;
726       okendpt(src_dst_e, &src_dst_slot);
727       xp = proc_addr(src_dst_slot);		/* follow chain of processes */
728       assert(proc_ptr_ok(xp));
729       assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
730 #if DEBUG_ENABLE_IPC_WARNINGS
731       processes[group_size] = xp;
732 #endif
733       group_size ++;				/* extra process in group */
734 
735       /* Check whether the last process in the chain has a dependency. If it
736        * has not, the cycle cannot be closed and we are done.
737        */
738       if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
739 	return 0;
740 
741       /* Now check if there is a cyclic dependency. For group sizes of two,
742        * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
743        * or other combinations indicate a deadlock.
744        */
745       if (src_dst_e == cp->p_endpoint) {	/* possible deadlock */
746 	  if (group_size == 2) {		/* caller and src_dst */
747 	      /* The function number is magically converted to flags. */
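	      /* A sketch of why this works, assuming the ipcconst.h
	       * encoding where SEND == 1 and RECEIVE == 2, and RTS_SENDING
	       * being the bit (1 << 2): 'function << 2' then lands exactly
	       * on the RTS_SENDING bit, so the XOR leaves that bit set
	       * precisely when one party sends while the other receives,
	       * the one benign two-party combination.
	       */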
748 	      if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
749 	          return(0);			/* not a deadlock */
750 	      }
751 	  }
752 #if DEBUG_ENABLE_IPC_WARNINGS
753 	  {
754 		int i;
755 		printf("deadlock between these processes:\n");
756 		for(i = 0; i < group_size; i++) {
757 			printf(" %10s ", processes[i]->p_name);
758 		}
759 		printf("\n\n");
760 		for(i = 0; i < group_size; i++) {
761 			print_proc(processes[i]);
762 			proc_stacktrace(processes[i]);
763 		}
764 	  }
765 #endif
766           return(group_size);			/* deadlock found */
767       }
768   }
769   return(0);					/* not a deadlock */
770 }
771 
772 /*===========================================================================*
773  *				has_pending				     *
774  *===========================================================================*/
775 static int has_pending(sys_map_t *map, int src_p, int asynm)
776 {
777 /* Check to see if there is a pending message from the desired source
778  * available.
779  */
780 
781   int src_id;
782   sys_id_t id = NULL_PRIV_ID;
783 #ifdef CONFIG_SMP
784   struct proc * p;
785 #endif
786 
787   /* Either check a specific bit in the mask map, or find the first bit set in
788    * it (if any), depending on whether the receive was called on a specific
789    * source endpoint.
790    */
791   if (src_p != ANY) {
792 	src_id = nr_to_id(src_p);
793 	if (get_sys_bit(*map, src_id)) {
794 #ifdef CONFIG_SMP
795 		p = proc_addr(id_to_nr(src_id));
796 		if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
797 			p->p_misc_flags |= MF_SENDA_VM_MISS;
798 		else
799 #endif
800 			id = src_id;
801 	}
802   } else {
803 	/* Find a source with a pending message */
804 	for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
805 		if (get_sys_bits(*map, src_id) != 0) {
806 #ifdef CONFIG_SMP
807 			while (src_id < NR_SYS_PROCS) {
808 				while (!get_sys_bit(*map, src_id)) {
809 					if (src_id == NR_SYS_PROCS)
810 						goto quit_search;
811 					src_id++;
812 				}
813 				p = proc_addr(id_to_nr(src_id));
814 				/*
815 				 * We must not let kernel fiddle with pages of a
816 				 * process which are currently being changed by
817 				 * VM.  It is dangerous! So do not report such a
818 				 * process as having pending async messages.
819 				 * Skip it.
820 				 */
821 				if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
822 					p->p_misc_flags |= MF_SENDA_VM_MISS;
823 					src_id++;
824 				} else
825 					goto quit_search;
826 			}
827 #else
828 			while (!get_sys_bit(*map, src_id)) src_id++;
829 			goto quit_search;
830 #endif
831 		}
832 	}
833 
834 quit_search:
835 	if (src_id < NR_SYS_PROCS)	/* Found one */
836 		id = src_id;
837   }
838 
839   return(id);
840 }
841 
842 /*===========================================================================*
843  *				has_pending_notify			     *
844  *===========================================================================*/
845 int has_pending_notify(struct proc * caller, int src_p)
846 {
847 	sys_map_t * map = &priv(caller)->s_notify_pending;
848 	return has_pending(map, src_p, 0);
849 }
850 
851 /*===========================================================================*
852  *				has_pending_asend			     *
853  *===========================================================================*/
854 int has_pending_asend(struct proc * caller, int src_p)
855 {
856 	sys_map_t * map = &priv(caller)->s_asyn_pending;
857 	return has_pending(map, src_p, 1);
858 }
859 
860 /*===========================================================================*
861  *				unset_notify_pending			     *
862  *===========================================================================*/
863 void unset_notify_pending(struct proc * caller, int src_p)
864 {
865 	sys_map_t * map = &priv(caller)->s_notify_pending;
866 	unset_sys_bit(*map, src_p);
867 }
868 
869 /*===========================================================================*
870  *				mini_send				     *
871  *===========================================================================*/
872 int mini_send(
873   register struct proc *caller_ptr,	/* who is trying to send a message? */
874   endpoint_t dst_e,			/* to whom is message being sent? */
875   message *m_ptr,			/* pointer to message buffer */
876   const int flags
877 )
878 {
879 /* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
880  * for this message, copy the message to it and unblock 'dst'. If 'dst' is
881  * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
882  */
883   register struct proc *dst_ptr;
884   register struct proc **xpp;
885   int dst_p;
886   dst_p = _ENDPOINT_P(dst_e);
887   dst_ptr = proc_addr(dst_p);
888 
889   if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
890   {
891 	return EDEADSRCDST;
892   }
893 
894   /* Check if 'dst' is blocked waiting for this message. The destination's
895    * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
896    */
897   if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, (vir_bytes)m_ptr, NULL)) {
898 	int call;
899 	/* Destination is indeed waiting for this message. */
900 	assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
901 
902 	if (!(flags & FROM_KERNEL)) {
903 		if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
904 			return EFAULT;
905 	} else {
906 		dst_ptr->p_delivermsg = *m_ptr;
907 		IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
908 	}
909 
910 	dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
911 	dst_ptr->p_misc_flags |= MF_DELIVERMSG;
912 
913 	call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
914 		: (flags & NON_BLOCKING ? SENDNB : SEND));
915 	IPC_STATUS_ADD_CALL(dst_ptr, call);
916 
917 	if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
918 		dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;
919 
920 	RTS_UNSET(dst_ptr, RTS_RECEIVING);
921 
922 #if DEBUG_IPC_HOOK
923 	hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
924 	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
925 #endif
926   } else {
927 	if(flags & NON_BLOCKING) {
928 		return(ENOTREADY);
929 	}
930 
931 	/* Check for a possible deadlock before actually blocking. */
932 	if (deadlock(SEND, caller_ptr, dst_e)) {
933 		return(ELOCKED);
934 	}
935 
936 	/* Destination is not waiting.  Block and dequeue caller. */
937 	if (!(flags & FROM_KERNEL)) {
938 		if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
939 			return EFAULT;
940 	} else {
941 		caller_ptr->p_sendmsg = *m_ptr;
942 		/*
943 		 * we need to remember that this message is from kernel so we
944 		 * can set the delivery status flags when the message is
945 		 * actually delivered
946 		 */
947 		caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
948 	}
949 
950 	RTS_SET(caller_ptr, RTS_SENDING);
951 	caller_ptr->p_sendto_e = dst_e;
952 
953 	/* Process is now blocked.  Put it on the destination's queue. */
954 	assert(caller_ptr->p_q_link == NULL);
955 	xpp = &dst_ptr->p_caller_q;		/* find end of list */
956 	while (*xpp) xpp = &(*xpp)->p_q_link;
957 	*xpp = caller_ptr;			/* add caller to end */
958 
959 #if DEBUG_IPC_HOOK
960 	hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
961 #endif
962   }
963   return(OK);
964 }
965 
966 /*===========================================================================*
967  *				mini_receive				     *
968  *===========================================================================*/
969 static int mini_receive(struct proc * caller_ptr,
970 			endpoint_t src_e, /* which message source is wanted */
971 			message * m_buff_usr, /* pointer to message buffer */
972 			const int flags)
973 {
974 /* A process or task wants to get a message.  If a message is already queued,
975  * acquire it and unblock the sender.  If no message from the desired source
976  * is available, block the caller.
977  */
978   register struct proc **xpp;
979   int r, src_id, found, src_proc_nr, src_p;
980   endpoint_t sender_e;
981 
982   assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
983 
984   /* This is where we want our message. */
985   caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;
986 
987   if(src_e == ANY) src_p = ANY;
988   else
989   {
990 	okendpt(src_e, &src_p);
991 	if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
992 	{
993 		return EDEADSRCDST;
994 	}
995   }
996 
997 
998   /* Check to see if a message from desired source is already available.  The
999    * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
1000    * set, the process should be blocked.
1001    */
1002   if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {
1003 
1004     /* Check if there are pending notifications, except for SENDREC. */
1005     if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {
1006 
1007 	/* Check for pending notifications */
1008         src_id = has_pending_notify(caller_ptr, src_p);
1009         found = src_id != NULL_PRIV_ID;
1010         if(found) {
1011             src_proc_nr = id_to_nr(src_id);		/* get source proc */
1012             sender_e = proc_addr(src_proc_nr)->p_endpoint;
1013         }
1014 
1015         if (found && CANRECEIVE(src_e, sender_e, caller_ptr, 0,
1016           &m_notify_buff)) {
1017 
1018 #if DEBUG_ENABLE_IPC_WARNINGS
1019 	    if(src_proc_nr == NONE) {
1020 		printf("mini_receive: sending notify from NONE\n");
1021 	    }
1022 #endif
1023 	    assert(src_proc_nr != NONE);
1024             unset_notify_pending(caller_ptr, src_id);	/* no longer pending */
1025 
1026             /* Found a suitable source, deliver the notification message. */
1027 	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1028 	    assert(src_e == ANY || sender_e == src_e);
1029 
1030 	    /* assemble message */
1031 	    BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
1032 	    caller_ptr->p_delivermsg.m_source = sender_e;
1033 	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;
1034 
1035 	    IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);
1036 
1037 	    goto receive_done;
1038         }
1039     }
1040 
1041     /* Check for pending asynchronous messages */
1042     if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
1043         if (src_p != ANY)
1044 		r = try_one(src_e, proc_addr(src_p), caller_ptr);
1045         else
1046         	r = try_async(caller_ptr);
1047 
1048 	if (r == OK) {
1049             IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
1050             goto receive_done;
1051         }
1052     }
1053 
1054     /* Check caller queue. Use pointer pointers to keep code simple. */
1055     xpp = &caller_ptr->p_caller_q;
1056     while (*xpp) {
1057 	struct proc * sender = *xpp;
1058 	endpoint_t sender_e = sender->p_endpoint;
1059 
1060         if (CANRECEIVE(src_e, sender_e, caller_ptr, 0, &sender->p_sendmsg)) {
1061             int call;
1062 	    assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
1063 	    assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));
1064 
1065 	    /* Found acceptable message. Copy it and update status. */
1066   	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1067 	    caller_ptr->p_delivermsg = sender->p_sendmsg;
1068 	    caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
1069 	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;
1070 	    RTS_UNSET(sender, RTS_SENDING);
1071 
1072 	    call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
1073 	    IPC_STATUS_ADD_CALL(caller_ptr, call);
1074 
1075 	    /*
1076 	     * if the message is originally from the kernel on behalf of this
1077 	     * process, we must send the status flags accordingly
1078 	     */
1079 	    if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
1080 		IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
1081 		/* we can clear the flag now, it is not needed anymore */
1082 		sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
1083 	    }
1084 	    if (sender->p_misc_flags & MF_SIG_DELAY)
1085 		sig_delay_done(sender);
1086 
1087 #if DEBUG_IPC_HOOK
1088             hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
1089 #endif
1090 
1091             *xpp = sender->p_q_link;		/* remove from queue */
1092 	    sender->p_q_link = NULL;
1093 	    goto receive_done;
1094 	}
1095 	xpp = &sender->p_q_link;		/* proceed to next */
1096     }
1097   }
1098 
1099   /* No suitable message is available or the caller couldn't send in SENDREC.
1100    * Block the process trying to receive, unless the flags tell otherwise.
1101    */
1102   if ( ! (flags & NON_BLOCKING)) {
1103       /* Check for a possible deadlock before actually blocking. */
1104       if (deadlock(RECEIVE, caller_ptr, src_e)) {
1105           return(ELOCKED);
1106       }
1107 
1108       caller_ptr->p_getfrom_e = src_e;
1109       RTS_SET(caller_ptr, RTS_RECEIVING);
1110       return(OK);
1111   } else {
1112 	return(ENOTREADY);
1113   }
1114 
1115 receive_done:
1116   if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
1117 	  caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
1118   return OK;
1119 }
1120 
1121 /*===========================================================================*
1122  *				mini_notify				     *
1123  *===========================================================================*/
1124 int mini_notify(
1125   const struct proc *caller_ptr,	/* sender of the notification */
1126   endpoint_t dst_e			/* which process to notify */
1127 )
1128 {
1129   register struct proc *dst_ptr;
1130   int src_id;				/* source id for late delivery */
1131   int dst_p;
1132 
1133   if (!isokendpt(dst_e, &dst_p)) {
1134 	util_stacktrace();
1135 	printf("mini_notify: bogus endpoint %d\n", dst_e);
1136 	return EDEADSRCDST;
1137   }
1138 
1139   dst_ptr = proc_addr(dst_p);
1140 
1141   /* Check to see if target is blocked waiting for this message. A process
1142    * can be both sending and receiving during a SENDREC system call.
1143    */
1144   if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, 0, &m_notify_buff) &&
1145     !(dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
1146       /* Destination is indeed waiting for a message. Assemble a notification
1147        * message and deliver it. Copy from pseudo-source HARDWARE, since the
1148        * message is in the kernel's address space.
1149        */
1150       assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));
1151 
1152       BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
1153       dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1154       dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1155 
1156       IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
1157       RTS_UNSET(dst_ptr, RTS_RECEIVING);
1158 
1159       return(OK);
1160   }
1161 
1162   /* Destination is not ready to receive the notification. Add it to the
1163    * bit map with pending notifications. Note the indirectness: the privilege id
1164    * instead of the process number is used in the pending bit map.
1165    */
1166   src_id = priv(caller_ptr)->s_id;
1167   set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
1168   return(OK);
1169 }
1170 
1171 #define ASCOMPLAIN(caller, entry, field)	\
1172 	printf("kernel:%s:%d: asyn failed for %s in %s "	\
1173 	"(%d/%zu, tab 0x%lx)\n",__FILE__,__LINE__,	\
1174 field, caller->p_name, entry, priv(caller)->s_asynsize, priv(caller)->s_asyntab)
1175 
1176 #define A_RETR(entry) do {			\
1177   if (data_copy(				\
1178   		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
1179   		KERNEL, (vir_bytes) &tabent,	\
1180   		sizeof(tabent)) != OK) {	\
1181   			ASCOMPLAIN(caller_ptr, entry, "message entry");	\
1182   			r = EFAULT;		\
1183 	                goto asyn_error; \
1184   }						\
1185   else if(tabent.dst == SELF) { \
1186       tabent.dst = caller_ptr->p_endpoint; \
1187   } \
1188   			 } while(0)
1189 
1190 #define A_INSRT(entry) do {			\
1191   if (data_copy(KERNEL, (vir_bytes) &tabent,	\
1192   		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
1193   		sizeof(tabent)) != OK) {	\
1194   			ASCOMPLAIN(caller_ptr, entry, "message entry");	\
1195 			/* Do NOT set r or goto asyn_error here! */ \
1196   }						\
1197   			  } while(0)
1198 
1199 /*===========================================================================*
1200  *				try_deliver_senda			     *
1201  *===========================================================================*/
1202 int try_deliver_senda(struct proc *caller_ptr,
1203 				asynmsg_t *table,
1204 				size_t size)
1205 {
1206   int r, dst_p, done, do_notify;
1207   unsigned int i;
1208   unsigned flags;
1209   endpoint_t dst;
1210   struct proc *dst_ptr;
1211   struct priv *privp;
1212   asynmsg_t tabent;
1213   const vir_bytes table_v = (vir_bytes) table;
1214   message *m_ptr = NULL;
1215 
1216   privp = priv(caller_ptr);
1217 
1218   /* Clear table */
1219   privp->s_asyntab = -1;
1220   privp->s_asynsize = 0;
1221   privp->s_asynendpoint = caller_ptr->p_endpoint;
1222 
1223   if (size == 0) return(OK);  /* Nothing to do, just return */
1224 
1225   /* Scan the table */
1226   do_notify = FALSE;
1227   done = TRUE;
1228 
1229   /* Limit size to something reasonable. An arbitrary choice is 16
1230    * times the number of process table entries.
1231    *
1232    * (this check has been duplicated in sys_call but is left here
1233    * as a sanity check)
1234    */
1235   if (size > 16*(NR_TASKS + NR_PROCS)) {
1236     r = EDOM;
1237     return r;
1238   }
1239 
1240   for (i = 0; i < size; i++) {
1241 	/* Process each entry in the table and store the result in the table.
1242 	 * If we're done handling a message, copy the result to the sender. */
1243 
1244 	dst = NONE;
1245 	/* Copy message to kernel */
1246 	A_RETR(i);
1247 	flags = tabent.flags;
1248 	dst = tabent.dst;
1249 
1250 	if (flags == 0) continue; /* Skip empty entries */
1251 
1252 	/* 'flags' field must contain only valid bits */
1253 	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
1254 		r = EINVAL;
1255 		goto asyn_error;
1256 	}
1257 	if (!(flags & AMF_VALID)) { /* Must contain message */
1258 		r = EINVAL;
1259 		goto asyn_error;
1260 	}
1261 	if (flags & AMF_DONE) continue;	/* Already done processing */
1262 
1263 	r = OK;
1264 	if (!isokendpt(tabent.dst, &dst_p))
1265 		r = EDEADSRCDST; /* Bad destination, report the error */
1266 	else if (iskerneln(dst_p))
1267 		r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
1268 	else if (!may_asynsend_to(caller_ptr, dst_p))
1269 		r = ECALLDENIED; /* Send denied by IPC mask */
1270 	else 	/* r == OK */
1271 		dst_ptr = proc_addr(dst_p);
1272 
1273 	/* XXX: RTS_NO_ENDPOINT should be removed */
1274 	if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
1275 		r = EDEADSRCDST;
1276 	}
1277 
1278 	/* Check if 'dst' is blocked waiting for this message.
1279 	 * If AMF_NOREPLY is set, do not satisfy the receiving part of
1280 	 * a SENDREC.
1281 	 */
1282 	if (r == OK && WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr,
1283 	    (vir_bytes)&table[i].msg, NULL) &&
1284 	    (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
1285 		/* Destination is indeed waiting for this message. */
1286 		dst_ptr->p_delivermsg = tabent.msg;
1287 		dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
1288 		dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1289 		IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
1290 		RTS_UNSET(dst_ptr, RTS_RECEIVING);
1291 #if DEBUG_IPC_HOOK
1292 		hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
1293 #endif
1294 	} else if (r == OK) {
1295 		/* Inform receiver that something is pending */
1296 		set_sys_bit(priv(dst_ptr)->s_asyn_pending,
1297 			    priv(caller_ptr)->s_id);
1298 		done = FALSE;
1299 		continue;
1300 	}
1301 
1302 	/* Store results */
1303 	tabent.result = r;
1304 	tabent.flags = flags | AMF_DONE;
1305 	if (flags & AMF_NOTIFY)
1306 		do_notify = TRUE;
1307 	else if (r != OK && (flags & AMF_NOTIFY_ERR))
1308 		do_notify = TRUE;
1309 	A_INSRT(i);	/* Copy results to caller; ignore errors */
1310 	continue;
1311 
1312 asyn_error:
1313 	if (dst != NONE)
1314 		printf("KERNEL senda error %d to %d\n", r, dst);
1315 	else
1316 		printf("KERNEL senda error %d\n", r);
1317   }
1318 
1319   if (do_notify)
1320 	mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);
1321 
1322   if (!done) {
1323 	privp->s_asyntab = (vir_bytes) table;
1324 	privp->s_asynsize = size;
1325   }
1326 
1327   return(OK);
1328 }
1329 
1330 /*===========================================================================*
1331  *				mini_senda				     *
1332  *===========================================================================*/
1333 static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
1334 {
1335   struct priv *privp;
1336 
1337   privp = priv(caller_ptr);
1338   if (!(privp->s_flags & SYS_PROC)) {
1339 	printf( "mini_senda: warning caller has no privilege structure\n");
1340 	return(EPERM);
1341   }
1342 
1343   return try_deliver_senda(caller_ptr, table, size);
1344 }
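/* For illustration only: the sender's side of SENDA, assuming the userland
 * senda() stub declared in <minix/ipc.h>; 'driver_ep' and 'm' are
 * placeholders. The table must stay valid until delivery completes: the
 * kernel re-reads it from try_one() until every entry is flagged AMF_DONE.
 *
 *  static asynmsg_t table[1];
 *  table[0].dst = driver_ep;			// destination endpoint
 *  table[0].flags = AMF_VALID | AMF_NOTIFY;	// valid entry; notify when done
 *  table[0].msg = m;				// message to deliver
 *  if (senda(table, 1) != OK)
 *      panic("senda failed");
 *  // the outcome of the attempt later appears in table[0].result,
 *  // with AMF_DONE set in table[0].flags
 */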
1345 
1346 
1347 /*===========================================================================*
1348  *				try_async				     *
1349  *===========================================================================*/
1350 static int try_async(
1351   struct proc *caller_ptr)
1352 {
1353   int r;
1354   struct priv *privp;
1355   struct proc *src_ptr;
1356   sys_map_t *map;
1357 
1358   map = &priv(caller_ptr)->s_asyn_pending;
1359 
1360   /* Try all privilege structures */
1361   for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp)  {
1362 	if (privp->s_proc_nr == NONE)
1363 		continue;
1364 
1365 	if (!get_sys_bit(*map, privp->s_id))
1366 		continue;
1367 
1368 	src_ptr = proc_addr(privp->s_proc_nr);
1369 
1370 #ifdef CONFIG_SMP
1371 	/*
1372 	 * Do not copy from a process which does not have a stable address space
1373 	 * due to VM fiddling with it
1374 	 */
1375 	if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
1376 		src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
1377 		continue;
1378 	}
1379 #endif
1380 
1381 	assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
1382 	if ((r = try_one(ANY, src_ptr, caller_ptr)) == OK)
1383 		return(r);
1384   }
1385 
1386   return(ESRCH);
1387 }
1388 
1389 
1390 /*===========================================================================*
1391  *				try_one					     *
1392  *===========================================================================*/
1393 static int try_one(endpoint_t receive_e, struct proc *src_ptr,
1394     struct proc *dst_ptr)
1395 {
1396 /* Try to receive an asynchronous message from 'src_ptr' */
1397   int r = EAGAIN, done, do_notify;
1398   unsigned int flags, i;
1399   size_t size;
1400   endpoint_t dst, src_e;
1401   struct proc *caller_ptr;
1402   struct priv *privp;
1403   asynmsg_t tabent;
1404   vir_bytes table_v;
1405 
1406   privp = priv(src_ptr);
1407   if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1408   size = privp->s_asynsize;
1409   table_v = privp->s_asyntab;
1410 
1411   /* Clear table pending message flag. We're done unless we're not. */
1412   unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1413 
1414   if (size == 0) return(EAGAIN);
1415   if (privp->s_asynendpoint != src_ptr->p_endpoint) return EAGAIN;
1416   if (!may_asynsend_to(src_ptr, proc_nr(dst_ptr))) return (ECALLDENIED);
1417 
1418   caller_ptr = src_ptr;	/* Needed for A_ macros later on */
1419   src_e = src_ptr->p_endpoint;
1420 
1421   /* Scan the table */
1422   do_notify = FALSE;
1423   done = TRUE;
1424 
1425   for (i = 0; i < size; i++) {
1426   	/* Process each entry in the table and store the result in the table.
1427   	 * If we're done handling a message, copy the result to the sender.
1428   	 * Some checks done in mini_senda are duplicated here, as the sender
1429   	 * could've altered the contents of the table in the meantime.
1430   	 */
1431 
1432 	/* Copy message to kernel */
1433 	A_RETR(i);
1434 	flags = tabent.flags;
1435 	dst = tabent.dst;
1436 
1437 	if (flags == 0) continue;	/* Skip empty entries */
1438 
1439 	/* 'flags' field must contain only valid bits */
1440 	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1441 		r = EINVAL;
1442 	else if (!(flags & AMF_VALID)) /* Must contain message */
1443 		r = EINVAL;
1444 	else if (flags & AMF_DONE) continue; /* Already done processing */
1445 
1446 	/* Clear done flag. The sender is done sending when all messages in the
1447 	 * table are marked done or empty. However, we will only know that
1448 	 * the next time we enter this function or when the sender decides to
1449 	 * send additional asynchronous messages and manages to deliver them
1450 	 * all.
1451 	 */
1452 	done = FALSE;
1453 
1454 	if (r == EINVAL)
1455 		goto store_result;
1456 
1457 	/* Message must be directed at receiving end */
1458 	if (dst != dst_ptr->p_endpoint) continue;
1459 
1460 	if (!CANRECEIVE(receive_e, src_e, dst_ptr,
1461 		table_v + i*sizeof(asynmsg_t) + offsetof(struct asynmsg,msg),
1462 		NULL)) {
1463 		continue;
1464 	}
1465 
1466 	/* If AMF_NOREPLY is set, then this message is not a reply to a
1467 	 * SENDREC and thus should not satisfy the receiving part of the
1468 	 * SENDREC. This message is to be delivered later.
1469 	 */
1470 	if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
1471 		continue;
1472 
1473 	/* Destination is ready to receive the message; deliver it */
1474 	r = OK;
1475 	dst_ptr->p_delivermsg = tabent.msg;
1476 	dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
1477 	dst_ptr->p_misc_flags |= MF_DELIVERMSG;
1478 #if DEBUG_IPC_HOOK
1479 	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
1480 #endif
1481 
1482 store_result:
1483 	/* Store results for sender. We may just have started delivering a
1484 	 * message, so we must not return an error to the caller in the case
1485 	 * that storing the results triggers an error!
1486 	 */
1487 	tabent.result = r;
1488 	tabent.flags = flags | AMF_DONE;
1489 	if (flags & AMF_NOTIFY) do_notify = TRUE;
1490 	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1491 	A_INSRT(i);	/* Copy results to sender; ignore errors */
1492 
1493 	break;
1494   }
1495 
1496   if (do_notify)
1497 	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1498 
1499   if (done) {
1500 	privp->s_asyntab = -1;
1501 	privp->s_asynsize = 0;
1502   } else {
1503 	set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1504   }
1505 
1506 asyn_error:
1507   return(r);
1508 }
1509 
1510 /*===========================================================================*
1511  *				cancel_async				     *
1512  *===========================================================================*/
1513 int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
1514 {
1515 /* Cancel asynchronous messages from src to dst, because dst is not interested
1516  * in them (e.g., dst has been restarted) */
1517   int done, do_notify;
1518   unsigned int flags, i;
1519   size_t size;
1520   endpoint_t dst;
1521   struct proc *caller_ptr;
1522   struct priv *privp;
1523   asynmsg_t tabent;
1524   vir_bytes table_v;
1525 
1526   privp = priv(src_ptr);
1527   if (!(privp->s_flags & SYS_PROC)) return(EPERM);
1528   size = privp->s_asynsize;
1529   table_v = privp->s_asyntab;
1530 
1531   /* Clear table pending message flag. We're done unless we're not. */
1532   privp->s_asyntab = -1;
1533   privp->s_asynsize = 0;
1534   unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
1535 
1536   if (size == 0) return(EAGAIN);
1537   if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);
1538 
1539   caller_ptr = src_ptr;	/* Needed for A_ macros later on */
1540 
1541   /* Scan the table */
1542   do_notify = FALSE;
1543   done = TRUE;
1544 
1545 
1546   for (i = 0; i < size; i++) {
1547   	/* Process each entry in the table and store the result in the table.
1548   	 * If we're done handling a message, copy the result to the sender.
1549   	 * Some checks done in mini_senda are duplicated here, as the sender
1550   	 * could've altered the contents of the table in the meantime.
1551   	 */
1552 
1553   	int r = EDEADSRCDST;	/* Cancel delivery due to dead dst */
1554 
1555 	/* Copy message to kernel */
1556 	A_RETR(i);
1557 	flags = tabent.flags;
1558 	dst = tabent.dst;
1559 
1560 	if (flags == 0) continue;	/* Skip empty entries */
1561 
1562 	/* 'flags' field must contain only valid bits */
1563 	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
1564 		r = EINVAL;
1565 	else if (!(flags & AMF_VALID)) /* Must contain message */
1566 		r = EINVAL;
1567 	else if (flags & AMF_DONE) continue; /* Already done processing */
1568 
1569 	/* Message must be directed at receiving end */
1570 	if (dst != dst_ptr->p_endpoint) {
1571 		done = FALSE;
1572 		continue;
1573 	}
1574 
1575 	/* Store results for sender */
1576 	tabent.result = r;
1577 	tabent.flags = flags | AMF_DONE;
1578 	if (flags & AMF_NOTIFY) do_notify = TRUE;
1579 	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
1580 	A_INSRT(i);	/* Copy results to sender; ignore errors */
1581   }
1582 
1583   if (do_notify)
1584 	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);
1585 
1586   if (!done) {
1587 	privp->s_asyntab = table_v;
1588 	privp->s_asynsize = size;
1589   }
1590 
1591 asyn_error:
1592   return(OK);
1593 }
1594 
1595 /*===========================================================================*
1596  *				enqueue					     *
1597  *===========================================================================*/
1598 void enqueue(
1599   register struct proc *rp	/* this process is now runnable */
1600 )
1601 {
1602 /* Add 'rp' to one of the queues of runnable processes.  This function is
1603  * responsible for inserting a process into one of the scheduling queues.
1604  * The mechanism is implemented here.   The actual scheduling policy is
1605  * defined in sched() and pick_proc().
1606  *
1607  * This function can be used cross-CPU as it always uses the queues of the cpu
1608  * process is assigned to.
1609  */
1610   int q = rp->p_priority;	 		/* scheduling queue to use */
1611   struct proc **rdy_head, **rdy_tail;
1612 
1613   assert(proc_is_runnable(rp));
1614 
1615   assert(q >= 0);
1616 
1617   rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1618   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1619 
1620   /* Now add the process to the queue. */
1621   if (!rdy_head[q]) {		/* add to empty queue */
1622       rdy_head[q] = rdy_tail[q] = rp; 		/* create a new queue */
1623       rp->p_nextready = NULL;		/* mark new end */
1624   }
1625   else {					/* add to tail of queue */
1626       rdy_tail[q]->p_nextready = rp;		/* chain tail of queue */
1627       rdy_tail[q] = rp;				/* set new queue tail */
1628       rp->p_nextready = NULL;		/* mark new end */
1629   }
1630 
1631   if (cpuid == rp->p_cpu) {
1632 	  /*
1633 	   * if we are enqueueing a process with a higher priority than the
1634 	   * current one, the current process gets preempted, provided it is
1635 	   * preemptible. Testing the priority also makes sure that a process does not preempt itself
1636 	   */
1637 	  struct proc * p;
1638 	  p = get_cpulocal_var(proc_ptr);
1639 	  assert(p);
1640 	  if((p->p_priority > rp->p_priority) &&
1641 			  (priv(p)->s_flags & PREEMPTIBLE))
1642 		  RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
1643   }
1644 #ifdef CONFIG_SMP
1645   /*
1646    * if the process was enqueued on a different cpu and that cpu is idle, i.e.
1647    * its timer is stopped, we need to wake up that cpu and let it schedule this
1648    * new process
1649    */
1650   else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
1651 	  smp_schedule(rp->p_cpu);
1652   }
1653 #endif
1654 
1655   /* Make note of when this process was added to queue */
1656   read_tsc_64(&(rp->p_accounting.enter_queue));
1657 
1658 
1659 #if DEBUG_SANITYCHECKS
1660   assert(runqueues_ok_local());
1661 #endif
1662 }
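
/* A sketch of the run-queue bookkeeping used above, assuming NR_SCHED_QUEUES
 * priority levels with queue 0 as the highest priority:
 *
 *  struct proc *run_q_head[NR_SCHED_QUEUES];	// first runnable proc per level
 *  struct proc *run_q_tail[NR_SCHED_QUEUES];	// last runnable proc per level
 *
 * Keeping an explicit tail pointer per level makes the common FIFO append an
 * O(1) operation; only dequeue() must walk a queue, and then only the single
 * priority level the process is on.
 */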
1663 
1664 /*===========================================================================*
1665  *				enqueue_head				     *
1666  *===========================================================================*/
1667 /*
1668  * Put a process at the front of its run queue. This comes in handy when a
1669  * process is preempted and removed from its run queue, so that a currently
1670  * not-runnable process never sits on a run queue. To be fair, we have to put
1671  * the process back at the front afterwards.
1672  */
1673 static void enqueue_head(struct proc *rp)
1674 {
1675   const int q = rp->p_priority;	 		/* scheduling queue to use */
1676 
1677   struct proc **rdy_head, **rdy_tail;
1678 
1679   assert(proc_ptr_ok(rp));
1680   assert(proc_is_runnable(rp));
1681 
1682   /*
1683    * the process was runnable, with quantum left, when it was dequeued. A
1684    * process with no time left should have been handled elsewhere, differently
1685    */
1686   assert(rp->p_cpu_time_left);
1687 
1688   assert(q >= 0);
1689 
1690 
1691   rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
1692   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1693 
1694   /* Now add the process to the queue. */
1695   if (!rdy_head[q]) {		/* add to empty queue */
1696 	rdy_head[q] = rdy_tail[q] = rp; 	/* create a new queue */
1697 	rp->p_nextready = NULL;			/* mark new end */
1698   } else {					/* add to head of queue */
1699 	rp->p_nextready = rdy_head[q];		/* chain head of queue */
1700 	rdy_head[q] = rp;			/* set new queue head */
1701   }
1702 
1703   /* Make note of when this process was added to queue */
1704   read_tsc_64(&(rp->p_accounting.enter_queue));
1705 
1706 
1707   /* Process accounting for scheduling */
1708   rp->p_accounting.dequeues--;
1709   rp->p_accounting.preempted++;
1710 
1711 #if DEBUG_SANITYCHECKS
1712   assert(runqueues_ok_local());
1713 #endif
1714 }
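
/* A hedged sketch of the intended use, as done when resuming a preempted
 * process elsewhere in this file: a process that still has quantum left goes
 * back to the front of its queue, while one that ran out goes to the back:
 *
 *  if (p->p_cpu_time_left)
 *	enqueue_head(p);	// was preempted; resumes ahead of its peers
 *  else
 *	enqueue(p);		// quantum used up; waits behind its peers
 */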
1715 
1716 /*===========================================================================*
1717  *				dequeue					     *
1718  *===========================================================================*/
1719 void dequeue(struct proc *rp)
1720 /* this process is no longer runnable */
1721 {
1722 /* A process must be removed from the scheduling queues, for example, because
1723  * it has blocked.  If the currently active process is removed, a new process
1724  * is picked to run by calling pick_proc().
1725  *
1726  * This function can operate x-cpu as it always removes the process from the
1727  * queue of the cpu the process is currently assigned to.
1728  */
1729   int q = rp->p_priority;		/* queue to use */
1730   struct proc **xpp;			/* iterate over queue */
1731   struct proc *prev_xp;
1732   u64_t tsc, tsc_delta;
1733 
1734   struct proc **rdy_tail;
1735 
1736   assert(proc_ptr_ok(rp));
1737   assert(!proc_is_runnable(rp));
1738 
1739   /* Side-effect for kernel tasks: check that the task's stack guard is still intact. */
1740   assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);
1741 
1742   rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);
1743 
1744   /* Now make sure that the process is not in its ready queue. Remove the
1745    * process if it is found. A process can be made unready even if it is not
1746    * running by being sent a signal that kills it.
1747    */
1748   prev_xp = NULL;
1749   for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
1750 		  xpp = &(*xpp)->p_nextready) {
1751       if (*xpp == rp) {				/* found process to remove */
1752           *xpp = (*xpp)->p_nextready;		/* replace with next chain */
1753           if (rp == rdy_tail[q]) {		/* queue tail removed */
1754               rdy_tail[q] = prev_xp;		/* set new tail */
1755 	  }
1756 
1757           break;
1758       }
1759       prev_xp = *xpp;				/* save previous in chain */
1760   }
1761 
1762 
1763   /* Process accounting for scheduling */
1764   rp->p_accounting.dequeues++;
1765 
1766   /* This is not all that accurate on virtual machines, especially with
1767      I/O-bound processes that only spend a short amount of time in the queue
1768      at a time. */
1769   if (rp->p_accounting.enter_queue) {
1770 	read_tsc_64(&tsc);
1771 	tsc_delta = tsc - rp->p_accounting.enter_queue;
1772 	rp->p_accounting.time_in_queue = rp->p_accounting.time_in_queue +
1773 		tsc_delta;
1774 	rp->p_accounting.enter_queue = 0;
1775   }
1776 
1777 
1778 #if DEBUG_SANITYCHECKS
1779   assert(runqueues_ok_local());
1780 #endif
1781 }
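
/* The queue-time accounting above is kept in raw TSC ticks. A minimal sketch
 * of the arithmetic, with cpu_time_2_ms() as used by notify_scheduler() below
 * for the eventual conversion:
 *
 *  u64_t then, now;
 *  read_tsc_64(&then);		// stamped when the process was enqueued
 *  // ... process waits in its run queue ...
 *  read_tsc_64(&now);
 *  rp->p_accounting.time_in_queue += now - then;	// accumulate ticks
 *  // report: cpu_time_2_ms(rp->p_accounting.time_in_queue) milliseconds
 */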
1782 
1783 /*===========================================================================*
1784  *				pick_proc				     *
1785  *===========================================================================*/
1786 static struct proc * pick_proc(void)
1787 {
1788 /* Decide who to run now.  A new process is selected and returned.
1789  * When a billable process is selected, record it in 'bill_ptr', so that the
1790  * clock task can tell who to bill for system time.
1791  *
1792  * This function always uses the run queues of the local cpu!
1793  */
1794   register struct proc *rp;			/* process to run */
1795   struct proc **rdy_head;
1796   int q;				/* iterate over queues */
1797 
1798   /* Check each of the scheduling queues for ready processes. The number of
1799    * queues is defined in proc.h, and priorities are set in the task table.
1800    * If there are no processes ready to run, return NULL.
1801    */
1802   rdy_head = get_cpulocal_var(run_q_head);
1803   for (q=0; q < NR_SCHED_QUEUES; q++) {
1804 	if(!(rp = rdy_head[q])) {
1805 		TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
1806 		continue;
1807 	}
1808 	assert(proc_is_runnable(rp));
1809 	if (priv(rp)->s_flags & BILLABLE)
1810 		get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
1811 	return rp;
1812   }
1813   return NULL;
1814 }
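
/* The scan above relies on the convention that numerically lower queue
 * indices hold higher-priority processes. A sketch with illustrative
 * contents (the process names are hypothetical):
 *
 *  rdy_head[0] -> NULL			// highest priority: nothing ready
 *  rdy_head[1] -> clock -> NULL	// picked: lowest non-empty index
 *  rdy_head[7] -> init -> sh -> NULL	// user processes wait their turn
 */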
1815 
1816 /*===========================================================================*
1817  *				endpoint_lookup				     *
1818  *===========================================================================*/
1819 struct proc *endpoint_lookup(endpoint_t e)
1820 {
1821 	int n;
1822 
1823 	if(!isokendpt(e, &n)) return NULL;
1824 
1825 	return proc_addr(n);
1826 }
1827 
1828 /*===========================================================================*
1829  *				isokendpt_f				     *
1830  *===========================================================================*/
1831 #if DEBUG_ENABLE_IPC_WARNINGS
1832 int isokendpt_f(file, line, e, p, fatalflag)
1833 const char *file;
1834 int line;
1835 #else
1836 int isokendpt_f(e, p, fatalflag)
1837 #endif
1838 endpoint_t e;
1839 int *p;
1840 const int fatalflag;
1841 {
1842 	int ok = 0;
1843 	/* Convert an endpoint number into a process number.
1844 	 * Return nonzero if the process is alive with the corresponding
1845 	 * generation number, zero otherwise.
1846 	 *
1847 	 * This function is called with file and line number by the
1848 	 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
1849 	 * otherwise without. This allows us to print where the
1850 	 * conversion was attempted, making the errors verbose without
1851 	 * adding code for that at every call.
1852 	 *
1853 	 * If fatalflag is nonzero, we must panic if the conversion doesn't
1854 	 * succeed.
1855 	 */
1856 	*p = _ENDPOINT_P(e);
1857 	ok = 0;
1858 	if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
1859 		ok = 1;
1860 	if(!ok && fatalflag)
1861 		panic("invalid endpoint: %d",  e);
1862 	return ok;
1863 }
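
/* Endpoints pair a process slot with a generation number, so a reused slot
 * yields a fresh endpoint. A minimal sketch, assuming the _ENDPOINT macros
 * from <minix/endpoint.h>:
 *
 *  endpoint_t e = _ENDPOINT(g, p);	// generation g, process slot p
 *  assert(_ENDPOINT_P(e) == p);	// the slot can be recovered ...
 *  assert(_ENDPOINT_G(e) == g);	// ... and so can the generation
 *
 * The comparison against p_endpoint above fails for a stale endpoint exactly
 * because the slot's current occupant carries a different generation.
 */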
1864 
1865 static void notify_scheduler(struct proc *p)
1866 {
1867 	message m_no_quantum;
1868 	int err;
1869 
1870 	assert(!proc_kernel_scheduler(p));
1871 
1872 	/* dequeue the process */
1873 	RTS_SET(p, RTS_NO_QUANTUM);
1874 	/*
1875 	 * Notify the process's scheduler that it has run out of
1876 	 * quantum. This is done by sending a message to the scheduler
1877 	 * on the process's behalf
1878 	 */
1879 	m_no_quantum.m_source = p->p_endpoint;
1880 	m_no_quantum.m_type   = SCHEDULING_NO_QUANTUM;
1881 	m_no_quantum.m_krn_lsys_schedule.acnt_queue = cpu_time_2_ms(p->p_accounting.time_in_queue);
1882 	m_no_quantum.m_krn_lsys_schedule.acnt_deqs      = p->p_accounting.dequeues;
1883 	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync  = p->p_accounting.ipc_sync;
1884 	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
1885 	m_no_quantum.m_krn_lsys_schedule.acnt_preempt   = p->p_accounting.preempted;
1886 	m_no_quantum.m_krn_lsys_schedule.acnt_cpu       = cpuid;
1887 	m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load  = cpu_load();
1888 
1889 	/* Reset accounting */
1890 	reset_proc_accounting(p);
1891 
1892 	if ((err = mini_send(p, p->p_scheduler->p_endpoint,
1893 					&m_no_quantum, FROM_KERNEL))) {
1894 		panic("WARNING: Scheduling: mini_send returned %d\n", err);
1895 	}
1896 }
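
/* The receiving side lives in the userspace scheduler. A hedged sketch of
 * that counterpart (do_noquantum and sys_schedule here are assumptions about
 * the scheduler's code, not definitions made in this file):
 *
 *  message m;
 *  if (sef_receive(ANY, &m) == OK && m.m_type == SCHEDULING_NO_QUANTUM)
 *	do_noquantum(&m);	// pick new priority/quantum, sys_schedule()
 */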
1897 
1898 void proc_no_time(struct proc * p)
1899 {
1900 	if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
1901 		/* this dequeues the process */
1902 		notify_scheduler(p);
1903 	}
1904 	else {
1905 		/*
1906 		 * non-preemptible processes only need their quantum to
1907 		 * be renewed. In fact, they bypass scheduling
1908 		 */
1909 		p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
1910 #if DEBUG_RACE
1911 		RTS_SET(p, RTS_PREEMPTED);
1912 		RTS_UNSET(p, RTS_PREEMPTED);
1913 #endif
1914 	}
1915 }
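
/* Quantum renewal, sketched: p_quantum_size_ms is a wall-clock budget while
 * p_cpu_time_left is kept in cpu (TSC) ticks, so the renewal above runs the
 * budget through ms_2_cpu_time(). Illustrative numbers only:
 *
 *  p->p_quantum_size_ms = 200;			// a 200 ms time slice
 *  p->p_cpu_time_left = ms_2_cpu_time(200);	// ~2*10^8 ticks at 1 GHz
 */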
1916 
1917 void reset_proc_accounting(struct proc *p)
1918 {
1919   p->p_accounting.preempted = 0;
1920   p->p_accounting.ipc_sync  = 0;
1921   p->p_accounting.ipc_async = 0;
1922   p->p_accounting.dequeues  = 0;
1923   p->p_accounting.time_in_queue = 0;
1924   p->p_accounting.enter_queue = 0;
1925 }
1926 
1927 void copr_not_available_handler(void)
1928 {
1929 	struct proc * p;
1930 	struct proc ** local_fpu_owner;
1931 	/*
1932 	 * Disable the FPU exception (both for the kernel and for the process
1933 	 * once it's scheduled), and initialize or restore the FPU state.
1934 	 */
1935 
1936 	disable_fpu_exception();
1937 
1938 	p = get_cpulocal_var(proc_ptr);
1939 
1940 	/* if FPU is not owned by anyone, do not store anything */
1941 	local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
1942 	if (*local_fpu_owner != NULL) {
1943 		assert(*local_fpu_owner != p);
1944 		save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
1945 	}
1946 
1947 	/*
1948 	 * restore the current process' state and let it run again, do not
1949 	 * schedule!
1950 	 */
1951 	if (restore_fpu(p) != OK) {
1952 		/* Restoring FPU state failed. This is always the process's own
1953 		 * fault. Send a signal, and schedule another process instead.
1954 		 */
1955 		*local_fpu_owner = NULL;		/* release FPU */
1956 		cause_sig(proc_nr(p), SIGFPE);
1957 		return;
1958 	}
1959 
1960 	*local_fpu_owner = p;
1961 	context_stop(proc_addr(KERNEL));
1962 	restore_user_context(p);
1963 	NOT_REACHABLE;
1964 }
1965 
1966 void release_fpu(struct proc * p) {
1967 	struct proc ** fpu_owner_ptr;
1968 
1969 	fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);
1970 
1971 	if (*fpu_owner_ptr == p)
1972 		*fpu_owner_ptr = NULL;
1973 }
1974 
1975 void ser_dump_proc()
1976 {
1977         struct proc *pp;
1978 
1979         for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
1980         {
1981                 if (isemptyp(pp))
1982                         continue;
1983                 print_proc_recursive(pp);
1984         }
1985 }
1986