/* This file contains essentially all of the process and message handling.
 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
 * There is one entry point from the outside:
 *
 *   sys_call: 	      a system call, i.e., the kernel is trapped with an INT
 *
 * Changes:
 *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
 *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
 *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
 *   May 24, 2005     new notification system call  (Jorrit N. Herder)
 *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
 *
 * The code here is critical to make everything work and is important for the
 * overall performance of the system. A large fraction of the code deals with
 * list manipulation. To make this both easy to understand and fast to
 * execute, pointer pointers are used throughout the code. Pointer pointers
 * prevent exceptions for the head or tail of a linked list.
 *
 *  node_t *queue, *new_node;	// assume these as global variables
 *  node_t **xpp = &queue; 	// get pointer pointer to head of queue
 *  while (*xpp != NULL) 	// find last pointer of the linked list
 *      xpp = &(*xpp)->next;	// get pointer to next pointer
 *  *xpp = new_node;		// now replace the end (the NULL pointer)
 *  new_node->next = NULL;	// and mark the new end of the list
 *
 * For example, when adding a new node to the end of the list, one normally
 * makes an exception for an empty list and looks up the end of the list for
 * nonempty lists. As shown above, this is not required with pointer pointers.
 */
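
/* The same idiom also removes a node without special-casing the head. This is
 * a minimal illustrative sketch (not kernel code); mini_receive() below uses
 * exactly this pattern to unlink a sender from a caller queue:
 *
 *  node_t **xpp = &queue;		// start at the head pointer
 *  while (*xpp != NULL) {
 *      if (must_go(*xpp)) {		// 'must_go' is a hypothetical predicate
 *          *xpp = (*xpp)->next;	// unlink head or middle node alike
 *          break;
 *      }
 *      xpp = &(*xpp)->next;		// advance to the next 'next' pointer
 *  }
 */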

#include <stddef.h>
#include <signal.h>
#include <assert.h>
#include <string.h>

#include "vm.h"
#include "clock.h"
#include "spinlock.h"
#include "arch_proto.h"

#include <minix/syslib.h>

/* Scheduling and message passing functions */
static void idle(void);
/**
 * Made public for use in clock.c (for user-space scheduling)
static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
	*m_ptr, int flags);
*/
static int mini_receive(struct proc *caller_ptr, endpoint_t src,
	message *m_buff_usr, int flags);
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t
	size);
static int deadlock(int function, register struct proc *caller,
	endpoint_t src_dst_e);
static int try_async(struct proc *caller_ptr);
static int try_one(endpoint_t receive_e, struct proc *src_ptr,
	struct proc *dst_ptr);
static struct proc * pick_proc(void);
static void enqueue_head(struct proc *rp);

/* all idles share the same idle_priv structure */
static struct priv idle_priv;

static void set_idle_name(char * name, int n)
{
        int i, c;
        int p_z = 0;

        if (n > 999)
                n = 999;

        name[0] = 'i';
        name[1] = 'd';
        name[2] = 'l';
        name[3] = 'e';

        for (i = 4, c = 100; c > 0; c /= 10) {
                int digit;

                digit = n / c;
                n -= digit * c;

                if (p_z || digit != 0 || c == 1) {
                        p_z = 1;
                        name[i++] = '0' + digit;
                }
        }

        name[i] = '\0';

}
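
/* For illustration (derived from the loop above, not called anywhere here):
 * the leading-zero suppression means set_idle_name(name, 0) yields "idle0",
 * set_idle_name(name, 42) yields "idle42", and any n above 999 is clamped,
 * yielding "idle999".
 */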


#define PICK_ANY	1
#define PICK_HIGHERONLY	2

#define BuildNotifyMessage(m_ptr, src, dst_ptr) \
	memset((m_ptr), 0, sizeof(*(m_ptr)));				\
	(m_ptr)->m_type = NOTIFY_MESSAGE;				\
	(m_ptr)->m_notify.timestamp = get_monotonic();		\
	switch (src) {							\
	case HARDWARE:							\
		(m_ptr)->m_notify.interrupts =			\
			priv(dst_ptr)->s_int_pending;			\
		priv(dst_ptr)->s_int_pending = 0;			\
		break;							\
	case SYSTEM:							\
		memcpy(&(m_ptr)->m_notify.sigset,			\
			&priv(dst_ptr)->s_sig_pending,			\
			sizeof(sigset_t));				\
		sigemptyset(&priv(dst_ptr)->s_sig_pending);		\
		break;							\
	}
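
/* Usage sketch: the macro expands to a plain statement sequence, so the two
 * real call sites in mini_receive() and mini_notify() below invoke it like a
 * function, e.g.
 *
 *  BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
 *  dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
 *  dst_ptr->p_misc_flags |= MF_DELIVERMSG;
 */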

static message m_notify_buff = { 0, NOTIFY_MESSAGE };

void proc_init(void)
{
	struct proc * rp;
	struct priv *sp;
	int i;

	/* Clear the process table. Announce each slot as empty and set up
	 * mappings for proc_addr() and proc_nr() macros. Do the same for the
	 * table with privilege structures for the system processes.
	 */
	for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
		rp->p_rts_flags = RTS_SLOT_FREE;/* initialize free slot */
		rp->p_magic = PMAGIC;
		rp->p_nr = i;			/* proc number from ptr */
		rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
		rp->p_scheduler = NULL;		/* no user space scheduler */
		rp->p_priority = 0;		/* no priority */
		rp->p_quantum_size_ms = 0;	/* no quantum size */

		/* arch-specific initialization */
		arch_proc_reset(rp);
	}
	for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
		sp->s_proc_nr = NONE;		/* initialize as free */
		sp->s_id = (sys_id_t) i;	/* priv structure index */
		ppriv_addr[i] = sp;		/* priv ptr from number */
		sp->s_sig_mgr = NONE;		/* clear signal managers */
		sp->s_bak_sig_mgr = NONE;
	}

	idle_priv.s_flags = IDL_F;
	/* initialize IDLE structures for every CPU */
	for (i = 0; i < CONFIG_MAX_CPUS; i++) {
		struct proc * ip = get_cpu_var_ptr(i, idle_proc);
		ip->p_endpoint = IDLE;
		ip->p_priv = &idle_priv;
		/* must not let idle ever get scheduled */
		ip->p_rts_flags |= RTS_PROC_STOP;
		set_idle_name(ip->p_name, i);
	}
}

static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/*
	 * currently we bet that VM is always alive and its pages available so
	 * when the CPU wakes up the kernel is mapped and no surprises happen.
	 * This is only a problem if more than one CPU is available
	 */
	switch_address_space(proc_addr(VM_PROC_NR));
#endif
}

/*===========================================================================*
 *				idle					     *
 *===========================================================================*/
static void idle(void)
{
	struct proc * p;

	/* This function is called whenever there is no work to do.
	 * Halt the CPU, and measure how many timestamp counter ticks are
	 * spent not doing anything. This allows test setups to measure
	 * the CPU utilization of certain workloads with high precision.
	 */

	p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
	if (priv(p)->s_flags & BILLABLE)
		get_cpulocal_var(bill_ptr) = p;

	switch_address_space_idle();

#ifdef CONFIG_SMP
	get_cpulocal_var(cpu_is_idle) = 1;
	/* we don't need to keep time on APs as it is handled on the BSP */
	if (cpuid != bsp_cpu_id)
		stop_local_timer();
	else
#endif
	{
		/*
		 * If the timer has expired while in kernel we must
		 * rearm it before we go to sleep
		 */
		restart_local_timer();
	}

	/* start accounting for the idle time */
	context_stop(proc_addr(KERNEL));
#if !SPROFILE
	halt_cpu();
#else
	if (!sprofiling)
		halt_cpu();
	else {
		volatile int * v;

		v = get_cpulocal_var_ptr(idle_interrupted);
		interrupts_enable();
		while (!*v)
			arch_pause();
		interrupts_disable();
		*v = 0;
	}
#endif
	/*
	 * end of accounting for the idle task does not happen here, the kernel
	 * is handling stuff for quite a while before it gets back here!
	 */
}

/*===========================================================================*
 *                              vm_suspend                                *
 *===========================================================================*/
void vm_suspend(struct proc *caller, const struct proc *target,
        const vir_bytes linaddr, const vir_bytes len, const int type,
        const int writeflag)
{
        /* This range is not OK for this process. Set parameters
         * of the request and notify VM about the pending request.
         */
        assert(!RTS_ISSET(caller, RTS_VMREQUEST));
        assert(!RTS_ISSET(target, RTS_VMREQUEST));

        RTS_SET(caller, RTS_VMREQUEST);

        caller->p_vmrequest.req_type = VMPTYPE_CHECK;
        caller->p_vmrequest.target = target->p_endpoint;
        caller->p_vmrequest.params.check.start = linaddr;
        caller->p_vmrequest.params.check.length = len;
        caller->p_vmrequest.params.check.writeflag = writeflag;
        caller->p_vmrequest.type = type;

        /* Connect caller on vmrequest wait queue. */
        if(!(caller->p_vmrequest.nextrequestor = vmrequest))
                if(OK != send_sig(VM_PROC_NR, SIGKMEM))
                        panic("send_sig failed");
        vmrequest = caller;
}
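
/* Note on the queueing above: vmrequest is pushed LIFO, and SIGKMEM is raised
 * only on the empty-to-nonempty transition, so VM drains the whole list from
 * a single signal. A sketch of the equivalent, more verbose form:
 *
 *  caller->p_vmrequest.nextrequestor = vmrequest;
 *  if (caller->p_vmrequest.nextrequestor == NULL)
 *      send_sig(VM_PROC_NR, SIGKMEM);	// panic on failure, as above
 *  vmrequest = caller;
 */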

/*===========================================================================*
 *                              delivermsg                                *
 *===========================================================================*/
static void delivermsg(struct proc *rp)
{
        assert(!RTS_ISSET(rp, RTS_VMREQUEST));
        assert(rp->p_misc_flags & MF_DELIVERMSG);
        assert(rp->p_delivermsg.m_source != NONE);

        if (copy_msg_to_user(&rp->p_delivermsg,
                                (message *) rp->p_delivermsg_vir)) {
                if(rp->p_misc_flags & MF_MSGFAILED) {
                        /* 2nd consecutive failure means this won't succeed */
                        printf("WARNING wrong user pointer 0x%08lx from "
                                "process %s / %d\n",
                                rp->p_delivermsg_vir,
                                rp->p_name,
                                rp->p_endpoint);
                        cause_sig(rp->p_nr, SIGSEGV);
                } else {
                        /* 1st failure means we have to ask VM to handle it */
                        vm_suspend(rp, rp, rp->p_delivermsg_vir,
                                sizeof(message), VMSTYPE_DELIVERMSG, 1);
                        rp->p_misc_flags |= MF_MSGFAILED;
                }
        } else {
                /* Indicate message has been delivered; address is 'used'. */
                rp->p_delivermsg.m_source = NONE;
                rp->p_misc_flags &= ~(MF_DELIVERMSG|MF_MSGFAILED);

                if(!(rp->p_misc_flags & MF_CONTEXT_SET)) {
                        rp->p_reg.retreg = OK;
                }
        }
}
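
/* Delivery is thus a two-strike protocol: the first copy_msg_to_user()
 * failure suspends the receiver via vm_suspend() and sets MF_MSGFAILED so
 * that VM can map in the page and the delivery can be retried; a second
 * consecutive failure on the same address is treated as a genuinely bad user
 * pointer and converted into a SIGSEGV for the receiver.
 */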

/*===========================================================================*
 *				switch_to_user				     *
 *===========================================================================*/
void switch_to_user(void)
{
	/* This function is called an instant before proc_ptr is
	 * to be scheduled again.
	 */
	struct proc * p;
#ifdef CONFIG_SMP
	int tlb_must_refresh = 0;
#endif

	p = get_cpulocal_var(proc_ptr);
	/*
	 * if the current process is still runnable, check the misc flags and
	 * let it run unless it becomes not runnable in the meantime
	 */
	if (proc_is_runnable(p))
		goto check_misc_flags;
	/*
	 * if a process becomes not runnable while handling the misc flags, we
	 * need to pick a new one here and start from scratch. Also if the
	 * current process wasn't runnable, we pick a new one here
	 */
not_runnable_pick_new:
	if (proc_is_preempted(p)) {
		p->p_rts_flags &= ~RTS_PREEMPTED;
		if (proc_is_runnable(p)) {
			if (p->p_cpu_time_left)
				enqueue_head(p);
			else
				enqueue(p);
		}
	}

	/*
	 * if we have no process to run, set IDLE as the current process for
	 * time accounting and put the cpu in an idle state. After the next
	 * timer interrupt the execution resumes here and we can pick another
	 * process. If there is still nothing runnable we "schedule" IDLE again
	 */
	while (!(p = pick_proc())) {
		idle();
	}

	/* update the global variable */
	get_cpulocal_var(proc_ptr) = p;

#ifdef CONFIG_SMP
	if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
		tlb_must_refresh = 1;
#endif
	switch_address_space(p);

check_misc_flags:

	assert(p);
	assert(proc_is_runnable(p));
	while (p->p_misc_flags &
		(MF_KCALL_RESUME | MF_DELIVERMSG |
		 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {

		assert(proc_is_runnable(p));
		if (p->p_misc_flags & MF_KCALL_RESUME) {
			kernel_call_resume(p);
		}
		else if (p->p_misc_flags & MF_DELIVERMSG) {
			TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
				p->p_name, p->p_endpoint););
			delivermsg(p);
		}
		else if (p->p_misc_flags & MF_SC_DEFER) {
			/* Perform the system call that we deferred earlier. */

			assert (!(p->p_misc_flags & MF_SC_ACTIVE));

			arch_do_syscall(p);

			/* If the process is stopped for signal delivery, and
			 * not blocked sending a message after the system call,
			 * inform PM.
			 */
			if ((p->p_misc_flags & MF_SIG_DELAY) &&
					!RTS_ISSET(p, RTS_SENDING))
				sig_delay_done(p);
		}
		else if (p->p_misc_flags & MF_SC_TRACE) {
			/* Trigger a system call leave event if this was a
			 * system call. We must do this after processing the
			 * other flags above, both for tracing correctness and
			 * to be able to use 'break'.
			 */
			if (!(p->p_misc_flags & MF_SC_ACTIVE))
				break;

			p->p_misc_flags &=
				~(MF_SC_TRACE | MF_SC_ACTIVE);

			/* Signal the "leave system call" event.
			 * Block the process.
			 */
			cause_sig(proc_nr(p), SIGTRAP);
		}
		else if (p->p_misc_flags & MF_SC_ACTIVE) {
			/* If MF_SC_ACTIVE was set, remove it now:
			 * we're leaving the system call.
			 */
			p->p_misc_flags &= ~MF_SC_ACTIVE;

			break;
		}

		/*
		 * the selected process might not be runnable anymore. We have
		 * to check it and schedule another one
		 */
		if (!proc_is_runnable(p))
			goto not_runnable_pick_new;
	}
	/*
	 * check the quantum left before it runs again. We must do it only here
	 * as we are sure that a possible out-of-quantum message to the
	 * scheduler will not collide with the regular ipc
	 */
	if (!p->p_cpu_time_left)
		proc_no_time(p);
	/*
	 * After handling the misc flags the selected process might not be
	 * runnable anymore. We have to check it and schedule another one
	 */
	if (!proc_is_runnable(p))
		goto not_runnable_pick_new;

	TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
				"pc 0x%08x\n",
		cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
#if DEBUG_TRACE
	p->p_schedules++;
#endif

	p = arch_finish_switch_to_user();
	assert(p->p_cpu_time_left);

	context_stop(proc_addr(KERNEL));

	/* If the process isn't the owner of FPU, enable the FPU exception */
	if (get_cpulocal_var(fpu_owner) != p)
		enable_fpu_exception();
	else
		disable_fpu_exception();

	/* If MF_CONTEXT_SET is set, don't clobber process state within
	 * the kernel. The next kernel entry is OK again though.
	 */
	p->p_misc_flags &= ~MF_CONTEXT_SET;

#if defined(__i386__)
  	assert(p->p_seg.p_cr3 != 0);
#elif defined(__arm__)
	assert(p->p_seg.p_ttbr != 0);
#endif
#ifdef CONFIG_SMP
	if (p->p_misc_flags & MF_FLUSH_TLB) {
		if (tlb_must_refresh)
			refresh_tlb();
		p->p_misc_flags &= ~MF_FLUSH_TLB;
	}
#endif

	restart_local_timer();

	/*
	 * restore_user_context() carries out the actual mode switch from kernel
	 * to userspace. This function does not return
	 */
	restore_user_context(p);
	NOT_REACHABLE;
}

/*
 * handler for all synchronous IPC calls
 */
static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
			int call_nr,	/* system call number and flags */
			endpoint_t src_dst_e,	/* src or dst of the call */
			message *m_ptr)	/* users pointer to a message */
{
  int result;					/* the system call's result */
  int src_dst_p;				/* Process slot number */
  char *callname;

  /* Check destination. RECEIVE is the only call that accepts ANY (in addition
   * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require an
   * endpoint to correspond to a process. In addition, it is necessary to check
   * whether a process is allowed to send to a given destination.
   */
  assert(call_nr != SENDA);

  /* Only allow non-negative call_nr values less than 32 */
  if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
      || !(callname = ipc_call_names[call_nr])) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
          call_nr, proc_nr(caller_ptr), src_dst_e);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  if (src_dst_e == ANY)
  {
	if (call_nr != RECEIVE)
	{
#if 0
		printf("sys_call: %s by %d with bad endpoint %d\n",
			callname,
			proc_nr(caller_ptr), src_dst_e);
#endif
		return EINVAL;
	}
	src_dst_p = (int) src_dst_e;
  }
  else
  {
	/* Require a valid source and/or destination process. */
	if(!isokendpt(src_dst_e, &src_dst_p)) {
#if 0
		printf("sys_call: %s by %d with bad endpoint %d\n",
			callname,
			proc_nr(caller_ptr), src_dst_e);
#endif
		return EDEADSRCDST;
	}

	/* If the call is to send to a process, i.e., for SEND, SENDNB,
	 * SENDREC or NOTIFY, verify that the caller is allowed to send to
	 * the given destination.
	 */
	if (call_nr != RECEIVE)
	{
		if (!may_send_to(caller_ptr, src_dst_p)) {
#if DEBUG_ENABLE_IPC_WARNINGS
			printf(
			"sys_call: ipc mask denied %s from %d to %d\n",
				callname,
				caller_ptr->p_endpoint, src_dst_e);
#endif
			return(ECALLDENIED);	/* call denied by ipc mask */
		}
	}
  }

  /* Check if the process has privileges for the requested call. Calls to the
   * kernel may only be SENDREC, because tasks always reply and may not block
   * if the caller doesn't do receive().
   */
  if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
          callname, proc_nr(caller_ptr), src_dst_p);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
           callname, proc_nr(caller_ptr), src_dst_e);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  switch(call_nr) {
  case SENDREC:
	/* A flag is set so that notifications cannot interrupt SENDREC. */
	caller_ptr->p_misc_flags |= MF_REPLY_PEND;
	/* fall through */
  case SEND:
	result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
	if (call_nr == SEND || result != OK)
		break;				/* done, or SEND failed */
	/* fall through for SENDREC */
  case RECEIVE:
	if (call_nr == RECEIVE) {
		caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
		IPC_STATUS_CLEAR(caller_ptr);  /* clear IPC status code */
	}
	result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
	break;
  case NOTIFY:
	result = mini_notify(caller_ptr, src_dst_e);
	break;
  case SENDNB:
        result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
        break;
  default:
	result = EBADCALL;			/* illegal system call */
  }

  /* Now, return the result of the system call to the caller. */
  return(result);
}

int do_ipc(reg_t r1, reg_t r2, reg_t r3)
{
  struct proc *const caller_ptr = get_cpulocal_var(proc_ptr);	/* get pointer to caller */
  int call_nr = (int) r1;

  assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));

  /* bill kernel time to this process. */
  kbill_ipc = caller_ptr;

  /* If this process is subject to system call tracing, handle that first. */
  if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
	/* Are we tracing this process, and is it the first sys_call entry? */
	if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
							MF_SC_TRACE) {
		/* We must notify the tracer before processing the actual
		 * system call. If we don't, the tracer could not obtain the
		 * input message. Postpone the entire system call.
		 */
		caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
		assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
		caller_ptr->p_misc_flags |= MF_SC_DEFER;
		caller_ptr->p_defer.r1 = r1;
		caller_ptr->p_defer.r2 = r2;
		caller_ptr->p_defer.r3 = r3;

		/* Signal the "enter system call" event. Block the process. */
		cause_sig(proc_nr(caller_ptr), SIGTRAP);

		/* Preserve the return register's value. */
		return caller_ptr->p_reg.retreg;
	}

	/* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
	caller_ptr->p_misc_flags &= ~MF_SC_DEFER;

	assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));

	/* Set a flag to allow reliable tracing of leaving the system call. */
	caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
  }

  if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
	panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
		caller_ptr->p_name, caller_ptr->p_endpoint);
  }

  /* Now check if the call is known and try to perform the request. The only
   * system calls that exist in MINIX are sending and receiving messages.
   *   - SENDREC: combines SEND and RECEIVE in a single system call
   *   - SEND:    sender blocks until its message has been delivered
   *   - RECEIVE: receiver blocks until an acceptable message has arrived
   *   - NOTIFY:  asynchronous call; deliver notification or mark pending
   *   - SENDA:   list of asynchronous send requests
   */
  switch(call_nr) {
  	case SENDREC:
  	case SEND:
  	case RECEIVE:
  	case NOTIFY:
  	case SENDNB:
  	{
  	    /* Process accounting for scheduling */
	    caller_ptr->p_accounting.ipc_sync++;

  	    return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
			    (message *) r3);
  	}
  	case SENDA:
  	{
 	    /*
  	     * Get and check the size of the argument, i.e. the number of
  	     * entries in the table
  	     */
  	    size_t msg_size = (size_t) r2;

  	    /* Process accounting for scheduling */
	    caller_ptr->p_accounting.ipc_async++;

  	    /* Limit size to something reasonable. An arbitrary choice is 16
  	     * times the number of process table entries.
  	     */
  	    if (msg_size > 16*(NR_TASKS + NR_PROCS))
	        return EDOM;
  	    return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
  	}
  	case MINIX_KERNINFO:
	{
		/* It might not be initialized yet. */
	  	if(!minix_kerninfo_user) {
			return EBADCALL;
		}

  		arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
  		return OK;
	}
  	default:
	return EBADCALL;		/* illegal system call */
  }
}

/*===========================================================================*
 *				deadlock				     *
 *===========================================================================*/
static int deadlock(
  int function,				/* trap number */
  register struct proc *cp,		/* pointer to caller */
  endpoint_t src_dst_e			/* src or dst process */
)
{
/* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
 * a cyclic dependency of blocking send and receive calls. The only cyclic
 * dependency that is not fatal is if the caller and target directly SEND(REC)
 * and RECEIVE to each other. If a deadlock is found, the group size is
 * returned. Otherwise zero is returned.
 */
  register struct proc *xp;			/* process pointer */
  int group_size = 1;				/* start with only caller */
#if DEBUG_ENABLE_IPC_WARNINGS
  static struct proc *processes[NR_PROCS + NR_TASKS];
  processes[0] = cp;
#endif

  while (src_dst_e != ANY) { 			/* check while process nr */
      int src_dst_slot;
      okendpt(src_dst_e, &src_dst_slot);
      xp = proc_addr(src_dst_slot);		/* follow chain of processes */
      assert(proc_ptr_ok(xp));
      assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
#if DEBUG_ENABLE_IPC_WARNINGS
      processes[group_size] = xp;
#endif
      group_size ++;				/* extra process in group */

      /* Check whether the last process in the chain has a dependency. If it
       * has not, the cycle cannot be closed and we are done.
       */
      if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
	return 0;

      /* Now check if there is a cyclic dependency. For group sizes of two,
       * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
       * or other combinations indicate a deadlock.
       */
      if (src_dst_e == cp->p_endpoint) {	/* possible deadlock */
	  if (group_size == 2) {		/* caller and src_dst */
	      /* The function number is magically converted to flags. */
	      if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
	          return(0);			/* not a deadlock */
	      }
	  }
#if DEBUG_ENABLE_IPC_WARNINGS
	  {
		int i;
		printf("deadlock between these processes:\n");
		for(i = 0; i < group_size; i++) {
			printf(" %10s ", processes[i]->p_name);
		}
		printf("\n\n");
		for(i = 0; i < group_size; i++) {
			print_proc(processes[i]);
			proc_stacktrace(processes[i]);
		}
	  }
#endif
          return(group_size);			/* deadlock found */
      }
  }
  return(0);					/* not a deadlock */
}
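
/* Worked example (illustrative only): if A blocks in SEND to B while B blocks
 * in SEND to A, the walk from A reaches B, finds B blocked on A, and with
 * group_size == 2 both ends are SENDING, so deadlock(SEND, A, B) returns 2.
 * If B is instead in RECEIVE from A, the SEND/RECEIVE combination is the
 * benign case and 0 is returned. A longer cycle such as A -> B -> C -> A is
 * always fatal and returns the group size, here 3.
 */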

/*===========================================================================*
 *				has_pending				     *
 *===========================================================================*/
static int has_pending(sys_map_t *map, int src_p, int asynm)
{
/* Check to see if there is a pending message from the desired source
 * available.
 */

  int src_id;
  sys_id_t id = NULL_PRIV_ID;
#ifdef CONFIG_SMP
  struct proc * p;
#endif

  /* Either check a specific bit in the mask map, or find the first bit set in
   * it (if any), depending on whether the receive was called on a specific
   * source endpoint.
   */
  if (src_p != ANY) {
	src_id = nr_to_id(src_p);
	if (get_sys_bit(*map, src_id)) {
#ifdef CONFIG_SMP
		p = proc_addr(id_to_nr(src_id));
		if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
			p->p_misc_flags |= MF_SENDA_VM_MISS;
		else
#endif
			id = src_id;
	}
  } else {
	/* Find a source with a pending message */
	for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
		if (get_sys_bits(*map, src_id) != 0) {
#ifdef CONFIG_SMP
			while (src_id < NR_SYS_PROCS) {
				while (!get_sys_bit(*map, src_id)) {
					if (src_id == NR_SYS_PROCS)
						goto quit_search;
					src_id++;
				}
				p = proc_addr(id_to_nr(src_id));
				/*
				 * We must not let kernel fiddle with pages of a
				 * process which are currently being changed by
				 * VM.  It is dangerous! So do not report such a
				 * process as having pending async messages.
				 * Skip it.
				 */
				if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
					p->p_misc_flags |= MF_SENDA_VM_MISS;
					src_id++;
				} else
					goto quit_search;
			}
#else
			while (!get_sys_bit(*map, src_id)) src_id++;
			goto quit_search;
#endif
		}
	}

quit_search:
	if (src_id < NR_SYS_PROCS)	/* Found one */
		id = src_id;
  }

  return(id);
}

/*===========================================================================*
 *				has_pending_notify			     *
 *===========================================================================*/
int has_pending_notify(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_notify_pending;
	return has_pending(map, src_p, 0);
}

/*===========================================================================*
 *				has_pending_asend			     *
 *===========================================================================*/
int has_pending_asend(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_asyn_pending;
	return has_pending(map, src_p, 1);
}

/*===========================================================================*
 *				unset_notify_pending			     *
 *===========================================================================*/
void unset_notify_pending(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_notify_pending;
	unset_sys_bit(*map, src_p);
}

/*===========================================================================*
 *				mini_send				     *
 *===========================================================================*/
int mini_send(
  register struct proc *caller_ptr,	/* who is trying to send a message? */
  endpoint_t dst_e,			/* to whom is message being sent? */
  message *m_ptr,			/* pointer to message buffer */
  const int flags
)
{
/* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
 * for this message, copy the message to it and unblock 'dst'. If 'dst' is
 * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
 */
  register struct proc *dst_ptr;
  register struct proc **xpp;
  int dst_p;
  dst_p = _ENDPOINT_P(dst_e);
  dst_ptr = proc_addr(dst_p);

  if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
  {
	return EDEADSRCDST;
  }

  /* Check if 'dst' is blocked waiting for this message. The destination's
   * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
   */
  if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, (vir_bytes)m_ptr, NULL)) {
	int call;
	/* Destination is indeed waiting for this message. */
	assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));

	if (!(flags & FROM_KERNEL)) {
		if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
			return EFAULT;
	} else {
		dst_ptr->p_delivermsg = *m_ptr;
		IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
	}

	dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
	dst_ptr->p_misc_flags |= MF_DELIVERMSG;

	call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
		: (flags & NON_BLOCKING ? SENDNB : SEND));
	IPC_STATUS_ADD_CALL(dst_ptr, call);

	if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
		dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;

	RTS_UNSET(dst_ptr, RTS_RECEIVING);

#if DEBUG_IPC_HOOK
	hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
#endif
  } else {
	if(flags & NON_BLOCKING) {
		return(ENOTREADY);
	}

	/* Check for a possible deadlock before actually blocking. */
	if (deadlock(SEND, caller_ptr, dst_e)) {
		return(ELOCKED);
	}

	/* Destination is not waiting.  Block and dequeue caller. */
	if (!(flags & FROM_KERNEL)) {
		if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
			return EFAULT;
	} else {
		caller_ptr->p_sendmsg = *m_ptr;
		/*
		 * we need to remember that this message is from kernel so we
		 * can set the delivery status flags when the message is
		 * actually delivered
		 */
		caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
	}

	RTS_SET(caller_ptr, RTS_SENDING);
	caller_ptr->p_sendto_e = dst_e;

	/* Process is now blocked.  Put it on the destination's queue. */
	assert(caller_ptr->p_q_link == NULL);
	xpp = &dst_ptr->p_caller_q;		/* find end of list */
	while (*xpp) xpp = &(*xpp)->p_q_link;
	*xpp = caller_ptr;			/* add caller to end */

#if DEBUG_IPC_HOOK
	hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
#endif
  }
  return(OK);
}

/*===========================================================================*
 *				mini_receive				     *
 *===========================================================================*/
static int mini_receive(struct proc * caller_ptr,
			endpoint_t src_e, /* which message source is wanted */
			message * m_buff_usr, /* pointer to message buffer */
			const int flags)
{
/* A process or task wants to get a message.  If a message is already queued,
 * acquire it and deblock the sender.  If no message from the desired source
 * is available block the caller.
 */
  register struct proc **xpp;
  int r, src_id, found, src_proc_nr, src_p;
  endpoint_t sender_e;

  assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));

  /* This is where we want our message. */
  caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;

  if(src_e == ANY) src_p = ANY;
  else
  {
	okendpt(src_e, &src_p);
	if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
	{
		return EDEADSRCDST;
	}
  }


  /* Check to see if a message from desired source is already available.  The
   * caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it is
   * set, the process should be blocked.
   */
  if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {

    /* Check if there are pending notifications, except for SENDREC. */
    if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {

	/* Check for pending notifications */
        src_id = has_pending_notify(caller_ptr, src_p);
        found = src_id != NULL_PRIV_ID;
        if(found) {
            src_proc_nr = id_to_nr(src_id);		/* get source proc */
            sender_e = proc_addr(src_proc_nr)->p_endpoint;
        }

        if (found && CANRECEIVE(src_e, sender_e, caller_ptr, 0,
          &m_notify_buff)) {

#if DEBUG_ENABLE_IPC_WARNINGS
	    if(src_proc_nr == NONE) {
		printf("mini_receive: sending notify from NONE\n");
	    }
#endif
	    assert(src_proc_nr != NONE);
            unset_notify_pending(caller_ptr, src_id);	/* no longer pending */

            /* Found a suitable source, deliver the notification message. */
	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	    assert(src_e == ANY || sender_e == src_e);

	    /* assemble message */
	    BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
	    caller_ptr->p_delivermsg.m_source = sender_e;
	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;

	    IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);

	    goto receive_done;
        }
    }

    /* Check for pending asynchronous messages */
    if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
        if (src_p != ANY)
		r = try_one(src_e, proc_addr(src_p), caller_ptr);
        else
        	r = try_async(caller_ptr);

	if (r == OK) {
            IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
            goto receive_done;
        }
    }

    /* Check caller queue. Use pointer pointers to keep code simple. */
    xpp = &caller_ptr->p_caller_q;
    while (*xpp) {
	struct proc * sender = *xpp;
	endpoint_t sender_e = sender->p_endpoint;

        if (CANRECEIVE(src_e, sender_e, caller_ptr, 0, &sender->p_sendmsg)) {
            int call;
	    assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
	    assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));

	    /* Found acceptable message. Copy it and update status. */
  	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	    caller_ptr->p_delivermsg = sender->p_sendmsg;
	    caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;
	    RTS_UNSET(sender, RTS_SENDING);

	    call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
	    IPC_STATUS_ADD_CALL(caller_ptr, call);

	    /*
	     * if the message is originally from the kernel on behalf of this
	     * process, we must send the status flags accordingly
	     */
	    if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
		IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
		/* we can clear the flag now, it is not needed anymore */
		sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
	    }
	    if (sender->p_misc_flags & MF_SIG_DELAY)
		sig_delay_done(sender);

#if DEBUG_IPC_HOOK
            hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
#endif

            *xpp = sender->p_q_link;		/* remove from queue */
	    sender->p_q_link = NULL;
	    goto receive_done;
	}
	xpp = &sender->p_q_link;		/* proceed to next */
    }
  }

  /* No suitable message is available or the caller couldn't send in SENDREC.
   * Block the process trying to receive, unless the flags tell otherwise.
   */
  if ( ! (flags & NON_BLOCKING)) {
      /* Check for a possible deadlock before actually blocking. */
      if (deadlock(RECEIVE, caller_ptr, src_e)) {
          return(ELOCKED);
      }

      caller_ptr->p_getfrom_e = src_e;
      RTS_SET(caller_ptr, RTS_RECEIVING);
      return(OK);
  } else {
	return(ENOTREADY);
  }

receive_done:
  if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
	  caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
  return OK;
}
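
/* The order of the checks above fixes the delivery priority for a receiver:
 * pending notifications are considered first (but skipped during the receive
 * phase of SENDREC), then pending asynchronous messages, and only then
 * senders blocked on the caller queue. A plain RECEIVE(ANY) therefore drains
 * notifications before queued synchronous messages.
 */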

/*===========================================================================*
 *				mini_notify				     *
 *===========================================================================*/
int mini_notify(
  const struct proc *caller_ptr,	/* sender of the notification */
  endpoint_t dst_e			/* which process to notify */
)
{
  register struct proc *dst_ptr;
  int src_id;				/* source id for late delivery */
  int dst_p;

  if (!isokendpt(dst_e, &dst_p)) {
	util_stacktrace();
	printf("mini_notify: bogus endpoint %d\n", dst_e);
	return EDEADSRCDST;
  }

  dst_ptr = proc_addr(dst_p);

  /* Check to see if target is blocked waiting for this message. A process
   * can be both sending and receiving during a SENDREC system call.
   */
  if (WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr, 0, &m_notify_buff) &&
    !(dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
      /* Destination is indeed waiting for a message. Assemble a notification
       * message and deliver it. Copy from pseudo-source HARDWARE, since the
       * message is in the kernel's address space.
       */
      assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));

      BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
      dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
      dst_ptr->p_misc_flags |= MF_DELIVERMSG;

      IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
      RTS_UNSET(dst_ptr, RTS_RECEIVING);

      return(OK);
  }

  /* Destination is not ready to receive the notification. Add it to the
   * bit map with pending notifications. Note the indirectness: the privilege id
   * instead of the process number is used in the pending bit map.
   */
  src_id = priv(caller_ptr)->s_id;
  set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
  return(OK);
}

#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%zu, tab 0x%lx)\n",__FILE__,__LINE__,	\
field, caller->p_name, entry, priv(caller)->s_asynsize, priv(caller)->s_asyntab)

#define A_RETR(entry) do {			\
  if (data_copy(				\
  		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
  		KERNEL, (vir_bytes) &tabent,	\
  		sizeof(tabent)) != OK) {	\
  			ASCOMPLAIN(caller_ptr, entry, "message entry");	\
  			r = EFAULT;		\
	                goto asyn_error; \
  }						\
  else if(tabent.dst == SELF) { \
      tabent.dst = caller_ptr->p_endpoint; \
  } \
  			 } while(0)

#define A_INSRT(entry) do {			\
  if (data_copy(KERNEL, (vir_bytes) &tabent,	\
  		caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
  		sizeof(tabent)) != OK) {	\
  			ASCOMPLAIN(caller_ptr, entry, "message entry");	\
			/* Do NOT set r or goto asyn_error here! */ \
  }						\
  			  } while(0)
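
/* These macros are deliberately context-dependent: they expect the names
 * caller_ptr, table_v, tabent and r, plus an asyn_error label, to exist in
 * the enclosing function (try_one() aliases caller_ptr = src_ptr just to
 * satisfy them). A typical use, as in the scan loops below:
 *
 *  A_RETR(i);		// fetch table[i] into tabent, or goto asyn_error
 *  tabent.result = r;	// decide and record the per-entry result
 *  tabent.flags = flags | AMF_DONE;
 *  A_INSRT(i);		// write tabent back; failures are only logged
 */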

/*===========================================================================*
 *				try_deliver_senda			     *
 *===========================================================================*/
int try_deliver_senda(struct proc *caller_ptr,
				asynmsg_t *table,
				size_t size)
{
  int r, dst_p, done, do_notify;
  unsigned int i;
  unsigned flags;
  endpoint_t dst;
  struct proc *dst_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  const vir_bytes table_v = (vir_bytes) table;
  message *m_ptr = NULL;

  privp = priv(caller_ptr);

  /* Clear table */
  privp->s_asyntab = -1;
  privp->s_asynsize = 0;
  privp->s_asynendpoint = caller_ptr->p_endpoint;

  if (size == 0) return(OK);  /* Nothing to do, just return */

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;

  /* Limit size to something reasonable. An arbitrary choice is 16
   * times the number of process table entries.
   *
   * (this check has been duplicated in sys_call but is left here
   * as a sanity check)
   */
  if (size > 16*(NR_TASKS + NR_PROCS)) {
    r = EDOM;
    return r;
  }

  for (i = 0; i < size; i++) {
	/* Process each entry in the table and store the result in the table.
	 * If we're done handling a message, copy the result to the sender. */

	dst = NONE;
	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue; /* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
		r = EINVAL;
		goto asyn_error;
	}
	if (!(flags & AMF_VALID)) { /* Must contain message */
		r = EINVAL;
		goto asyn_error;
	}
	if (flags & AMF_DONE) continue;	/* Already done processing */

	r = OK;
	if (!isokendpt(tabent.dst, &dst_p))
		r = EDEADSRCDST; /* Bad destination, report the error */
	else if (iskerneln(dst_p))
		r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
	else if (!may_asynsend_to(caller_ptr, dst_p))
		r = ECALLDENIED; /* Send denied by IPC mask */
	else 	/* r == OK */
		dst_ptr = proc_addr(dst_p);

	/* XXX: RTS_NO_ENDPOINT should be removed */
	if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
		r = EDEADSRCDST;
	}

	/* Check if 'dst' is blocked waiting for this message.
	 * If AMF_NOREPLY is set, do not satisfy the receiving part of
	 * a SENDREC.
	 */
	if (r == OK && WILLRECEIVE(caller_ptr->p_endpoint, dst_ptr,
	    (vir_bytes)&table[i].msg, NULL) &&
	    (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
		/* Destination is indeed waiting for this message. */
		dst_ptr->p_delivermsg = tabent.msg;
		dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
		dst_ptr->p_misc_flags |= MF_DELIVERMSG;
		IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
		RTS_UNSET(dst_ptr, RTS_RECEIVING);
#if DEBUG_IPC_HOOK
		hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
#endif
	} else if (r == OK) {
		/* Inform receiver that something is pending */
		set_sys_bit(priv(dst_ptr)->s_asyn_pending,
			    priv(caller_ptr)->s_id);
		done = FALSE;
		continue;
	}

	/* Store results */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY)
		do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR))
		do_notify = TRUE;
	A_INSRT(i);	/* Copy results to caller; ignore errors */
	continue;

asyn_error:
	if (dst != NONE)
		printf("KERNEL senda error %d to %d\n", r, dst);
	else
		printf("KERNEL senda error %d\n", r);
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);

  if (!done) {
	privp->s_asyntab = (vir_bytes) table;
	privp->s_asynsize = size;
  }

  return(OK);
}

/*===========================================================================*
 *				mini_senda				     *
 *===========================================================================*/
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
{
  struct priv *privp;

  privp = priv(caller_ptr);
  if (!(privp->s_flags & SYS_PROC)) {
	printf( "mini_senda: warning caller has no privilege structure\n");
	return(EPERM);
  }

  return try_deliver_senda(caller_ptr, table, size);
}


/*===========================================================================*
 *				try_async				     *
 *===========================================================================*/
static int try_async(struct proc * caller_ptr)
{
  int r;
  struct priv *privp;
  struct proc *src_ptr;
  sys_map_t *map;

  map = &priv(caller_ptr)->s_asyn_pending;

  /* Try all privilege structures */
  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp)  {
	if (privp->s_proc_nr == NONE)
		continue;

	if (!get_sys_bit(*map, privp->s_id))
		continue;

	src_ptr = proc_addr(privp->s_proc_nr);

#ifdef CONFIG_SMP
	/*
	 * Do not copy from a process which does not have a stable address space
	 * due to VM fiddling with it
	 */
	if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
		src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
		continue;
	}
#endif

	assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	if ((r = try_one(ANY, src_ptr, caller_ptr)) == OK)
		return(r);
  }

  return(ESRCH);
}


/*===========================================================================*
 *				try_one					     *
 *===========================================================================*/
static int try_one(endpoint_t receive_e, struct proc *src_ptr,
    struct proc *dst_ptr)
{
/* Try to receive an asynchronous message from 'src_ptr' */
  int r = EAGAIN, done, do_notify;
  unsigned int flags, i;
  size_t size;
  endpoint_t dst, src_e;
  struct proc *caller_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  vir_bytes table_v;

  privp = priv(src_ptr);
  if (!(privp->s_flags & SYS_PROC)) return(EPERM);
  size = privp->s_asynsize;
  table_v = privp->s_asyntab;

  /* Clear table pending message flag. We're done unless we're not. */
  unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);

  if (size == 0) return(EAGAIN);
  if (privp->s_asynendpoint != src_ptr->p_endpoint) return EAGAIN;
  if (!may_asynsend_to(src_ptr, proc_nr(dst_ptr))) return (ECALLDENIED);

  caller_ptr = src_ptr;	/* Needed for A_ macros later on */
  src_e = src_ptr->p_endpoint;

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;

  for (i = 0; i < size; i++) {
  	/* Process each entry in the table and store the result in the table.
  	 * If we're done handling a message, copy the result to the sender.
  	 * Some checks done in mini_senda are duplicated here, as the sender
  	 * could've altered the contents of the table in the meantime.
  	 */

	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue;	/* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
		r = EINVAL;
	else if (!(flags & AMF_VALID)) /* Must contain message */
		r = EINVAL;
	else if (flags & AMF_DONE) continue; /* Already done processing */

	/* Clear done flag. The sender is done sending when all messages in the
	 * table are marked done or empty. However, we will know that only
	 * the next time we enter this function or when the sender decides to
	 * send additional asynchronous messages and manages to deliver them
	 * all.
	 */
	done = FALSE;

	if (r == EINVAL)
		goto store_result;

	/* Message must be directed at receiving end */
	if (dst != dst_ptr->p_endpoint) continue;

	if (!CANRECEIVE(receive_e, src_e, dst_ptr,
		table_v + i*sizeof(asynmsg_t) + offsetof(struct asynmsg,msg),
		NULL)) {
		continue;
	}

	/* If AMF_NOREPLY is set, then this message is not a reply to a
	 * SENDREC and thus should not satisfy the receiving part of the
	 * SENDREC. This message is to be delivered later.
	 */
	if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
		continue;

	/* Destination is ready to receive the message; deliver it */
	r = OK;
	dst_ptr->p_delivermsg = tabent.msg;
	dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
	dst_ptr->p_misc_flags |= MF_DELIVERMSG;
#if DEBUG_IPC_HOOK
	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
#endif

store_result:
	/* Store results for sender. We may just have started delivering a
	 * message, so we must not return an error to the caller in the case
	 * that storing the results triggers an error!
	 */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY) do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
	A_INSRT(i);	/* Copy results to sender; ignore errors */

	break;
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);

  if (done) {
	privp->s_asyntab = -1;
	privp->s_asynsize = 0;
  } else {
	set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
  }

asyn_error:
  return(r);
}

/*===========================================================================*
 *				cancel_async				     *
 *===========================================================================*/
int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
{
/* Cancel asynchronous messages from src to dst, because dst is not interested
 * in them (e.g., dst has been restarted) */
  int done, do_notify;
  unsigned int flags, i;
  size_t size;
  endpoint_t dst;
  struct proc *caller_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  vir_bytes table_v;

  privp = priv(src_ptr);
  if (!(privp->s_flags & SYS_PROC)) return(EPERM);
  size = privp->s_asynsize;
  table_v = privp->s_asyntab;

  /* Clear table pending message flag. We're done unless we're not. */
  privp->s_asyntab = -1;
  privp->s_asynsize = 0;
  unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);

  if (size == 0) return(EAGAIN);
  if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);

  caller_ptr = src_ptr;	/* Needed for A_ macros later on */

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;


  for (i = 0; i < size; i++) {
  	/* Process each entry in the table and store the result in the table.
  	 * If we're done handling a message, copy the result to the sender.
  	 * Some checks done in mini_senda are duplicated here, as the sender
  	 * could've altered the contents of the table in the meantime.
  	 */

  	int r = EDEADSRCDST;	/* Cancel delivery due to dead dst */

	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue;	/* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
		r = EINVAL;
	else if (!(flags & AMF_VALID)) /* Must contain message */
		r = EINVAL;
	else if (flags & AMF_DONE) continue; /* Already done processing */

	/* Message must be directed at receiving end */
	if (dst != dst_ptr->p_endpoint) {
		done = FALSE;
		continue;
	}

	/* Store results for sender */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY) do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
	A_INSRT(i);	/* Copy results to sender; ignore errors */
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);

  if (!done) {
	privp->s_asyntab = table_v;
	privp->s_asynsize = size;
  }

asyn_error:
  return(OK);
}

/*===========================================================================*
 *				enqueue					     *
 *===========================================================================*/
void enqueue(
  register struct proc *rp	/* this process is now runnable */
)
{
/* Add 'rp' to one of the queues of runnable processes.  This function is
 * responsible for inserting a process into one of the scheduling queues.
 * The mechanism is implemented here.  The actual scheduling policy is
 * defined in sched() and pick_proc().
 *
 * This function can be used x-cpu as it always uses the queues of the cpu the
 * process is assigned to.
 */
  int q = rp->p_priority;	 		/* scheduling queue to use */
  struct proc **rdy_head, **rdy_tail;

  assert(proc_is_runnable(rp));

  assert(q >= 0);

  rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now add the process to the queue. */
  if (!rdy_head[q]) {		/* add to empty queue */
      rdy_head[q] = rdy_tail[q] = rp; 		/* create a new queue */
      rp->p_nextready = NULL;		/* mark new end */
  }
  else {					/* add to tail of queue */
      rdy_tail[q]->p_nextready = rp;		/* chain tail of queue */
      rdy_tail[q] = rp;				/* set new queue tail */
      rp->p_nextready = NULL;		/* mark new end */
  }

  if (cpuid == rp->p_cpu) {
	  /*
	   * If we are enqueueing a process with a higher priority than the
	   * current one, the current process gets preempted, provided it is
	   * preemptible. Testing the priority also makes sure that a process
	   * does not preempt itself.
	   */
	  struct proc * p;
	  p = get_cpulocal_var(proc_ptr);
	  assert(p);
	  if((p->p_priority > rp->p_priority) &&
			  (priv(p)->s_flags & PREEMPTIBLE))
		  RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
  }
#ifdef CONFIG_SMP
  /*
   * If the process was enqueued on a different cpu and that cpu is idle
   * (i.e. its timer is off), we must wake up that cpu and let it schedule
   * this new process.
   */
  else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
	  smp_schedule(rp->p_cpu);
  }
#endif

  /* Make note of when this process was added to queue, on the process
   * itself: dequeue() reads rp->p_accounting.enter_queue to compute the
   * time spent on the queue.
   */
  read_tsc_64(&(rp->p_accounting.enter_queue));

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}
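
/*
 * A sketch of the usual way into enqueue(): callers rarely invoke it
 * directly; instead they clear a blocking run-time-state flag with the
 * RTS_UNSET macro, which calls enqueue() once no flags remain (the flag
 * chosen here is just an example):
 *
 *  RTS_UNSET(rp, RTS_RECEIVING);	// unblock rp after a message arrives
 *  // if rp has no other RTS flags set, RTS_UNSET calls enqueue(rp); the
 *  // preemption test above may then dequeue the current process in
 *  // favour of rp.
 */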

/*===========================================================================*
 *				enqueue_head				     *
 *===========================================================================*/
/*
 * Put a process at the front of its run queue. This comes in handy when a
 * process is preempted and removed from its run queue, so that a currently
 * not-runnable process is not left on a run queue. We have to put this
 * process back at the front to be fair.
 */
static void enqueue_head(struct proc *rp)
{
  const int q = rp->p_priority;	 		/* scheduling queue to use */

  struct proc **rdy_head, **rdy_tail;

  assert(proc_ptr_ok(rp));
  assert(proc_is_runnable(rp));

  /*
   * The process was still runnable, with quantum left, when it was dequeued;
   * a process with no time left should have been handled elsewhere, and
   * differently.
   */
  assert(rp->p_cpu_time_left);

  assert(q >= 0);

  rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now add the process to the queue. */
  if (!rdy_head[q]) {		/* add to empty queue */
	rdy_head[q] = rdy_tail[q] = rp; 	/* create a new queue */
	rp->p_nextready = NULL;			/* mark new end */
  } else {					/* add to head of queue */
	rp->p_nextready = rdy_head[q];		/* chain head of queue */
	rdy_head[q] = rp;			/* set new queue head */
  }

  /* Make note of when this process was added to queue, again on the
   * process itself rather than on the caller.
   */
  read_tsc_64(&(rp->p_accounting.enter_queue));

  /* Process accounting for scheduling */
  rp->p_accounting.dequeues--;
  rp->p_accounting.preempted++;

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}
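
/*
 * For example, if queue q holds A -> B and rp is preempted, enqueue_head(rp)
 * yields rp -> A -> B, whereas plain enqueue(rp) would yield A -> B -> rp;
 * rp thus runs again first, with its remaining quantum intact.
 */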

/*===========================================================================*
 *				dequeue					     *
 *===========================================================================*/
void dequeue(struct proc *rp)
/* this process is no longer runnable */
{
/* A process must be removed from the scheduling queues, for example, because
 * it has blocked.  If the currently active process is removed, a new process
 * is picked to run by calling pick_proc().
 *
 * This function can operate x-cpu as it always removes the process from the
 * queue of the cpu the process is currently assigned to.
 */
  int q = rp->p_priority;		/* queue to use */
  struct proc **xpp;			/* iterate over queue */
  struct proc *prev_xp;
  u64_t tsc, tsc_delta;

  struct proc **rdy_tail;

  assert(proc_ptr_ok(rp));
  assert(!proc_is_runnable(rp));

  /* Side-effect for the kernel: check whether the task's stack is still ok. */
  assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);

  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now make sure that the process is not in its ready queue. Remove the
   * process if it is found. A process can be made unready even if it is not
   * running by being sent a signal that kills it.
   */
  prev_xp = NULL;
  for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
		  xpp = &(*xpp)->p_nextready) {
      if (*xpp == rp) {				/* found process to remove */
          *xpp = (*xpp)->p_nextready;		/* replace with next chain */
          if (rp == rdy_tail[q]) {		/* queue tail removed */
              rdy_tail[q] = prev_xp;		/* set new tail */
	  }

          break;
      }
      prev_xp = *xpp;				/* save previous in chain */
  }

  /* Process accounting for scheduling */
  rp->p_accounting.dequeues++;

  /* This is not all that accurate on virtual machines, especially with
   * I/O-bound processes that only spend a short amount of time in the queue
   * at a time.
   */
  if (rp->p_accounting.enter_queue) {
	read_tsc_64(&tsc);
	tsc_delta = tsc - rp->p_accounting.enter_queue;
	rp->p_accounting.time_in_queue += tsc_delta;
	rp->p_accounting.enter_queue = 0;
  }

  /* For ps(1), remember when the process was last dequeued. */
  rp->p_dequeued = get_monotonic();

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}
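
/*
 * The accounting above boils down to the following, with TSC values in CPU
 * cycles (cpu_time_2_ms() converts the total when it is reported to the
 * scheduler in notify_scheduler() below):
 *
 *  time_in_queue += now - enter_queue;	// cycles spent waiting to run
 *  enter_queue = 0;			// no longer on a run queue
 */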

/*===========================================================================*
 *				pick_proc				     *
 *===========================================================================*/
static struct proc * pick_proc(void)
{
/* Decide who to run now.  A new process is selected and returned.
 * When a billable process is selected, record it in 'bill_ptr', so that the
 * clock task can tell who to bill for system time.
 *
 * This function always uses the run queues of the local cpu!
 */
  register struct proc *rp;			/* process to run */
  struct proc **rdy_head;
  int q;				/* iterate over queues */

  /* Check each of the scheduling queues for ready processes. The number of
   * queues is defined in proc.h, and priorities are set in the task table.
   * If there are no processes ready to run, return NULL.
   */
  rdy_head = get_cpulocal_var(run_q_head);
  for (q=0; q < NR_SCHED_QUEUES; q++) {
	if(!(rp = rdy_head[q])) {
		TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
		continue;
	}
	assert(proc_is_runnable(rp));
	if (priv(rp)->s_flags & BILLABLE)
		get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
	return rp;
  }
  return NULL;
}
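
/*
 * Queue 0 holds the highest-priority processes, so the loop above simply
 * runs the head of the lowest-numbered nonempty queue. For example, if
 * queues 0..5 are empty and queue 6 holds A -> B, pick_proc() returns A.
 */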

/*===========================================================================*
 *				endpoint_lookup				     *
 *===========================================================================*/
struct proc *endpoint_lookup(endpoint_t e)
{
	int n;

	if(!isokendpt(e, &n)) return NULL;

	return proc_addr(n);
}

/*===========================================================================*
 *				isokendpt_f				     *
 *===========================================================================*/
#if DEBUG_ENABLE_IPC_WARNINGS
int isokendpt_f(const char * file, int line, endpoint_t e, int * p,
	const int fatalflag)
#else
int isokendpt_f(endpoint_t e, int * p, const int fatalflag)
#endif
{
	int ok = 0;
	/* Convert an endpoint number into a process number.
	 * Return nonzero if the process is alive with the corresponding
	 * generation number, zero otherwise.
	 *
	 * This function is called with file and line number by the
	 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
	 * otherwise without. This allows us to print where the
	 * conversion was attempted, making the errors verbose without
	 * adding code for that at every call.
	 *
	 * If fatalflag is nonzero, we must panic if the conversion doesn't
	 * succeed.
	 */
	*p = _ENDPOINT_P(e);
	if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
		ok = 1;
	if(!ok && fatalflag)
		panic("invalid endpoint: %d", e);
	return ok;
}
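
/*
 * A sketch of the endpoint encoding this check relies on: an endpoint
 * combines a process slot number with a generation count, so a stale
 * endpoint referring to a reused slot is rejected (macros from
 * <minix/endpoint.h>):
 *
 *  e = _ENDPOINT(g, p);	// endpoint for generation g, process number p
 *  _ENDPOINT_P(e);		// recovers p, as used above
 *  // proc_addr(p)->p_endpoint == e holds only while slot p is occupied
 *  // by that same generation, which is exactly the test made above.
 */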

static void notify_scheduler(struct proc *p)
{
	message m_no_quantum;
	int err;

	assert(!proc_kernel_scheduler(p));

	/* dequeue the process */
	RTS_SET(p, RTS_NO_QUANTUM);
	/*
	 * Notify the process's scheduler that it has run out of
	 * quantum. This is done by sending a message to the scheduler
	 * on the process's behalf.
	 */
	m_no_quantum.m_source = p->p_endpoint;
	m_no_quantum.m_type   = SCHEDULING_NO_QUANTUM;
	m_no_quantum.m_krn_lsys_schedule.acnt_queue = cpu_time_2_ms(p->p_accounting.time_in_queue);
	m_no_quantum.m_krn_lsys_schedule.acnt_deqs      = p->p_accounting.dequeues;
	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync  = p->p_accounting.ipc_sync;
	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
	m_no_quantum.m_krn_lsys_schedule.acnt_preempt   = p->p_accounting.preempted;
	m_no_quantum.m_krn_lsys_schedule.acnt_cpu       = cpuid;
	m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load  = cpu_load();

	/* Reset accounting */
	reset_proc_accounting(p);

	if ((err = mini_send(p, p->p_scheduler->p_endpoint,
					&m_no_quantum, FROM_KERNEL))) {
		panic("WARNING: Scheduling: mini_send returned %d\n", err);
	}
}
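
/*
 * A sketch of the other side of this exchange: a user-space scheduler that
 * receives SCHEDULING_NO_QUANTUM typically picks a new priority and quantum
 * for the process and hands them back to the kernel, e.g. via the
 * sys_schedule() kernel call, which clears RTS_NO_QUANTUM and makes the
 * process runnable again.
 */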

void proc_no_time(struct proc * p)
{
	if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
		/* this dequeues the process */
		notify_scheduler(p);
	}
	else {
		/*
		 * Non-preemptible processes only need their quantum to
		 * be renewed; in fact, they bypass scheduling.
		 */
		p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
#if DEBUG_RACE
		RTS_SET(p, RTS_PREEMPTED);
		RTS_UNSET(p, RTS_PREEMPTED);
#endif
	}
}
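
/*
 * For example, a non-preemptible process with p_quantum_size_ms == 200 just
 * gets p_cpu_time_left reloaded with ms_2_cpu_time(200) cycles here; its
 * scheduler is never consulted.
 */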

void reset_proc_accounting(struct proc *p)
{
  p->p_accounting.preempted = 0;
  p->p_accounting.ipc_sync  = 0;
  p->p_accounting.ipc_async = 0;
  p->p_accounting.dequeues  = 0;
  p->p_accounting.time_in_queue = 0;
  p->p_accounting.enter_queue = 0;
}

void copr_not_available_handler(void)
{
	struct proc * p;
	struct proc ** local_fpu_owner;
	/*
	 * Disable the FPU exception (both for the kernel and for the process
	 * once it's scheduled), and initialize or restore the FPU state.
	 */

	disable_fpu_exception();

	p = get_cpulocal_var(proc_ptr);

	/* if FPU is not owned by anyone, do not store anything */
	local_fpu_owner = get_cpulocal_var_ptr(fpu_owner);
	if (*local_fpu_owner != NULL) {
		assert(*local_fpu_owner != p);
		save_local_fpu(*local_fpu_owner, FALSE /*retain*/);
	}

	/*
	 * Restore the current process' state and let it run again; do not
	 * schedule!
	 */
	if (restore_fpu(p) != OK) {
		/* Restoring FPU state failed. This is always the process's own
		 * fault. Send a signal, and schedule another process instead.
		 */
		*local_fpu_owner = NULL;		/* release FPU */
		cause_sig(proc_nr(p), SIGFPE);
		return;
	}

	*local_fpu_owner = p;
	context_stop(proc_addr(KERNEL));
	restore_user_context(p);
	NOT_REACHABLE;
}
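
/*
 * In short, the lazy FPU switch above amounts to (pseudo-C, with owner
 * standing for *local_fpu_owner):
 *
 *  disable_fpu_exception();		// stop trapping FPU instructions
 *  if (owner != NULL)
 *	save_local_fpu(owner, FALSE);	// park the previous owner's state
 *  if (restore_fpu(p) != OK)
 *	cause_sig(proc_nr(p), SIGFPE);	// bad state: signal p, pick another
 *  else
 *	owner = p;			// p now owns this cpu's FPU
 */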

void release_fpu(struct proc * p) {
	struct proc ** fpu_owner_ptr;

	fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner);

	if (*fpu_owner_ptr == p)
		*fpu_owner_ptr = NULL;
}

void ser_dump_proc(void)
{
        struct proc *pp;

        for (pp = BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++)
        {
                if (isemptyp(pp))
                        continue;
                print_proc_recursive(pp);
        }
}