/* This file contains essentially all of the process and message handling.
 * Together with "mpx.s" it forms the lowest layer of the MINIX kernel.
 * There is one entry point from the outside:
 *
 *   sys_call:	  a system call, i.e., the kernel is trapped with an INT
 *
 * Changes:
 *   Aug 19, 2005     rewrote scheduling code  (Jorrit N. Herder)
 *   Jul 25, 2005     rewrote system call handling  (Jorrit N. Herder)
 *   May 26, 2005     rewrote message passing functions  (Jorrit N. Herder)
 *   May 24, 2005     new notification system call  (Jorrit N. Herder)
 *   Oct 28, 2004     nonblocking send and receive calls  (Jorrit N. Herder)
 *
 * The code here is critical to make everything work and is important for the
 * overall performance of the system. A large fraction of the code deals with
 * list manipulation. To make this both easy to understand and fast to
 * execute, pointer pointers are used throughout the code. Pointer pointers
 * prevent exceptions for the head or tail of a linked list.
 *
 *	node_t *queue, *new_node;	// assume these as global variables
 *	node_t **xpp = &queue;		// get pointer pointer to head of queue
 *	while (*xpp != NULL)		// find last pointer of the linked list
 *		xpp = &(*xpp)->next;	// get pointer to next pointer
 *	*xpp = new_node;		// now replace the end (the NULL pointer)
 *	new_node->next = NULL;		// and mark the new end of the list
 *
 * For example, when adding a new node to the end of the list, one normally
 * makes an exception for an empty list and looks up the end of the list for
 * nonempty lists. As shown above, this is not required with pointer pointers.
 */

#include <minix/com.h>
#include <minix/ipcconst.h>
#include <stddef.h>
#include <signal.h>
#include <assert.h>
#include <string.h>

#include "kernel/kernel.h"
#include "vm.h"
#include "clock.h"
#include "spinlock.h"
#include "arch_proto.h"

#include <minix/syslib.h>

/* Scheduling and message passing functions */
static void idle(void);
/**
 * Made public for use in clock.c (for user-space scheduling)
static int mini_send(struct proc *caller_ptr, endpoint_t dst_e, message
	*m_ptr, int flags);
*/
static int mini_receive(struct proc *caller_ptr, endpoint_t src,
	message *m_ptr, int flags);
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size);
static int deadlock(int function, register struct proc *caller,
	endpoint_t src_dst_e);
static int try_async(struct proc *caller_ptr);
static int try_one(struct proc *src_ptr, struct proc *dst_ptr);
static struct proc * pick_proc(void);
static void enqueue_head(struct proc *rp);

/* all idles share the same idle_priv structure */
static struct priv idle_priv;

static void set_idle_name(char * name, int n)
{
	int i, c;
	int p_z = 0;

	if (n > 999)
		n = 999;

	name[0] = 'i';
	name[1] = 'd';
	name[2] = 'l';
	name[3] = 'e';

	for (i = 4, c = 100; c > 0; c /= 10) {
		int digit;

		digit = n / c;
		n -= digit * c;

		if (p_z || digit != 0 || c == 1) {
			p_z = 1;
			name[i++] = '0' + digit;
		}
	}

	name[i] = '\0';
}

#define PICK_ANY	1
#define PICK_HIGHERONLY	2
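/*
 * As a sketch of the same technique (illustrative only, not code used
 * verbatim below): removing a known node with a pointer pointer needs no
 * special case for the head of the list either.
 *
 *	node_t **xpp = &queue;		// start at the pointer to the head
 *	while (*xpp != target)		// assumes 'target' is in the list
 *		xpp = &(*xpp)->next;	// advance to the next pointer
 *	*xpp = target->next;		// unlink; head and middle look alike
 *	target->next = NULL;
 *
 * The same shape appears below in mini_receive() and dequeue().
 */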
#define BuildNotifyMessage(m_ptr, src, dst_ptr) \
	memset((m_ptr), 0, sizeof(*(m_ptr)));				\
	(m_ptr)->m_type = NOTIFY_MESSAGE;				\
	(m_ptr)->m_notify.timestamp = get_monotonic();			\
	switch (src) {							\
	case HARDWARE:							\
		(m_ptr)->m_notify.interrupts =				\
			priv(dst_ptr)->s_int_pending;			\
		priv(dst_ptr)->s_int_pending = 0;			\
		break;							\
	case SYSTEM:							\
		memcpy(&(m_ptr)->m_notify.sigset,			\
			&priv(dst_ptr)->s_sig_pending,			\
			sizeof(sigset_t));				\
		sigemptyset(&priv(dst_ptr)->s_sig_pending);		\
		break;							\
	}

void proc_init(void)
{
	struct proc * rp;
	struct priv *sp;
	int i;

	/* Clear the process table. Announce each slot as empty and set up
	 * mappings for proc_addr() and proc_nr() macros. Do the same for the
	 * table with privilege structures for the system processes.
	 */
	for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
		rp->p_rts_flags = RTS_SLOT_FREE;	/* initialize free slot */
		rp->p_magic = PMAGIC;
		rp->p_nr = i;				/* proc number from ptr */
		rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
		rp->p_scheduler = NULL;			/* no user space scheduler */
		rp->p_priority = 0;			/* no priority */
		rp->p_quantum_size_ms = 0;		/* no quantum size */

		/* arch-specific initialization */
		arch_proc_reset(rp);
	}
	for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
		sp->s_proc_nr = NONE;			/* initialize as free */
		sp->s_id = (sys_id_t) i;		/* priv structure index */
		ppriv_addr[i] = sp;			/* priv ptr from number */
		sp->s_sig_mgr = NONE;			/* clear signal managers */
		sp->s_bak_sig_mgr = NONE;
	}

	idle_priv.s_flags = IDL_F;
	/* initialize IDLE structures for every CPU */
	for (i = 0; i < CONFIG_MAX_CPUS; i++) {
		struct proc * ip = get_cpu_var_ptr(i, idle_proc);
		ip->p_endpoint = IDLE;
		ip->p_priv = &idle_priv;
		/* must not let idle ever get scheduled */
		ip->p_rts_flags |= RTS_PROC_STOP;
		set_idle_name(ip->p_name, i);
	}
}
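/*
 * An endpoint combines a generation number with a process slot number, so a
 * stale endpoint for a reused slot can be recognized. A minimal sketch of
 * the round trip (the encoding itself lives in <minix/endpoint.h>):
 *
 *	endpoint_t e = _ENDPOINT(0, p_nr);	// generation 0, slot p_nr
 *	int slot = _ENDPOINT_P(e);		// recovers p_nr
 *
 * isokendpt_f() below additionally checks that proc_addr(slot)->p_endpoint
 * still equals e, i.e. that the generation has not changed.
 */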
static void switch_address_space_idle(void)
{
#ifdef CONFIG_SMP
	/*
	 * Currently we bet that VM is always alive and its pages available, so
	 * when the CPU wakes up the kernel is mapped and no surprises happen.
	 * This is only a problem if more than one CPU is available.
	 */
	switch_address_space(proc_addr(VM_PROC_NR));
#endif
}

/*===========================================================================*
 *				idle					     *
 *===========================================================================*/
static void idle(void)
{
	struct proc * p;

	/* This function is called whenever there is no work to do.
	 * Halt the CPU, and measure how many timestamp counter ticks are
	 * spent not doing anything. This allows test setups to measure
	 * the CPU utilization of certain workloads with high precision.
	 */

	p = get_cpulocal_var(proc_ptr) = get_cpulocal_var_ptr(idle_proc);
	if (priv(p)->s_flags & BILLABLE)
		get_cpulocal_var(bill_ptr) = p;

	switch_address_space_idle();

#ifdef CONFIG_SMP
	get_cpulocal_var(cpu_is_idle) = 1;
	/* we don't need to keep time on APs as it is handled on the BSP */
	if (cpuid != bsp_cpu_id)
		stop_local_timer();
	else
#endif
	{
		/*
		 * If the timer has expired while in kernel, we must
		 * rearm it before we go to sleep.
		 */
		restart_local_timer();
	}

	/* start accounting for the idle time */
	context_stop(proc_addr(KERNEL));
#if !SPROFILE
	halt_cpu();
#else
	if (!sprofiling)
		halt_cpu();
	else {
		volatile int * v;

		v = get_cpulocal_var_ptr(idle_interrupted);
		interrupts_enable();
		while (!*v)
			arch_pause();
		interrupts_disable();
		*v = 0;
	}
#endif
	/*
	 * End of accounting for the idle task does not happen here; the kernel
	 * is handling stuff for quite a while before it gets back here!
	 */
}
/*===========================================================================*
 *				switch_to_user				     *
 *===========================================================================*/
void switch_to_user(void)
{
	/* This function is called an instant before proc_ptr is
	 * to be scheduled again.
	 */
	struct proc * p;
#ifdef CONFIG_SMP
	int tlb_must_refresh = 0;
#endif

	p = get_cpulocal_var(proc_ptr);
	/*
	 * If the current process is still runnable, check the misc flags and
	 * let it run unless it becomes not runnable in the meantime.
	 */
	if (proc_is_runnable(p))
		goto check_misc_flags;
	/*
	 * If a process becomes not runnable while handling the misc flags, we
	 * need to pick a new one here and start from scratch. Also if the
	 * current process wasn't runnable, we pick a new one here.
	 */
not_runnable_pick_new:
	if (proc_is_preempted(p)) {
		p->p_rts_flags &= ~RTS_PREEMPTED;
		if (proc_is_runnable(p)) {
			if (p->p_cpu_time_left)
				enqueue_head(p);
			else
				enqueue(p);
		}
	}

	/*
	 * If we have no process to run, set IDLE as the current process for
	 * time accounting and put the cpu in an idle state. After the next
	 * timer interrupt the execution resumes here and we can pick another
	 * process. If there is still nothing runnable we "schedule" IDLE
	 * again.
	 */
	while (!(p = pick_proc())) {
		idle();
	}

	/* update the global variable */
	get_cpulocal_var(proc_ptr) = p;

#ifdef CONFIG_SMP
	if (p->p_misc_flags & MF_FLUSH_TLB && get_cpulocal_var(ptproc) == p)
		tlb_must_refresh = 1;
#endif
	switch_address_space(p);

check_misc_flags:

	assert(p);
	assert(proc_is_runnable(p));
	while (p->p_misc_flags &
		(MF_KCALL_RESUME | MF_DELIVERMSG |
		 MF_SC_DEFER | MF_SC_TRACE | MF_SC_ACTIVE)) {

		assert(proc_is_runnable(p));
		if (p->p_misc_flags & MF_KCALL_RESUME) {
			kernel_call_resume(p);
		}
		else if (p->p_misc_flags & MF_DELIVERMSG) {
			TRACE(VF_SCHEDULING, printf("delivering to %s / %d\n",
				p->p_name, p->p_endpoint););
			delivermsg(p);
		}
		else if (p->p_misc_flags & MF_SC_DEFER) {
			/* Perform the system call that we deferred earlier. */

			assert (!(p->p_misc_flags & MF_SC_ACTIVE));

			arch_do_syscall(p);

			/* If the process is stopped for signal delivery, and
			 * not blocked sending a message after the system call,
			 * inform PM.
			 */
			if ((p->p_misc_flags & MF_SIG_DELAY) &&
					!RTS_ISSET(p, RTS_SENDING))
				sig_delay_done(p);
		}
		else if (p->p_misc_flags & MF_SC_TRACE) {
			/* Trigger a system call leave event if this was a
			 * system call. We must do this after processing the
			 * other flags above, both for tracing correctness and
			 * to be able to use 'break'.
			 */
			if (!(p->p_misc_flags & MF_SC_ACTIVE))
				break;

			p->p_misc_flags &=
				~(MF_SC_TRACE | MF_SC_ACTIVE);

			/* Signal the "leave system call" event.
			 * Block the process.
			 */
			cause_sig(proc_nr(p), SIGTRAP);
		}
		else if (p->p_misc_flags & MF_SC_ACTIVE) {
			/* If MF_SC_ACTIVE was set, remove it now:
			 * we're leaving the system call.
			 */
			p->p_misc_flags &= ~MF_SC_ACTIVE;

			break;
		}

		/*
		 * The selected process might not be runnable anymore. We have
		 * to check it and schedule another one.
		 */
		if (!proc_is_runnable(p))
			goto not_runnable_pick_new;
	}
	/*
	 * Check the quantum left before it runs again. We must do it only
	 * here, as we are sure that a possible out-of-quantum message to the
	 * scheduler will not collide with the regular ipc.
	 */
	if (!p->p_cpu_time_left)
		proc_no_time(p);
	/*
	 * After handling the misc flags the selected process might not be
	 * runnable anymore. We have to check it and schedule another one.
	 */
	if (!proc_is_runnable(p))
		goto not_runnable_pick_new;

	TRACE(VF_SCHEDULING, printf("cpu %d starting %s / %d "
		"pc 0x%08x\n",
		cpuid, p->p_name, p->p_endpoint, p->p_reg.pc););
#if DEBUG_TRACE
	p->p_schedules++;
#endif

	p = arch_finish_switch_to_user();
	assert(p->p_cpu_time_left);

	context_stop(proc_addr(KERNEL));

	/* If the process isn't the owner of FPU, enable the FPU exception */
	if (get_cpulocal_var(fpu_owner) != p)
		enable_fpu_exception();
	else
		disable_fpu_exception();

	/* If MF_CONTEXT_SET is set, don't clobber process state within
	 * the kernel. The next kernel entry is OK again though.
	 */
	p->p_misc_flags &= ~MF_CONTEXT_SET;

#if defined(__i386__)
	assert(p->p_seg.p_cr3 != 0);
#elif defined(__arm__)
	assert(p->p_seg.p_ttbr != 0);
#endif
#ifdef CONFIG_SMP
	if (p->p_misc_flags & MF_FLUSH_TLB) {
		if (tlb_must_refresh)
			refresh_tlb();
		p->p_misc_flags &= ~MF_FLUSH_TLB;
	}
#endif

	restart_local_timer();

	/*
	 * restore_user_context() carries out the actual mode switch from
	 * kernel to userspace. This function does not return.
	 */
	restore_user_context(p);
	NOT_REACHABLE;
}
/*
 * handler for all synchronous IPC calls
 */
static int do_sync_ipc(struct proc * caller_ptr, /* who made the call */
			int call_nr,	/* system call number and flags */
			endpoint_t src_dst_e,	/* src or dst of the call */
			message *m_ptr)	/* user's pointer to a message */
{
  int result;					/* the system call's result */
  int src_dst_p;				/* Process slot number */
  char *callname;

  /* Check destination. RECEIVE is the only call that accepts ANY (in addition
   * to a real endpoint). The other calls (SEND, SENDREC, and NOTIFY) require
   * an endpoint that corresponds to a process. In addition, it is necessary
   * to check whether a process is allowed to send to a given destination.
   */
  assert(call_nr != SENDA);

  /* Only allow non-negative call_nr values less than 32 */
  if (call_nr < 0 || call_nr > IPCNO_HIGHEST || call_nr >= 32
      || !(callname = ipc_call_names[call_nr])) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: trap %d not allowed, caller %d, src_dst %d\n",
          call_nr, proc_nr(caller_ptr), src_dst_e);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  if (src_dst_e == ANY)
  {
	if (call_nr != RECEIVE)
	{
#if 0
		printf("sys_call: %s by %d with bad endpoint %d\n",
			callname,
			proc_nr(caller_ptr), src_dst_e);
#endif
		return EINVAL;
	}
	src_dst_p = (int) src_dst_e;
  }
  else
  {
	/* Require a valid source and/or destination process. */
	if(!isokendpt(src_dst_e, &src_dst_p)) {
#if 0
		printf("sys_call: %s by %d with bad endpoint %d\n",
			callname,
			proc_nr(caller_ptr), src_dst_e);
#endif
		return EDEADSRCDST;
	}

	/* If the call is to send to a process, i.e., for SEND, SENDNB,
	 * SENDREC or NOTIFY, verify that the caller is allowed to send to
	 * the given destination.
	 */
	if (call_nr != RECEIVE)
	{
		if (!may_send_to(caller_ptr, src_dst_p)) {
#if DEBUG_ENABLE_IPC_WARNINGS
			printf(
			"sys_call: ipc mask denied %s from %d to %d\n",
				callname,
				caller_ptr->p_endpoint, src_dst_e);
#endif
			return(ECALLDENIED);	/* call denied by ipc mask */
		}
	}
  }

  /* Check if the process has privileges for the requested call. Calls to the
   * kernel may only be SENDREC, because tasks always reply and may not block
   * if the caller doesn't do receive().
   */
  if (!(priv(caller_ptr)->s_trap_mask & (1 << call_nr))) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: %s not allowed, caller %d, src_dst %d\n",
          callname, proc_nr(caller_ptr), src_dst_p);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  if (call_nr != SENDREC && call_nr != RECEIVE && iskerneln(src_dst_p)) {
#if DEBUG_ENABLE_IPC_WARNINGS
      printf("sys_call: trap %s not allowed, caller %d, src_dst %d\n",
           callname, proc_nr(caller_ptr), src_dst_e);
#endif
	return(ETRAPDENIED);		/* trap denied by mask or kernel */
  }

  switch(call_nr) {
  case SENDREC:
	/* A flag is set so that notifications cannot interrupt SENDREC. */
	caller_ptr->p_misc_flags |= MF_REPLY_PEND;
	/* fall through */
  case SEND:
	result = mini_send(caller_ptr, src_dst_e, m_ptr, 0);
	if (call_nr == SEND || result != OK)
		break;				/* done, or SEND failed */
	/* fall through for SENDREC */
  case RECEIVE:
	if (call_nr == RECEIVE) {
		caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
		IPC_STATUS_CLEAR(caller_ptr);	/* clear IPC status code */
	}
	result = mini_receive(caller_ptr, src_dst_e, m_ptr, 0);
	break;
  case NOTIFY:
	result = mini_notify(caller_ptr, src_dst_e);
	break;
  case SENDNB:
	result = mini_send(caller_ptr, src_dst_e, m_ptr, NON_BLOCKING);
	break;
  default:
	result = EBADCALL;			/* illegal system call */
  }

  /* Now, return the result of the system call to the caller. */
  return(result);
}
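/*
 * A sketch of how SENDREC composes the two primitives above; this mirrors
 * the fall-through in the switch, it is not a separate code path:
 *
 *	caller->p_misc_flags |= MF_REPLY_PEND;	// notifications must wait
 *	result = mini_send(caller, dst, m_ptr, 0);	// may block, RTS_SENDING
 *	if (result == OK)
 *		result = mini_receive(caller, dst, m_ptr, 0); // RTS_RECEIVING
 *
 * MF_REPLY_PEND is cleared again once the reply has been accepted (see
 * mini_send() and mini_receive()).
 */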
int do_ipc(reg_t r1, reg_t r2, reg_t r3)
{
  struct proc *const caller_ptr = get_cpulocal_var(proc_ptr);	/* get pointer to caller */
  int call_nr = (int) r1;

  assert(!RTS_ISSET(caller_ptr, RTS_SLOT_FREE));

  /* bill kernel time to this process. */
  kbill_ipc = caller_ptr;

  /* If this process is subject to system call tracing, handle that first. */
  if (caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) {
	/* Are we tracing this process, and is it the first sys_call entry? */
	if ((caller_ptr->p_misc_flags & (MF_SC_TRACE | MF_SC_DEFER)) ==
							MF_SC_TRACE) {
		/* We must notify the tracer before processing the actual
		 * system call. If we don't, the tracer would not be able to
		 * obtain the input message. Postpone the entire system call.
		 */
		caller_ptr->p_misc_flags &= ~MF_SC_TRACE;
		assert(!(caller_ptr->p_misc_flags & MF_SC_DEFER));
		caller_ptr->p_misc_flags |= MF_SC_DEFER;
		caller_ptr->p_defer.r1 = r1;
		caller_ptr->p_defer.r2 = r2;
		caller_ptr->p_defer.r3 = r3;

		/* Signal the "enter system call" event. Block the process. */
		cause_sig(proc_nr(caller_ptr), SIGTRAP);

		/* Preserve the return register's value. */
		return caller_ptr->p_reg.retreg;
	}

	/* If the MF_SC_DEFER flag is set, the syscall is now being resumed. */
	caller_ptr->p_misc_flags &= ~MF_SC_DEFER;

	assert (!(caller_ptr->p_misc_flags & MF_SC_ACTIVE));

	/* Set a flag to allow reliable tracing of leaving the system call. */
	caller_ptr->p_misc_flags |= MF_SC_ACTIVE;
  }

  if(caller_ptr->p_misc_flags & MF_DELIVERMSG) {
	panic("sys_call: MF_DELIVERMSG on for %s / %d\n",
		caller_ptr->p_name, caller_ptr->p_endpoint);
  }

  /* Now check if the call is known and try to perform the request. The only
   * system calls that exist in MINIX are sending and receiving messages.
   *   - SENDREC: combines SEND and RECEIVE in a single system call
   *   - SEND:    sender blocks until its message has been delivered
   *   - RECEIVE: receiver blocks until an acceptable message has arrived
   *   - NOTIFY:  asynchronous call; deliver notification or mark pending
   *   - SENDA:   list of asynchronous send requests
   */
  switch(call_nr) {
  case SENDREC:
  case SEND:
  case RECEIVE:
  case NOTIFY:
  case SENDNB:
  {
	/* Process accounting for scheduling */
	caller_ptr->p_accounting.ipc_sync++;

	return do_sync_ipc(caller_ptr, call_nr, (endpoint_t) r2,
			(message *) r3);
  }
  case SENDA:
  {
	/*
	 * Get and check the size of the argument in bytes, as it is a
	 * table.
	 */
	size_t msg_size = (size_t) r2;

	/* Process accounting for scheduling */
	caller_ptr->p_accounting.ipc_async++;

	/* Limit size to something reasonable. An arbitrary choice is 16
	 * times the number of process table entries.
	 */
	if (msg_size > 16*(NR_TASKS + NR_PROCS))
		return EDOM;
	return mini_senda(caller_ptr, (asynmsg_t *) r3, msg_size);
  }
  case MINIX_KERNINFO:
  {
	/* It might not be initialized yet. */
	if(!minix_kerninfo_user) {
		return EBADCALL;
	}

	arch_set_secondary_ipc_return(caller_ptr, minix_kerninfo_user);
	return OK;
  }
  default:
	return EBADCALL;		/* illegal system call */
  }
}
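/*
 * A concrete picture of the cycle check done by deadlock() below. The only
 * benign cycle is the rendezvous of size two:
 *
 *	A: SENDREC to B		// A blocks sending, expects a reply
 *	B: RECEIVE from A	// cycle of size two, but B can accept
 *
 * whereas two blocking sends are fatal:
 *
 *	A: SEND to B		// A blocks with RTS_SENDING
 *	B: SEND to A		// both sending: ELOCKED for the second caller
 *
 * The chain is followed via P_BLOCKEDON() until it either ends (no deadlock)
 * or returns to the caller.
 */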
/*===========================================================================*
 *				deadlock				     *
 *===========================================================================*/
static int deadlock(
  int function,				/* trap number */
  register struct proc *cp,		/* pointer to caller */
  endpoint_t src_dst_e			/* src or dst process */
)
{
/* Check for deadlock. This can happen if 'caller_ptr' and 'src_dst' have
 * a cyclic dependency of blocking send and receive calls. The only cyclic
 * dependency that is not fatal is if the caller and target directly SEND(REC)
 * and RECEIVE to each other. If a deadlock is found, the group size is
 * returned. Otherwise zero is returned.
 */
  register struct proc *xp;			/* process pointer */
  int group_size = 1;				/* start with only caller */
#if DEBUG_ENABLE_IPC_WARNINGS
  static struct proc *processes[NR_PROCS + NR_TASKS];
  processes[0] = cp;
#endif

  while (src_dst_e != ANY) {			/* check while process nr */
      int src_dst_slot;
      okendpt(src_dst_e, &src_dst_slot);
      xp = proc_addr(src_dst_slot);		/* follow chain of processes */
      assert(proc_ptr_ok(xp));
      assert(!RTS_ISSET(xp, RTS_SLOT_FREE));
#if DEBUG_ENABLE_IPC_WARNINGS
      processes[group_size] = xp;
#endif
      group_size ++;				/* extra process in group */

      /* Check whether the last process in the chain has a dependency. If it
       * has not, the cycle cannot be closed and we are done.
       */
      if((src_dst_e = P_BLOCKEDON(xp)) == NONE)
	  return 0;

      /* Now check if there is a cyclic dependency. For group sizes of two,
       * a combination of SEND(REC) and RECEIVE is not fatal. Larger groups
       * or other combinations indicate a deadlock.
       */
      if (src_dst_e == cp->p_endpoint) {	/* possible deadlock */
	  if (group_size == 2) {		/* caller and src_dst */
	      /* The function number is magically converted to flags. */
	      if ((xp->p_rts_flags ^ (function << 2)) & RTS_SENDING) {
	          return(0);			/* not a deadlock */
	      }
	  }
#if DEBUG_ENABLE_IPC_WARNINGS
	  {
		int i;
		printf("deadlock between these processes:\n");
		for(i = 0; i < group_size; i++) {
			printf(" %10s ", processes[i]->p_name);
		}
		printf("\n\n");
		for(i = 0; i < group_size; i++) {
			print_proc(processes[i]);
			proc_stacktrace(processes[i]);
		}
	  }
#endif
          return(group_size);			/* deadlock found */
      }
  }
  return(0);					/* not a deadlock */
}
/*===========================================================================*
 *				has_pending				     *
 *===========================================================================*/
static int has_pending(sys_map_t *map, int src_p, int asynm)
{
/* Check to see if there is a pending message from the desired source
 * available.
 */

  int src_id;
  sys_id_t id = NULL_PRIV_ID;
#ifdef CONFIG_SMP
  struct proc * p;
#endif

  /* Either check a specific bit in the mask map, or find the first bit set in
   * it (if any), depending on whether the receive was called on a specific
   * source endpoint.
   */
  if (src_p != ANY) {
	src_id = nr_to_id(src_p);
	if (get_sys_bit(*map, src_id)) {
#ifdef CONFIG_SMP
		p = proc_addr(id_to_nr(src_id));
		if (asynm && RTS_ISSET(p, RTS_VMINHIBIT))
			p->p_misc_flags |= MF_SENDA_VM_MISS;
		else
#endif
			id = src_id;
	}
  } else {
	/* Find a source with a pending message */
	for (src_id = 0; src_id < NR_SYS_PROCS; src_id += BITCHUNK_BITS) {
		if (get_sys_bits(*map, src_id) != 0) {
#ifdef CONFIG_SMP
			while (src_id < NR_SYS_PROCS) {
				while (!get_sys_bit(*map, src_id)) {
					if (src_id == NR_SYS_PROCS)
						goto quit_search;
					src_id++;
				}
				p = proc_addr(id_to_nr(src_id));
				/*
				 * We must not let the kernel fiddle with
				 * pages of a process which are currently
				 * being changed by VM. It is dangerous! So do
				 * not report such a process as having pending
				 * async messages. Skip it.
				 */
				if (asynm && RTS_ISSET(p, RTS_VMINHIBIT)) {
					p->p_misc_flags |= MF_SENDA_VM_MISS;
					src_id++;
				} else
					goto quit_search;
			}
#else
			while (!get_sys_bit(*map, src_id)) src_id++;
			goto quit_search;
#endif
		}
	}

quit_search:
	if (src_id < NR_SYS_PROCS)	/* Found one */
		id = src_id;
  }

  return(id);
}

/*===========================================================================*
 *				has_pending_notify			     *
 *===========================================================================*/
int has_pending_notify(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_notify_pending;
	return has_pending(map, src_p, 0);
}

/*===========================================================================*
 *				has_pending_asend			     *
 *===========================================================================*/
int has_pending_asend(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_asyn_pending;
	return has_pending(map, src_p, 1);
}

/*===========================================================================*
 *				unset_notify_pending			     *
 *===========================================================================*/
void unset_notify_pending(struct proc * caller, int src_p)
{
	sys_map_t * map = &priv(caller)->s_notify_pending;
	unset_sys_bit(*map, src_p);
}
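/*
 * The pending maps above are bitmaps indexed by privilege id, not by process
 * number; each system process has a privilege structure of its own, so its
 * bit identifies it uniquely. Conceptually (a sketch of the bookkeeping; the
 * real accessors are the sys_bit macros used throughout):
 *
 *	set_sys_bit(priv(dst)->s_notify_pending, priv(src)->s_id);
 *	...
 *	if (get_sys_bit(priv(dst)->s_notify_pending, src_id))
 *		// a notification from 'src_id' is still waiting
 */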
/*===========================================================================*
 *				mini_send				     *
 *===========================================================================*/
int mini_send(
  register struct proc *caller_ptr,	/* who is trying to send a message? */
  endpoint_t dst_e,			/* to whom is message being sent? */
  message *m_ptr,			/* pointer to message buffer */
  const int flags
)
{
/* Send a message from 'caller_ptr' to 'dst'. If 'dst' is blocked waiting
 * for this message, copy the message to it and unblock 'dst'. If 'dst' is
 * not waiting at all, or is waiting for another source, queue 'caller_ptr'.
 */
  register struct proc *dst_ptr;
  register struct proc **xpp;
  int dst_p;
  dst_p = _ENDPOINT_P(dst_e);
  dst_ptr = proc_addr(dst_p);

  if (RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT))
  {
	return EDEADSRCDST;
  }

  /* Check if 'dst' is blocked waiting for this message. The destination's
   * RTS_SENDING flag may be set when its SENDREC call blocked while sending.
   */
  if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint)) {
	int call;
	/* Destination is indeed waiting for this message. */
	assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));

	if (!(flags & FROM_KERNEL)) {
		if(copy_msg_from_user(m_ptr, &dst_ptr->p_delivermsg))
			return EFAULT;
	} else {
		dst_ptr->p_delivermsg = *m_ptr;
		IPC_STATUS_ADD_FLAGS(dst_ptr, IPC_FLG_MSG_FROM_KERNEL);
	}

	dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
	dst_ptr->p_misc_flags |= MF_DELIVERMSG;

	call = (caller_ptr->p_misc_flags & MF_REPLY_PEND ? SENDREC
		: (flags & NON_BLOCKING ? SENDNB : SEND));
	IPC_STATUS_ADD_CALL(dst_ptr, call);

	if (dst_ptr->p_misc_flags & MF_REPLY_PEND)
		dst_ptr->p_misc_flags &= ~MF_REPLY_PEND;

	RTS_UNSET(dst_ptr, RTS_RECEIVING);

#if DEBUG_IPC_HOOK
	hook_ipc_msgsend(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
#endif
  } else {
	if(flags & NON_BLOCKING) {
		return(ENOTREADY);
	}

	/* Check for a possible deadlock before actually blocking. */
	if (deadlock(SEND, caller_ptr, dst_e)) {
		return(ELOCKED);
	}

	/* Destination is not waiting. Block and dequeue caller. */
	if (!(flags & FROM_KERNEL)) {
		if(copy_msg_from_user(m_ptr, &caller_ptr->p_sendmsg))
			return EFAULT;
	} else {
		caller_ptr->p_sendmsg = *m_ptr;
		/*
		 * We need to remember that this message is from the kernel so
		 * we can set the delivery status flags when the message is
		 * actually delivered.
		 */
		caller_ptr->p_misc_flags |= MF_SENDING_FROM_KERNEL;
	}

	RTS_SET(caller_ptr, RTS_SENDING);
	caller_ptr->p_sendto_e = dst_e;

	/* Process is now blocked. Put it on the destination's queue. */
	assert(caller_ptr->p_q_link == NULL);
	xpp = &dst_ptr->p_caller_q;		/* find end of list */
	while (*xpp) xpp = &(*xpp)->p_q_link;
	*xpp = caller_ptr;			/* add caller to end */

#if DEBUG_IPC_HOOK
	hook_ipc_msgsend(&caller_ptr->p_sendmsg, caller_ptr, dst_ptr);
#endif
  }
  return(OK);
}
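/*
 * A summary sketch of how the flags argument changes mini_send() above:
 *
 *	mini_send(caller, dst, m, 0);		 // SEND: may block, RTS_SENDING
 *	mini_send(caller, dst, m, NON_BLOCKING); // SENDNB: ENOTREADY instead
 *	mini_send(caller, dst, m, FROM_KERNEL);	 // message already in kernel
 *
 * With FROM_KERNEL the message is copied directly and the delivery carries
 * IPC_FLG_MSG_FROM_KERNEL; otherwise it is fetched from the caller's address
 * space with copy_msg_from_user().
 */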
/*===========================================================================*
 *				mini_receive				     *
 *===========================================================================*/
static int mini_receive(struct proc * caller_ptr,
			endpoint_t src_e, /* which message source is wanted */
			message * m_buff_usr, /* pointer to message buffer */
			const int flags)
{
/* A process or task wants to get a message. If a message is already queued,
 * acquire it and deblock the sender. If no message from the desired source
 * is available, block the caller.
 */
  register struct proc **xpp;
  int r, src_id, src_proc_nr, src_p;

  assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));

  /* This is where we want our message. */
  caller_ptr->p_delivermsg_vir = (vir_bytes) m_buff_usr;

  if(src_e == ANY) src_p = ANY;
  else
  {
	okendpt(src_e, &src_p);
	if (RTS_ISSET(proc_addr(src_p), RTS_NO_ENDPOINT))
	{
		return EDEADSRCDST;
	}
  }

  /* Check to see if a message from the desired source is already available.
   * The caller's RTS_SENDING flag may be set if SENDREC couldn't send. If it
   * is set, the process should be blocked.
   */
  if (!RTS_ISSET(caller_ptr, RTS_SENDING)) {

    /* Check if there are pending notifications, except for SENDREC. */
    if (! (caller_ptr->p_misc_flags & MF_REPLY_PEND)) {

	/* Check for pending notifications */
	if ((src_id = has_pending_notify(caller_ptr, src_p)) != NULL_PRIV_ID) {
	    endpoint_t hisep;

	    src_proc_nr = id_to_nr(src_id);	/* get source proc */
#if DEBUG_ENABLE_IPC_WARNINGS
	    if(src_proc_nr == NONE) {
		printf("mini_receive: sending notify from NONE\n");
	    }
#endif
	    assert(src_proc_nr != NONE);
	    unset_notify_pending(caller_ptr, src_id);	/* no longer pending */

	    /* Found a suitable source, deliver the notification message. */
	    hisep = proc_addr(src_proc_nr)->p_endpoint;
	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	    assert(src_e == ANY || hisep == src_e);

	    /* assemble message */
	    BuildNotifyMessage(&caller_ptr->p_delivermsg, src_proc_nr, caller_ptr);
	    caller_ptr->p_delivermsg.m_source = hisep;
	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;

	    IPC_STATUS_ADD_CALL(caller_ptr, NOTIFY);

	    goto receive_done;
	}
    }

    /* Check for pending asynchronous messages */
    if (has_pending_asend(caller_ptr, src_p) != NULL_PRIV_ID) {
	if (src_p != ANY)
		r = try_one(proc_addr(src_p), caller_ptr);
	else
		r = try_async(caller_ptr);

	if (r == OK) {
	    IPC_STATUS_ADD_CALL(caller_ptr, SENDA);
	    goto receive_done;
	}
    }

    /* Check caller queue. Use pointer pointers to keep code simple. */
    xpp = &caller_ptr->p_caller_q;
    while (*xpp) {
	struct proc * sender = *xpp;

	if (src_e == ANY || src_p == proc_nr(sender)) {
	    int call;
	    assert(!RTS_ISSET(sender, RTS_SLOT_FREE));
	    assert(!RTS_ISSET(sender, RTS_NO_ENDPOINT));

	    /* Found acceptable message. Copy it and update status. */
	    assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	    caller_ptr->p_delivermsg = sender->p_sendmsg;
	    caller_ptr->p_delivermsg.m_source = sender->p_endpoint;
	    caller_ptr->p_misc_flags |= MF_DELIVERMSG;
	    RTS_UNSET(sender, RTS_SENDING);

	    call = (sender->p_misc_flags & MF_REPLY_PEND ? SENDREC : SEND);
	    IPC_STATUS_ADD_CALL(caller_ptr, call);

	    /*
	     * If the message is originally from the kernel on behalf of this
	     * process, we must send the status flags accordingly.
	     */
	    if (sender->p_misc_flags & MF_SENDING_FROM_KERNEL) {
		IPC_STATUS_ADD_FLAGS(caller_ptr, IPC_FLG_MSG_FROM_KERNEL);
		/* we can clear the flag now, it is no longer needed */
		sender->p_misc_flags &= ~MF_SENDING_FROM_KERNEL;
	    }
	    if (sender->p_misc_flags & MF_SIG_DELAY)
		sig_delay_done(sender);

#if DEBUG_IPC_HOOK
	    hook_ipc_msgrecv(&caller_ptr->p_delivermsg, *xpp, caller_ptr);
#endif

	    *xpp = sender->p_q_link;		/* remove from queue */
	    sender->p_q_link = NULL;
	    goto receive_done;
	}
	xpp = &sender->p_q_link;		/* proceed to next */
    }
  }

  /* No suitable message is available or the caller couldn't send in SENDREC.
   * Block the process trying to receive, unless the flags tell otherwise.
   */
  if ( ! (flags & NON_BLOCKING)) {
	/* Check for a possible deadlock before actually blocking. */
	if (deadlock(RECEIVE, caller_ptr, src_e)) {
		return(ELOCKED);
	}

	caller_ptr->p_getfrom_e = src_e;
	RTS_SET(caller_ptr, RTS_RECEIVING);
	return(OK);
  } else {
	return(ENOTREADY);
  }

receive_done:
  if (caller_ptr->p_misc_flags & MF_REPLY_PEND)
	caller_ptr->p_misc_flags &= ~MF_REPLY_PEND;
  return OK;
}
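/*
 * mini_receive() above tries possible sources in a fixed order; in outline:
 *
 *	1. pending notifications	(skipped within SENDREC, MF_REPLY_PEND)
 *	2. pending asynchronous sends	(try_one() / try_async())
 *	3. the caller queue of blocked synchronous senders
 *
 * Only if all three come up empty does the caller block with RTS_RECEIVING,
 * or get ENOTREADY for a non-blocking receive.
 */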
/*===========================================================================*
 *				mini_notify				     *
 *===========================================================================*/
int mini_notify(
  const struct proc *caller_ptr,	/* sender of the notification */
  endpoint_t dst_e			/* which process to notify */
)
{
  register struct proc *dst_ptr;
  int src_id;				/* source id for late delivery */
  int dst_p;

  if (!isokendpt(dst_e, &dst_p)) {
	util_stacktrace();
	printf("mini_notify: bogus endpoint %d\n", dst_e);
	return EDEADSRCDST;
  }

  dst_ptr = proc_addr(dst_p);

  /* Check to see if target is blocked waiting for this message. A process
   * can be both sending and receiving during a SENDREC system call.
   */
  if (WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
      ! (dst_ptr->p_misc_flags & MF_REPLY_PEND)) {
      /* Destination is indeed waiting for a message. Assemble a notification
       * message and deliver it. Copy from pseudo-source HARDWARE, since the
       * message is in the kernel's address space.
       */
      assert(!(dst_ptr->p_misc_flags & MF_DELIVERMSG));

      BuildNotifyMessage(&dst_ptr->p_delivermsg, proc_nr(caller_ptr), dst_ptr);
      dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
      dst_ptr->p_misc_flags |= MF_DELIVERMSG;

      IPC_STATUS_ADD_CALL(dst_ptr, NOTIFY);
      RTS_UNSET(dst_ptr, RTS_RECEIVING);

      return(OK);
  }

  /* Destination is not ready to receive the notification. Add it to the
   * bit map with pending notifications. Note the indirectness: the privilege
   * id instead of the process number is used in the pending bit map.
   */
  src_id = priv(caller_ptr)->s_id;
  set_sys_bit(priv(dst_ptr)->s_notify_pending, src_id);
  return(OK);
}
#define ASCOMPLAIN(caller, entry, field)	\
	printf("kernel:%s:%d: asyn failed for %s in %s "	\
	"(%d/%zu, tab 0x%lx)\n",__FILE__,__LINE__,		\
	field, caller->p_name, entry, priv(caller)->s_asynsize, priv(caller)->s_asyntab)

#define A_RETR_FLD(entry, field)	\
  if(data_copy(caller_ptr->p_endpoint,	\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	KERNEL, (vir_bytes) &tabent.field,	\
	sizeof(tabent.field)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;			\
		goto asyn_error;		\
  }

#define A_RETR(entry) do {			\
  if (data_copy(				\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	KERNEL, (vir_bytes) &tabent,		\
	sizeof(tabent)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
  } while(0)

#define A_INSRT_FLD(entry, field)	\
  if(data_copy(KERNEL, (vir_bytes) &tabent.field,	\
	caller_ptr->p_endpoint,		\
	table_v + (entry)*sizeof(asynmsg_t) + offsetof(struct asynmsg,field),\
	sizeof(tabent.field)) != OK) {	\
		ASCOMPLAIN(caller_ptr, entry, #field);	\
		r = EFAULT;		\
		goto asyn_error;	\
  }

#define A_INSRT(entry) do {			\
  if (data_copy(KERNEL, (vir_bytes) &tabent,	\
	caller_ptr->p_endpoint, table_v + (entry)*sizeof(asynmsg_t),\
	sizeof(tabent)) != OK) {		\
		ASCOMPLAIN(caller_ptr, entry, "message entry");	\
		r = EFAULT;			\
		goto asyn_error;		\
  }						\
  } while(0)
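/*
 * The table that the A_ macros copy in and out lives in the sender's address
 * space. A hypothetical sender-side sketch of what try_deliver_senda() below
 * expects to find there (field names as used by the macros above):
 *
 *	asynmsg_t table[2];
 *	table[0].dst   = dst_e;		// some valid endpoint
 *	table[0].flags = AMF_VALID;	// entry holds a message
 *	table[0].msg   = m;
 *	table[1].flags = 0;		// empty entry, skipped
 *	// the table pointer and size then arrive here via the SENDA trap
 *
 * The kernel reads entries with A_RETR()/A_RETR_FLD(), writes the per-entry
 * result back with A_INSRT()/A_INSRT_FLD(), and marks entries AMF_DONE.
 */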
/*===========================================================================*
 *				try_deliver_senda			     *
 *===========================================================================*/
int try_deliver_senda(struct proc *caller_ptr,
				asynmsg_t *table,
				size_t size)
{
  int r, dst_p, done, do_notify;
  unsigned int i;
  unsigned flags;
  endpoint_t dst;
  struct proc *dst_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  const vir_bytes table_v = (vir_bytes) table;

  privp = priv(caller_ptr);

  /* Clear table */
  privp->s_asyntab = -1;
  privp->s_asynsize = 0;

  if (size == 0) return(OK);	/* Nothing to do, just return */

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;

  /* Limit size to something reasonable. An arbitrary choice is 16
   * times the number of process table entries.
   *
   * (this check has been duplicated in sys_call but is left here
   * as a sanity check)
   */
  if (size > 16*(NR_TASKS + NR_PROCS)) {
	r = EDOM;
	return r;
  }

  for (i = 0; i < size; i++) {
	/* Process each entry in the table and store the result in the table.
	 * If we're done handling a message, copy the result to the sender. */

	dst = NONE;
	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue;	/* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR)) {
		r = EINVAL;
		goto asyn_error;
	}
	if (!(flags & AMF_VALID)) {	/* Must contain message */
		r = EINVAL;
		goto asyn_error;
	}
	if (flags & AMF_DONE) continue;	/* Already done processing */

	r = OK;
	if (!isokendpt(tabent.dst, &dst_p))
		r = EDEADSRCDST; /* Bad destination, report the error */
	else if (iskerneln(dst_p))
		r = ECALLDENIED; /* Asyn sends to the kernel are not allowed */
	else if (!may_send_to(caller_ptr, dst_p))
		r = ECALLDENIED; /* Send denied by IPC mask */
	else	/* r == OK */
		dst_ptr = proc_addr(dst_p);

	/* XXX: RTS_NO_ENDPOINT should be removed */
	if (r == OK && RTS_ISSET(dst_ptr, RTS_NO_ENDPOINT)) {
		r = EDEADSRCDST;
	}

	/* Check if 'dst' is blocked waiting for this message.
	 * If AMF_NOREPLY is set, do not satisfy the receiving part of
	 * a SENDREC.
	 */
	if (r == OK && WILLRECEIVE(dst_ptr, caller_ptr->p_endpoint) &&
	    (!(flags&AMF_NOREPLY) || !(dst_ptr->p_misc_flags&MF_REPLY_PEND))) {
		/* Destination is indeed waiting for this message. */
		dst_ptr->p_delivermsg = tabent.msg;
		dst_ptr->p_delivermsg.m_source = caller_ptr->p_endpoint;
		dst_ptr->p_misc_flags |= MF_DELIVERMSG;
		IPC_STATUS_ADD_CALL(dst_ptr, SENDA);
		RTS_UNSET(dst_ptr, RTS_RECEIVING);
#if DEBUG_IPC_HOOK
		hook_ipc_msgrecv(&dst_ptr->p_delivermsg, caller_ptr, dst_ptr);
#endif
	} else if (r == OK) {
		/* Inform receiver that something is pending */
		set_sys_bit(priv(dst_ptr)->s_asyn_pending,
			    priv(caller_ptr)->s_id);
		done = FALSE;
		continue;
	}

	/* Store results */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY)
		do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR))
		do_notify = TRUE;
	A_INSRT(i);	/* Copy results to caller */
	continue;

asyn_error:
	if (dst != NONE)
		printf("KERNEL senda error %d to %d\n", r, dst);
	else
		printf("KERNEL senda error %d\n", r);
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), caller_ptr->p_endpoint);

  if (!done) {
	privp->s_asyntab = (vir_bytes) table;
	privp->s_asynsize = size;
  }

  return(OK);
}

/*===========================================================================*
 *				mini_senda				     *
 *===========================================================================*/
static int mini_senda(struct proc *caller_ptr, asynmsg_t *table, size_t size)
{
  struct priv *privp;

  privp = priv(caller_ptr);
  if (!(privp->s_flags & SYS_PROC)) {
	printf( "mini_senda: warning caller has no privilege structure\n");
	return(EPERM);
  }

  return try_deliver_senda(caller_ptr, table, size);
}
/*===========================================================================*
 *				try_async				     *
 *===========================================================================*/
static int try_async(struct proc * caller_ptr)
{
  int r;
  struct priv *privp;
  struct proc *src_ptr;
  sys_map_t *map;

  map = &priv(caller_ptr)->s_asyn_pending;

  /* Try all privilege structures */
  for (privp = BEG_PRIV_ADDR; privp < END_PRIV_ADDR; ++privp) {
	if (privp->s_proc_nr == NONE)
		continue;

	if (!get_sys_bit(*map, privp->s_id))
		continue;

	src_ptr = proc_addr(privp->s_proc_nr);

#ifdef CONFIG_SMP
	/*
	 * Do not copy from a process which does not have a stable address
	 * space due to VM fiddling with it.
	 */
	if (RTS_ISSET(src_ptr, RTS_VMINHIBIT)) {
		src_ptr->p_misc_flags |= MF_SENDA_VM_MISS;
		continue;
	}
#endif

	assert(!(caller_ptr->p_misc_flags & MF_DELIVERMSG));
	if ((r = try_one(src_ptr, caller_ptr)) == OK)
		return(r);
  }

  return(ESRCH);
}
/*===========================================================================*
 *				try_one					     *
 *===========================================================================*/
static int try_one(struct proc *src_ptr, struct proc *dst_ptr)
{
/* Try to receive an asynchronous message from 'src_ptr' */
  int r = EAGAIN, done, do_notify;
  unsigned int flags, i;
  size_t size;
  endpoint_t dst;
  struct proc *caller_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  vir_bytes table_v;

  privp = priv(src_ptr);
  if (!(privp->s_flags & SYS_PROC)) return(EPERM);
  size = privp->s_asynsize;
  table_v = privp->s_asyntab;

  /* Clear table pending message flag. We're done unless we're not. */
  unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);

  if (size == 0) return(EAGAIN);
  if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);

  caller_ptr = src_ptr;	/* Needed for A_ macros later on */

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;

  for (i = 0; i < size; i++) {
	/* Process each entry in the table and store the result in the table.
	 * If we're done handling a message, copy the result to the sender.
	 * Some checks done in mini_senda are duplicated here, as the sender
	 * could've altered the contents of the table in the meantime.
	 */

	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue;	/* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
		r = EINVAL;
	else if (!(flags & AMF_VALID))	/* Must contain message */
		r = EINVAL;
	else if (flags & AMF_DONE) continue;	/* Already done processing */

	/* Clear done flag. The sender is done sending when all messages in
	 * the table are marked done or empty. However, we will know that only
	 * the next time we enter this function or when the sender decides to
	 * send additional asynchronous messages and manages to deliver them
	 * all.
	 */
	done = FALSE;

	if (r == EINVAL)
		goto store_result;

	/* Message must be directed at receiving end */
	if (dst != dst_ptr->p_endpoint) continue;

	/* If AMF_NOREPLY is set, then this message is not a reply to a
	 * SENDREC and thus should not satisfy the receiving part of the
	 * SENDREC. This message is to be delivered later.
	 */
	if ((flags & AMF_NOREPLY) && (dst_ptr->p_misc_flags & MF_REPLY_PEND))
		continue;

	/* Destination is ready to receive the message; deliver it */
	r = OK;
	dst_ptr->p_delivermsg = tabent.msg;
	dst_ptr->p_delivermsg.m_source = src_ptr->p_endpoint;
	dst_ptr->p_misc_flags |= MF_DELIVERMSG;
#if DEBUG_IPC_HOOK
	hook_ipc_msgrecv(&dst_ptr->p_delivermsg, src_ptr, dst_ptr);
#endif

store_result:
	/* Store results for sender */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY) do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
	A_INSRT(i);	/* Copy results to sender */

	break;
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);

  if (done) {
	privp->s_asyntab = -1;
	privp->s_asynsize = 0;
  } else {
	set_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);
  }

asyn_error:
  return(r);
}
/*===========================================================================*
 *				cancel_async				     *
 *===========================================================================*/
int cancel_async(struct proc *src_ptr, struct proc *dst_ptr)
{
/* Cancel asynchronous messages from src to dst, because dst is not interested
 * in them (e.g., dst has been restarted) */
  int done, do_notify;
  unsigned int flags, i;
  size_t size;
  endpoint_t dst;
  struct proc *caller_ptr;
  struct priv *privp;
  asynmsg_t tabent;
  vir_bytes table_v;

  privp = priv(src_ptr);
  if (!(privp->s_flags & SYS_PROC)) return(EPERM);
  size = privp->s_asynsize;
  table_v = privp->s_asyntab;

  /* Clear table pending message flag. We're done unless we're not. */
  privp->s_asyntab = -1;
  privp->s_asynsize = 0;
  unset_sys_bit(priv(dst_ptr)->s_asyn_pending, privp->s_id);

  if (size == 0) return(EAGAIN);
  if (!may_send_to(src_ptr, proc_nr(dst_ptr))) return(ECALLDENIED);

  caller_ptr = src_ptr;	/* Needed for A_ macros later on */

  /* Scan the table */
  do_notify = FALSE;
  done = TRUE;

  for (i = 0; i < size; i++) {
	/* Process each entry in the table and store the result in the table.
	 * If we're done handling a message, copy the result to the sender.
	 * Some checks done in mini_senda are duplicated here, as the sender
	 * could've altered the contents of the table in the meantime.
	 */

	int r = EDEADSRCDST;	/* Cancel delivery due to dead dst */

	/* Copy message to kernel */
	A_RETR(i);
	flags = tabent.flags;
	dst = tabent.dst;

	if (flags == 0) continue;	/* Skip empty entries */

	/* 'flags' field must contain only valid bits */
	if(flags & ~(AMF_VALID|AMF_DONE|AMF_NOTIFY|AMF_NOREPLY|AMF_NOTIFY_ERR))
		r = EINVAL;
	else if (!(flags & AMF_VALID))	/* Must contain message */
		r = EINVAL;
	else if (flags & AMF_DONE) continue;	/* Already done processing */

	/* Message must be directed at receiving end */
	if (dst != dst_ptr->p_endpoint) {
		done = FALSE;
		continue;
	}

	/* Store results for sender */
	tabent.result = r;
	tabent.flags = flags | AMF_DONE;
	if (flags & AMF_NOTIFY) do_notify = TRUE;
	else if (r != OK && (flags & AMF_NOTIFY_ERR)) do_notify = TRUE;
	A_INSRT(i);	/* Copy results to sender */
  }

  if (do_notify)
	mini_notify(proc_addr(ASYNCM), src_ptr->p_endpoint);

  if (!done) {
	privp->s_asyntab = table_v;
	privp->s_asynsize = size;
  }

asyn_error:
  return(OK);
}
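/*
 * The ready queues used below, in outline: per CPU there is one singly
 * linked list per priority level, with separate head and tail arrays so
 * that enqueue() at the tail and enqueue_head() at the front are both O(1):
 *
 *	struct proc *run_q_head[NR_SCHED_QUEUES];  // per-cpu, see pick_proc()
 *	struct proc *run_q_tail[NR_SCHED_QUEUES];
 *	// run_q_head[q] -> ... p_nextready ... -> run_q_tail[q]
 */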
/*===========================================================================*
 *				enqueue					     *
 *===========================================================================*/
void enqueue(
  register struct proc *rp	/* this process is now runnable */
)
{
/* Add 'rp' to one of the queues of runnable processes. This function is
 * responsible for inserting a process into one of the scheduling queues.
 * The mechanism is implemented here. The actual scheduling policy is
 * defined in sched() and pick_proc().
 *
 * This function can be used x-cpu as it always uses the queues of the cpu the
 * process is assigned to.
 */
  int q = rp->p_priority;			/* scheduling queue to use */
  struct proc **rdy_head, **rdy_tail;

  assert(proc_is_runnable(rp));

  assert(q >= 0);

  rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now add the process to the queue. */
  if (!rdy_head[q]) {				/* add to empty queue */
      rdy_head[q] = rdy_tail[q] = rp;		/* create a new queue */
      rp->p_nextready = NULL;			/* mark new end */
  }
  else {					/* add to tail of queue */
      rdy_tail[q]->p_nextready = rp;		/* chain tail of queue */
      rdy_tail[q] = rp;				/* set new queue tail */
      rp->p_nextready = NULL;			/* mark new end */
  }

  if (cpuid == rp->p_cpu) {
	  /*
	   * If we are enqueueing a process with a higher priority than the
	   * current one, the current one gets preempted. The current process
	   * must be preemptible. Testing the priority also makes sure that a
	   * process does not preempt itself.
	   */
	  struct proc * p;
	  p = get_cpulocal_var(proc_ptr);
	  assert(p);
	  if((p->p_priority > rp->p_priority) &&
			  (priv(p)->s_flags & PREEMPTIBLE))
		  RTS_SET(p, RTS_PREEMPTED); /* calls dequeue() */
  }
#ifdef CONFIG_SMP
  /*
   * If the process was enqueued on a different cpu and that cpu is idle,
   * i.e. its local timer is stopped, we need to wake up that cpu and let it
   * schedule this new process.
   */
  else if (get_cpu_var(rp->p_cpu, cpu_is_idle)) {
	  smp_schedule(rp->p_cpu);
  }
#endif

  /* Make note of when this process was added to queue */
  read_tsc_64(&(rp->p_accounting.enter_queue));

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}

/*===========================================================================*
 *				enqueue_head				     *
 *===========================================================================*/
/*
 * Put a process at the front of its run queue. It comes in handy when a
 * process is preempted and removed from its run queue, so that a currently
 * not-runnable process does not sit on a run queue. We have to put this
 * process back at the front to be fair.
 */
static void enqueue_head(struct proc *rp)
{
  const int q = rp->p_priority;			/* scheduling queue to use */

  struct proc **rdy_head, **rdy_tail;

  assert(proc_ptr_ok(rp));
  assert(proc_is_runnable(rp));

  /*
   * The process was runnable without its quantum expired when dequeued. A
   * process with no time left should have been handled elsewhere, and
   * differently.
   */
  assert(rp->p_cpu_time_left);

  assert(q >= 0);

  rdy_head = get_cpu_var(rp->p_cpu, run_q_head);
  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now add the process to the queue. */
  if (!rdy_head[q]) {				/* add to empty queue */
	rdy_head[q] = rdy_tail[q] = rp;		/* create a new queue */
	rp->p_nextready = NULL;			/* mark new end */
  } else {					/* add to head of queue */
	rp->p_nextready = rdy_head[q];		/* chain head of queue */
	rdy_head[q] = rp;			/* set new queue head */
  }

  /* Make note of when this process was added to queue */
  read_tsc_64(&(rp->p_accounting.enter_queue));

  /* Process accounting for scheduling */
  rp->p_accounting.dequeues--;
  rp->p_accounting.preempted++;

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}
/*===========================================================================*
 *				dequeue					     *
 *===========================================================================*/
void dequeue(struct proc *rp)
/* this process is no longer runnable */
{
/* A process must be removed from the scheduling queues, for example, because
 * it has blocked. If the currently active process is removed, a new process
 * is picked to run by calling pick_proc().
 *
 * This function can operate x-cpu as it always removes the process from the
 * queue of the cpu the process is currently assigned to.
 */
  int q = rp->p_priority;		/* queue to use */
  struct proc **xpp;			/* iterate over queue */
  struct proc *prev_xp;
  u64_t tsc, tsc_delta;

  struct proc **rdy_tail;

  assert(proc_ptr_ok(rp));
  assert(!proc_is_runnable(rp));

  /* Side-effect for kernel: check if the task's stack is still ok? */
  assert (!iskernelp(rp) || *priv(rp)->s_stack_guard == STACK_GUARD);

  rdy_tail = get_cpu_var(rp->p_cpu, run_q_tail);

  /* Now make sure that the process is not in its ready queue. Remove the
   * process if it is found. A process can be made unready even if it is not
   * running by being sent a signal that kills it.
   */
  prev_xp = NULL;
  for (xpp = get_cpu_var_ptr(rp->p_cpu, run_q_head[q]); *xpp;
		  xpp = &(*xpp)->p_nextready) {
      if (*xpp == rp) {				/* found process to remove */
          *xpp = (*xpp)->p_nextready;		/* replace with next chain */
          if (rp == rdy_tail[q]) {		/* queue tail removed */
              rdy_tail[q] = prev_xp;		/* set new tail */
	  }

          break;
      }
      prev_xp = *xpp;				/* save previous in chain */
  }

  /* Process accounting for scheduling */
  rp->p_accounting.dequeues++;

  /* this is not all that accurate on virtual machines, especially with
     IO bound processes that only spend a short amount of time in the queue
     at a time. */
  if (rp->p_accounting.enter_queue) {
	read_tsc_64(&tsc);
	tsc_delta = tsc - rp->p_accounting.enter_queue;
	rp->p_accounting.time_in_queue = rp->p_accounting.time_in_queue +
		tsc_delta;
	rp->p_accounting.enter_queue = 0;
  }

#if DEBUG_SANITYCHECKS
  assert(runqueues_ok_local());
#endif
}

/*===========================================================================*
 *				pick_proc				     *
 *===========================================================================*/
static struct proc * pick_proc(void)
{
/* Decide who to run now. A new process is selected and returned.
 * When a billable process is selected, record it in 'bill_ptr', so that the
 * clock task can tell who to bill for system time.
 *
 * This function always uses the run queues of the local cpu!
 */
  register struct proc *rp;			/* process to run */
  struct proc **rdy_head;
  int q;					/* iterate over queues */

  /* Check each of the scheduling queues for ready processes. The number of
   * queues is defined in proc.h, and priorities are set in the task table.
   * If there are no processes ready to run, return NULL.
   */
  rdy_head = get_cpulocal_var(run_q_head);
  for (q=0; q < NR_SCHED_QUEUES; q++) {
	if(!(rp = rdy_head[q])) {
		TRACE(VF_PICKPROC, printf("cpu %d queue %d empty\n", cpuid, q););
		continue;
	}
	assert(proc_is_runnable(rp));
	if (priv(rp)->s_flags & BILLABLE)
		get_cpulocal_var(bill_ptr) = rp; /* bill for system time */
	return rp;
  }
  return NULL;
}

/*===========================================================================*
 *				endpoint_lookup				     *
 *===========================================================================*/
struct proc *endpoint_lookup(endpoint_t e)
{
	int n;

	if(!isokendpt(e, &n)) return NULL;

	return proc_addr(n);
}
/*===========================================================================*
 *				isokendpt_f				     *
 *===========================================================================*/
#if DEBUG_ENABLE_IPC_WARNINGS
int isokendpt_f(const char * file, int line, endpoint_t e, int * p,
	const int fatalflag)
#else
int isokendpt_f(endpoint_t e, int * p, const int fatalflag)
#endif
{
	int ok = 0;
	/* Convert an endpoint number into a process number.
	 * Return nonzero if the process is alive with the corresponding
	 * generation number, zero otherwise.
	 *
	 * This function is called with file and line number by the
	 * isokendpt_d macro if DEBUG_ENABLE_IPC_WARNINGS is defined,
	 * otherwise without. This allows us to print where the conversion
	 * was attempted, making the errors verbose without adding code for
	 * that at every call.
	 *
	 * If fatalflag is nonzero, we must panic if the conversion doesn't
	 * succeed.
	 */
	*p = _ENDPOINT_P(e);
	ok = 0;
	if(isokprocn(*p) && !isemptyn(*p) && proc_addr(*p)->p_endpoint == e)
		ok = 1;
	if(!ok && fatalflag)
		panic("invalid endpoint: %d", e);
	return ok;
}

static void notify_scheduler(struct proc *p)
{
	message m_no_quantum;
	int err;

	assert(!proc_kernel_scheduler(p));

	/* dequeue the process */
	RTS_SET(p, RTS_NO_QUANTUM);
	/*
	 * Notify the process's scheduler that it has run out of
	 * quantum. This is done by sending a message to the scheduler
	 * on the process's behalf.
	 */
	m_no_quantum.m_source = p->p_endpoint;
	m_no_quantum.m_type   = SCHEDULING_NO_QUANTUM;
	m_no_quantum.m_krn_lsys_schedule.acnt_queue =
		cpu_time_2_ms(p->p_accounting.time_in_queue);
	m_no_quantum.m_krn_lsys_schedule.acnt_deqs      = p->p_accounting.dequeues;
	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_sync  = p->p_accounting.ipc_sync;
	m_no_quantum.m_krn_lsys_schedule.acnt_ipc_async = p->p_accounting.ipc_async;
	m_no_quantum.m_krn_lsys_schedule.acnt_preempt   = p->p_accounting.preempted;
	m_no_quantum.m_krn_lsys_schedule.acnt_cpu       = cpuid;
	m_no_quantum.m_krn_lsys_schedule.acnt_cpu_load  = cpu_load();

	/* Reset accounting */
	reset_proc_accounting(p);

	if ((err = mini_send(p, p->p_scheduler->p_endpoint,
					&m_no_quantum, FROM_KERNEL))) {
		panic("WARNING: Scheduling: mini_send returned %d\n", err);
	}
}

void proc_no_time(struct proc * p)
{
	if (!proc_kernel_scheduler(p) && priv(p)->s_flags & PREEMPTIBLE) {
		/* this dequeues the process */
		notify_scheduler(p);
	}
	else {
		/*
		 * Non-preemptible processes only need their quantum to be
		 * renewed. In fact, they bypass scheduling.
		 */
		p->p_cpu_time_left = ms_2_cpu_time(p->p_quantum_size_ms);
#if DEBUG_RACE
		RTS_SET(p, RTS_PREEMPTED);
		RTS_UNSET(p, RTS_PREEMPTED);
#endif
	}
}

void reset_proc_accounting(struct proc *p)
{
	p->p_accounting.preempted = 0;
	p->p_accounting.ipc_sync  = 0;
	p->p_accounting.ipc_async = 0;
	p->p_accounting.dequeues  = 0;
	p->p_accounting.time_in_queue = 0;
	p->p_accounting.enter_queue = 0;
}
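/*
 * FPU state is switched lazily. switch_to_user() keeps the FPU exception
 * enabled for every process except the current FPU owner:
 *
 *	if (get_cpulocal_var(fpu_owner) != p)
 *		enable_fpu_exception();		// first FPU use will trap
 *	else
 *		disable_fpu_exception();	// p already owns the FPU
 *
 * so the first FPU instruction of a non-owner traps into
 * copr_not_available_handler() below, which saves the old owner's state,
 * restores (or initializes) that of the trapping process, and makes it the
 * new owner.
 */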
1882 */ 1883 *local_fpu_owner = NULL; /* release FPU */ 1884 cause_sig(proc_nr(p), SIGFPE); 1885 return; 1886 } 1887 1888 *local_fpu_owner = p; 1889 context_stop(proc_addr(KERNEL)); 1890 restore_user_context(p); 1891 NOT_REACHABLE; 1892 } 1893 1894 void release_fpu(struct proc * p) { 1895 struct proc ** fpu_owner_ptr; 1896 1897 fpu_owner_ptr = get_cpu_var_ptr(p->p_cpu, fpu_owner); 1898 1899 if (*fpu_owner_ptr == p) 1900 *fpu_owner_ptr = NULL; 1901 } 1902 1903 void ser_dump_proc() 1904 { 1905 struct proc *pp; 1906 1907 for (pp= BEG_PROC_ADDR; pp < END_PROC_ADDR; pp++) 1908 { 1909 if (isemptyp(pp)) 1910 continue; 1911 print_proc_recursive(pp); 1912 } 1913 } 1914 1915 void increase_proc_signals(struct proc *p) 1916 { 1917 p->p_signal_received++; 1918 } 1919