1 /* 2 * Changes: 3 * Jan 22, 2010: Created (Cristiano Giuffrida) 4 */ 5 6 #include "inc.h" 7 8 #include "kernel/proc.h" 9 10 static int check_request(struct rs_start *rs_start); 11 12 /*===========================================================================* 13 * do_up * 14 *===========================================================================*/ 15 int do_up(m_ptr) 16 message *m_ptr; /* request message pointer */ 17 { 18 /* A request was made to start a new system service. */ 19 struct rproc *rp; 20 struct rprocpub *rpub; 21 int r; 22 struct rs_start rs_start; 23 int noblock; 24 25 /* Check if the call can be allowed. */ 26 if((r = check_call_permission(m_ptr->m_source, RS_UP, NULL)) != OK) 27 return r; 28 29 /* Allocate a new system service slot. */ 30 r = alloc_slot(&rp); 31 if(r != OK) { 32 printf("RS: do_up: unable to allocate a new slot: %d\n", r); 33 return r; 34 } 35 rpub = rp->r_pub; 36 37 /* Copy the request structure. */ 38 r = copy_rs_start(m_ptr->m_source, m_ptr->m_rs_req.addr, &rs_start); 39 if (r != OK) { 40 return r; 41 } 42 r = check_request(&rs_start); 43 if (r != OK) { 44 return r; 45 } 46 noblock = (rs_start.rss_flags & RSS_NOBLOCK); 47 48 /* Initialize the slot as requested. */ 49 r = init_slot(rp, &rs_start, m_ptr->m_source); 50 if(r != OK) { 51 printf("RS: do_up: unable to init the new slot: %d\n", r); 52 return r; 53 } 54 55 /* Check for duplicates */ 56 if(lookup_slot_by_label(rpub->label)) { 57 printf("RS: service with the same label '%s' already exists\n", 58 rpub->label); 59 return EBUSY; 60 } 61 if(rpub->dev_nr>0 && lookup_slot_by_dev_nr(rpub->dev_nr)) { 62 printf("RS: service with the same device number %d already exists\n", 63 rpub->dev_nr); 64 return EBUSY; 65 } 66 67 /* All information was gathered. Now try to start the system service. */ 68 r = start_service(rp); 69 if(r != OK) { 70 return r; 71 } 72 73 /* Unblock the caller immediately if requested. */ 74 if(noblock) { 75 return OK; 76 } 77 78 /* Late reply - send a reply when service completes initialization. */ 79 rp->r_flags |= RS_LATEREPLY; 80 rp->r_caller = m_ptr->m_source; 81 rp->r_caller_request = RS_UP; 82 83 return EDONTREPLY; 84 } 85 86 /*===========================================================================* 87 * do_down * 88 *===========================================================================*/ 89 int do_down(message *m_ptr) 90 { 91 register struct rproc *rp; 92 int s; 93 char label[RS_MAX_LABEL_LEN]; 94 95 /* Copy label. */ 96 s = copy_label(m_ptr->m_source, m_ptr->m_rs_req.addr, 97 m_ptr->m_rs_req.len, label, sizeof(label)); 98 if(s != OK) { 99 return s; 100 } 101 102 /* Lookup slot by label. */ 103 rp = lookup_slot_by_label(label); 104 if(!rp) { 105 if(rs_verbose) 106 printf("RS: do_down: service '%s' not found\n", label); 107 return(ESRCH); 108 } 109 110 /* Check if the call can be allowed. */ 111 if((s = check_call_permission(m_ptr->m_source, RS_DOWN, rp)) != OK) 112 return s; 113 114 /* Stop service. */ 115 if (rp->r_flags & RS_TERMINATED) { 116 /* A recovery script is requesting us to bring down the service. 117 * The service is already gone, simply perform cleanup. 118 */ 119 if(rs_verbose) 120 printf("RS: recovery script performs service down...\n"); 121 unpublish_service(rp); 122 cleanup_service(rp); 123 return(OK); 124 } 125 stop_service(rp,RS_EXITING); 126 127 /* Late reply - send a reply when service dies. */ 128 rp->r_flags |= RS_LATEREPLY; 129 rp->r_caller = m_ptr->m_source; 130 rp->r_caller_request = RS_DOWN; 131 132 return EDONTREPLY; 133 } 134 135 /*===========================================================================* 136 * do_restart * 137 *===========================================================================*/ 138 int do_restart(message *m_ptr) 139 { 140 struct rproc *rp; 141 int s, r; 142 char label[RS_MAX_LABEL_LEN]; 143 char script[MAX_SCRIPT_LEN]; 144 145 /* Copy label. */ 146 s = copy_label(m_ptr->m_source, m_ptr->m_rs_req.addr, 147 m_ptr->m_rs_req.len, label, sizeof(label)); 148 if(s != OK) { 149 return s; 150 } 151 152 /* Lookup slot by label. */ 153 rp = lookup_slot_by_label(label); 154 if(!rp) { 155 if(rs_verbose) 156 printf("RS: do_restart: service '%s' not found\n", label); 157 return(ESRCH); 158 } 159 160 /* Check if the call can be allowed. */ 161 if((r = check_call_permission(m_ptr->m_source, RS_RESTART, rp)) != OK) 162 return r; 163 164 /* We can only be asked to restart a service from a recovery script. */ 165 if (! (rp->r_flags & RS_TERMINATED) ) { 166 if(rs_verbose) 167 printf("RS: %s is still running\n", srv_to_string(rp)); 168 return EBUSY; 169 } 170 171 if(rs_verbose) 172 printf("RS: recovery script performs service restart...\n"); 173 174 /* Restart the service, but make sure we don't call the script again. */ 175 strcpy(script, rp->r_script); 176 rp->r_script[0] = '\0'; 177 restart_service(rp); 178 strcpy(rp->r_script, script); 179 180 return OK; 181 } 182 183 /*===========================================================================* 184 * do_clone * 185 *===========================================================================*/ 186 int do_clone(message *m_ptr) 187 { 188 struct rproc *rp; 189 struct rprocpub *rpub; 190 int s, r; 191 char label[RS_MAX_LABEL_LEN]; 192 193 /* Copy label. */ 194 s = copy_label(m_ptr->m_source, m_ptr->m_rs_req.addr, 195 m_ptr->m_rs_req.len, label, sizeof(label)); 196 if(s != OK) { 197 return s; 198 } 199 200 /* Lookup slot by label. */ 201 rp = lookup_slot_by_label(label); 202 if(!rp) { 203 if(rs_verbose) 204 printf("RS: do_clone: service '%s' not found\n", label); 205 return(ESRCH); 206 } 207 rpub = rp->r_pub; 208 209 /* Check if the call can be allowed. */ 210 if((r = check_call_permission(m_ptr->m_source, RS_CLONE, rp)) != OK) 211 return r; 212 213 /* Don't clone if a replica is already available. */ 214 if(rp->r_next_rp) { 215 return EEXIST; 216 } 217 218 /* Clone the service as requested. */ 219 rpub->sys_flags |= SF_USE_REPL; 220 if ((r = clone_service(rp, RST_SYS_PROC)) != OK) { 221 rpub->sys_flags &= ~SF_USE_REPL; 222 return r; 223 } 224 225 return OK; 226 } 227 228 /*===========================================================================* 229 * do_edit * 230 *===========================================================================*/ 231 int do_edit(message *m_ptr) 232 { 233 struct rproc *rp; 234 struct rprocpub *rpub; 235 struct rs_start rs_start; 236 int r; 237 char label[RS_MAX_LABEL_LEN]; 238 239 /* Copy the request structure. */ 240 r = copy_rs_start(m_ptr->m_source, m_ptr->m_rs_req.addr, &rs_start); 241 if (r != OK) { 242 return r; 243 } 244 245 /* Copy label. */ 246 r = copy_label(m_ptr->m_source, rs_start.rss_label.l_addr, 247 rs_start.rss_label.l_len, label, sizeof(label)); 248 if(r != OK) { 249 return r; 250 } 251 252 /* Lookup slot by label. */ 253 rp = lookup_slot_by_label(label); 254 if(!rp) { 255 if(rs_verbose) 256 printf("RS: do_edit: service '%s' not found\n", label); 257 return ESRCH; 258 } 259 rpub = rp->r_pub; 260 261 /* Check if the call can be allowed. */ 262 if((r = check_call_permission(m_ptr->m_source, RS_EDIT, rp)) != OK) 263 return r; 264 265 if(rs_verbose) 266 printf("RS: %s edits settings\n", srv_to_string(rp)); 267 268 /* Synch the privilege structure with the kernel. */ 269 if ((r = sys_getpriv(&rp->r_priv, rpub->endpoint)) != OK) { 270 printf("RS: do_edit: unable to synch privilege structure: %d\n", r); 271 return r; 272 } 273 274 /* Tell scheduler this process is finished */ 275 if ((r = sched_stop(rp->r_scheduler, rpub->endpoint)) != OK) { 276 printf("RS: do_edit: scheduler won't give up process: %d\n", r); 277 return r; 278 } 279 280 /* Edit the slot as requested. */ 281 if((r = edit_slot(rp, &rs_start, m_ptr->m_source)) != OK) { 282 printf("RS: do_edit: unable to edit the existing slot: %d\n", r); 283 return r; 284 } 285 286 /* Update privilege structure. */ 287 r = sys_privctl(rpub->endpoint, SYS_PRIV_UPDATE_SYS, &rp->r_priv); 288 if(r != OK) { 289 printf("RS: do_edit: unable to update privilege structure: %d\n", r); 290 return r; 291 } 292 293 /* Update VM calls. */ 294 if ((r = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0], 295 !!(rp->r_priv.s_flags & SYS_PROC))) != OK) { 296 printf("RS: do_edit: failed: %d\n", r); 297 return r; 298 } 299 300 /* Reinitialize scheduling. */ 301 if ((r = sched_init_proc(rp)) != OK) { 302 printf("RS: do_edit: unable to reinitialize scheduling: %d\n", r); 303 return r; 304 } 305 306 /* Cleanup old replicas and create a new one, if necessary. */ 307 if(rpub->sys_flags & SF_USE_REPL) { 308 if(rp->r_next_rp) { 309 cleanup_service(rp->r_next_rp); 310 rp->r_next_rp = NULL; 311 } 312 if ((r = clone_service(rp, RST_SYS_PROC)) != OK) { 313 printf("RS: warning: unable to clone %s\n", srv_to_string(rp)); 314 } 315 } 316 317 return OK; 318 } 319 320 /*===========================================================================* 321 * do_refresh * 322 *===========================================================================*/ 323 int do_refresh(message *m_ptr) 324 { 325 register struct rproc *rp; 326 int s; 327 char label[RS_MAX_LABEL_LEN]; 328 329 /* Copy label. */ 330 s = copy_label(m_ptr->m_source, m_ptr->m_rs_req.addr, 331 m_ptr->m_rs_req.len, label, sizeof(label)); 332 if(s != OK) { 333 return s; 334 } 335 336 /* Lookup slot by label. */ 337 rp = lookup_slot_by_label(label); 338 if(!rp) { 339 if(rs_verbose) 340 printf("RS: do_refresh: service '%s' not found\n", label); 341 return(ESRCH); 342 } 343 344 /* Check if the call can be allowed. */ 345 if((s = check_call_permission(m_ptr->m_source, RS_REFRESH, rp)) != OK) 346 return s; 347 348 /* Refresh service. */ 349 if(rs_verbose) 350 printf("RS: %s refreshing\n", srv_to_string(rp)); 351 stop_service(rp,RS_REFRESHING); 352 353 return OK; 354 } 355 356 /*===========================================================================* 357 * do_shutdown * 358 *===========================================================================*/ 359 int do_shutdown(message *m_ptr) 360 { 361 int slot_nr; 362 struct rproc *rp; 363 int r; 364 365 /* Check if the call can be allowed. */ 366 if (m_ptr != NULL) { 367 if((r = check_call_permission(m_ptr->m_source, RS_SHUTDOWN, NULL)) != OK) 368 return r; 369 } 370 371 if(rs_verbose) 372 printf("RS: shutting down...\n"); 373 374 /* Set flag to tell RS we are shutting down. */ 375 shutting_down = TRUE; 376 377 /* Don't restart dead services. */ 378 for (slot_nr = 0; slot_nr < NR_SYS_PROCS; slot_nr++) { 379 rp = &rproc[slot_nr]; 380 if (rp->r_flags & RS_IN_USE) { 381 rp->r_flags |= RS_EXITING; 382 } 383 } 384 return(OK); 385 } 386 387 /*===========================================================================* 388 * do_init_ready * 389 *===========================================================================*/ 390 int do_init_ready(message *m_ptr) 391 { 392 int who_p; 393 message m; 394 struct rproc *rp; 395 struct rprocpub *rpub; 396 int result, is_rs; 397 int r; 398 399 is_rs = (m_ptr->m_source == RS_PROC_NR); 400 who_p = _ENDPOINT_P(m_ptr->m_source); 401 result = m_ptr->m_rs_init.result; 402 403 /* Check for RS failing initialization first. */ 404 if(is_rs && result != OK) { 405 return result; 406 } 407 408 rp = rproc_ptr[who_p]; 409 rpub = rp->r_pub; 410 411 /* Make sure the originating service was requested to initialize. */ 412 if(! (rp->r_flags & RS_INITIALIZING) ) { 413 if(rs_verbose) 414 printf("RS: do_init_ready: got unexpected init ready msg from %d\n", 415 m_ptr->m_source); 416 return EINVAL; 417 } 418 419 /* Check if something went wrong and the service failed to init. 420 * In that case, kill the service. 421 */ 422 if(result != OK) { 423 if(rs_verbose) 424 printf("RS: %s initialization error: %s\n", srv_to_string(rp), 425 init_strerror(result)); 426 if (result == ERESTART) 427 rp->r_flags |= RS_REINCARNATE; 428 crash_service(rp); /* simulate crash */ 429 return EDONTREPLY; 430 } 431 432 /* Mark the slot as no longer initializing. */ 433 rp->r_flags &= ~RS_INITIALIZING; 434 rp->r_check_tm = 0; 435 getticks(&rp->r_alive_tm); 436 437 /* Reply and unblock the service before doing anything else. */ 438 m.m_type = OK; 439 reply(rpub->endpoint, rp, &m); 440 441 /* See if a late reply has to be sent. */ 442 late_reply(rp, OK); 443 444 if(rs_verbose) 445 printf("RS: %s initialized\n", srv_to_string(rp)); 446 447 /* If the service has completed initialization after a live 448 * update, end the update now. 449 */ 450 if(rp->r_flags & RS_UPDATING) { 451 printf("RS: update succeeded\n"); 452 end_update(OK, RS_DONTREPLY); 453 } 454 455 /* If the service has completed initialization after a crash 456 * make the new instance active and cleanup the old replica. 457 */ 458 if(rp->r_prev_rp) { 459 cleanup_service(rp->r_prev_rp); 460 rp->r_prev_rp = NULL; 461 rp->r_restarts += 1; 462 463 if(rs_verbose) 464 printf("RS: %s completed restart\n", srv_to_string(rp)); 465 } 466 467 /* If we must keep a replica of this system service, create it now. */ 468 if(rpub->sys_flags & SF_USE_REPL) { 469 if ((r = clone_service(rp, RST_SYS_PROC)) != OK) { 470 printf("RS: warning: unable to clone %s\n", srv_to_string(rp)); 471 } 472 } 473 474 return is_rs ? OK : EDONTREPLY; /* return what the caller expects */ 475 } 476 477 /*===========================================================================* 478 * do_update * 479 *===========================================================================*/ 480 int do_update(message *m_ptr) 481 { 482 struct rproc *rp; 483 struct rproc *new_rp; 484 struct rprocpub *rpub; 485 struct rs_start rs_start; 486 int noblock, do_self_update; 487 int s; 488 char label[RS_MAX_LABEL_LEN]; 489 int lu_state; 490 int prepare_maxtime; 491 492 /* Copy the request structure. */ 493 s = copy_rs_start(m_ptr->m_source, m_ptr->m_rs_req.addr, &rs_start); 494 if (s != OK) { 495 return s; 496 } 497 noblock = (rs_start.rss_flags & RSS_NOBLOCK); 498 do_self_update = (rs_start.rss_flags & RSS_SELF_LU); 499 s = check_request(&rs_start); 500 if (s != OK) { 501 return s; 502 } 503 504 /* Copy label. */ 505 s = copy_label(m_ptr->m_source, rs_start.rss_label.l_addr, 506 rs_start.rss_label.l_len, label, sizeof(label)); 507 if(s != OK) { 508 return s; 509 } 510 511 /* Lookup slot by label. */ 512 rp = lookup_slot_by_label(label); 513 if(!rp) { 514 if(rs_verbose) 515 printf("RS: do_update: service '%s' not found\n", label); 516 return ESRCH; 517 } 518 rpub = rp->r_pub; 519 520 /* Check if the call can be allowed. */ 521 if((s = check_call_permission(m_ptr->m_source, RS_UPDATE, rp)) != OK) 522 return s; 523 524 /* Retrieve live update state. */ 525 lu_state = m_ptr->m_rs_update.state; 526 if(lu_state == SEF_LU_STATE_NULL) { 527 return(EINVAL); 528 } 529 530 /* Retrieve prepare max time. */ 531 prepare_maxtime = m_ptr->m_rs_update.prepare_maxtime; 532 if(prepare_maxtime) { 533 if(prepare_maxtime < 0 || prepare_maxtime > RS_MAX_PREPARE_MAXTIME) { 534 return(EINVAL); 535 } 536 } 537 else { 538 prepare_maxtime = RS_DEFAULT_PREPARE_MAXTIME; 539 } 540 541 /* Make sure we are not already updating. */ 542 if(rupdate.flags & RS_UPDATING) { 543 if(rs_verbose) 544 printf("RS: do_update: an update is already in progress\n"); 545 return EBUSY; 546 } 547 548 /* A self update live updates a service instance into a replica, a regular 549 * update live updates a service instance into a new version, as specified 550 * by the given binary. 551 */ 552 if(do_self_update) { 553 if(rs_verbose) 554 printf("RS: %s performs self update\n", srv_to_string(rp)); 555 556 /* Clone the system service and use the replica as the new version. */ 557 s = clone_service(rp, LU_SYS_PROC); 558 if(s != OK) { 559 printf("RS: do_update: unable to clone service: %d\n", s); 560 return s; 561 } 562 } 563 else { 564 if(rs_verbose) 565 printf("RS: %s performs regular update\n", srv_to_string(rp)); 566 567 /* Allocate a system service slot for the new version. */ 568 s = alloc_slot(&new_rp); 569 if(s != OK) { 570 printf("RS: do_update: unable to allocate a new slot: %d\n", s); 571 return s; 572 } 573 574 /* Initialize the slot as requested. */ 575 s = init_slot(new_rp, &rs_start, m_ptr->m_source); 576 if(s != OK) { 577 printf("RS: do_update: unable to init the new slot: %d\n", s); 578 return s; 579 } 580 581 /* Let the new version inherit defaults from the old one. */ 582 inherit_service_defaults(rp, new_rp); 583 584 /* Link the two versions. */ 585 rp->r_new_rp = new_rp; 586 new_rp->r_old_rp = rp; 587 588 /* Create new version of the service but don't let it run. */ 589 new_rp->r_priv.s_flags |= LU_SYS_PROC; 590 s = create_service(new_rp); 591 if(s != OK) { 592 printf("RS: do_update: unable to create a new service: %d\n", s); 593 return s; 594 } 595 } 596 597 /* Mark both versions as updating. */ 598 rp->r_flags |= RS_UPDATING; 599 rp->r_new_rp->r_flags |= RS_UPDATING; 600 rupdate.flags |= RS_UPDATING; 601 getticks(&rupdate.prepare_tm); 602 rupdate.prepare_maxtime = prepare_maxtime; 603 rupdate.rp = rp; 604 605 if(rs_verbose) 606 printf("RS: %s updating\n", srv_to_string(rp)); 607 608 /* If RS is updating, set up signal managers for the new instance. 609 * The current RS instance must be made the backup signal manager to 610 * support rollback in case of a crash during initialization. 611 */ 612 if(rp->r_priv.s_flags & ROOT_SYS_PROC) { 613 new_rp = rp->r_new_rp; 614 615 s = update_sig_mgrs(new_rp, SELF, new_rp->r_pub->endpoint); 616 if(s != OK) { 617 cleanup_service(new_rp); 618 return s; 619 } 620 } 621 622 if(noblock) { 623 /* Unblock the caller immediately if requested. */ 624 m_ptr->m_type = OK; 625 reply(m_ptr->m_source, NULL, m_ptr); 626 } 627 else { 628 /* Send a reply when the new version completes initialization. */ 629 rp->r_flags |= RS_LATEREPLY; 630 rp->r_caller = m_ptr->m_source; 631 rp->r_caller_request = RS_UPDATE; 632 } 633 634 /* Request to update. */ 635 m_ptr->m_type = RS_LU_PREPARE; 636 if(rpub->endpoint == RS_PROC_NR) { 637 /* RS can process the request directly. */ 638 do_sef_lu_request(m_ptr); 639 } 640 else { 641 /* Send request message to the system service. */ 642 asynsend3(rpub->endpoint, m_ptr, AMF_NOREPLY); 643 } 644 645 return EDONTREPLY; 646 } 647 648 /*===========================================================================* 649 * do_upd_ready * 650 *===========================================================================*/ 651 int do_upd_ready(message *m_ptr) 652 { 653 struct rproc *rp, *old_rp, *new_rp; 654 int who_p; 655 int result; 656 int is_rs; 657 int r; 658 659 who_p = _ENDPOINT_P(m_ptr->m_source); 660 rp = rproc_ptr[who_p]; 661 result = m_ptr->m_rs_update.result; 662 is_rs = (m_ptr->m_source == RS_PROC_NR); 663 664 /* Make sure the originating service was requested to prepare for update. */ 665 if(rp != rupdate.rp) { 666 if(rs_verbose) 667 printf("RS: do_upd_ready: got unexpected update ready msg from %d\n", 668 m_ptr->m_source); 669 return EINVAL; 670 } 671 672 /* Check if something went wrong and the service failed to prepare 673 * for the update. In that case, end the update process. The old version will 674 * be replied to and continue executing. 675 */ 676 if(result != OK) { 677 end_update(result, RS_REPLY); 678 679 printf("RS: update failed: %s\n", lu_strerror(result)); 680 return is_rs ? result : EDONTREPLY; /* return what the caller expects */ 681 } 682 683 old_rp = rp; 684 new_rp = rp->r_new_rp; 685 686 /* If RS itself is updating, yield control to the new version immediately. */ 687 if(is_rs) { 688 r = init_service(new_rp, SEF_INIT_LU); 689 if(r != OK) { 690 panic("unable to initialize the new RS instance: %d", r); 691 } 692 r = sys_privctl(new_rp->r_pub->endpoint, SYS_PRIV_YIELD, NULL); 693 if(r != OK) { 694 panic("unable to yield control to the new RS instance: %d", r); 695 } 696 /* If we get this far, the new version failed to initialize. Rollback. */ 697 r = srv_update(RS_PROC_NR, new_rp->r_pub->endpoint); 698 assert(r == OK); /* can't fail */ 699 end_update(ERESTART, RS_REPLY); 700 return ERESTART; 701 } 702 703 /* Perform the update. */ 704 r = update_service(&old_rp, &new_rp, RS_SWAP); 705 if(r != OK) { 706 end_update(r, RS_REPLY); 707 printf("RS: update failed: error %d\n", r); 708 return EDONTREPLY; 709 } 710 711 /* Let the new version run. */ 712 r = run_service(new_rp, SEF_INIT_LU); 713 if(r != OK) { 714 /* Something went wrong. Rollback. */ 715 r = update_service(&new_rp, &old_rp, RS_SWAP); 716 assert(r == OK); /* can't fail */ 717 end_update(r, RS_REPLY); 718 printf("RS: update failed: error %d\n", r); 719 return EDONTREPLY; 720 } 721 722 return EDONTREPLY; 723 } 724 725 /*===========================================================================* 726 * do_period * 727 *===========================================================================*/ 728 void do_period(m_ptr) 729 message *m_ptr; 730 { 731 register struct rproc *rp; 732 register struct rprocpub *rpub; 733 clock_t now = m_ptr->m_notify.timestamp; 734 int s; 735 long period; 736 737 /* If an update is in progress, check its status. */ 738 if(rupdate.flags & RS_UPDATING) { 739 update_period(m_ptr); 740 } 741 742 /* Search system services table. Only check slots that are in use and not 743 * updating. 744 */ 745 for (rp=BEG_RPROC_ADDR; rp<END_RPROC_ADDR; rp++) { 746 rpub = rp->r_pub; 747 if ((rp->r_flags & RS_ACTIVE) && !(rp->r_flags & RS_UPDATING)) { 748 749 /* Compute period. */ 750 period = rp->r_period; 751 if(rp->r_flags & RS_INITIALIZING) { 752 period = RS_INIT_T; 753 } 754 755 /* If the service is to be revived (because it repeatedly exited, 756 * and was not directly restarted), the binary backoff field is 757 * greater than zero. 758 */ 759 if (rp->r_backoff > 0) { 760 rp->r_backoff -= 1; 761 if (rp->r_backoff == 0) { 762 restart_service(rp); 763 } 764 } 765 766 /* If the service was signaled with a SIGTERM and fails to respond, 767 * kill the system service with a SIGKILL signal. 768 */ 769 else if (rp->r_stop_tm > 0 && now - rp->r_stop_tm > 2*RS_DELTA_T 770 && rp->r_pid > 0) { 771 rp->r_stop_tm = 0; 772 crash_service(rp); /* simulate crash */ 773 } 774 775 /* There seems to be no special conditions. If the service has a 776 * period assigned check its status. 777 */ 778 else if (period > 0) { 779 780 /* Check if an answer to a status request is still pending. If 781 * the service didn't respond within time, kill it to simulate 782 * a crash. The failure will be detected and the service will 783 * be restarted automatically. Give the service a free pass if 784 * somebody is initializing. There may be some weird dependencies 785 * if another service is, for example, restarting at the same 786 * time. 787 */ 788 if (rp->r_alive_tm < rp->r_check_tm) { 789 if (now - rp->r_alive_tm > 2*period && 790 rp->r_pid > 0 && !(rp->r_flags & RS_NOPINGREPLY)) { 791 if(rs_verbose) 792 printf("RS: %s reported late\n", srv_to_string(rp)); 793 if(lookup_slot_by_flags(RS_INITIALIZING)) { 794 /* Skip for now. */ 795 if(rs_verbose) 796 printf("RS: %s gets a free pass\n", 797 srv_to_string(rp)); 798 rp->r_alive_tm = now; 799 rp->r_check_tm = now+1; 800 continue; 801 } 802 rp->r_flags |= RS_NOPINGREPLY; 803 crash_service(rp); /* simulate crash */ 804 } 805 } 806 807 /* No answer pending. Check if a period expired since the last 808 * check and, if so request the system service's status. 809 */ 810 else if (now - rp->r_check_tm > rp->r_period) { 811 ipc_notify(rpub->endpoint); /* request status */ 812 rp->r_check_tm = now; /* mark time */ 813 } 814 } 815 } 816 } 817 818 /* Reschedule a synchronous alarm for the next period. */ 819 if (OK != (s=sys_setalarm(RS_DELTA_T, 0))) 820 panic("couldn't set alarm: %d", s); 821 } 822 823 /*===========================================================================* 824 * do_sigchld * 825 *===========================================================================*/ 826 void do_sigchld() 827 { 828 /* PM informed us that there are dead children to cleanup. Go get them. */ 829 pid_t pid; 830 int status; 831 struct rproc *rp; 832 struct rproc **rps; 833 int i, nr_rps; 834 835 if(rs_verbose) 836 printf("RS: got SIGCHLD signal, cleaning up dead children\n"); 837 838 while ( (pid = waitpid(-1, &status, WNOHANG)) != 0 ) { 839 rp = lookup_slot_by_pid(pid); 840 if(rp != NULL) { 841 842 if(rs_verbose) 843 printf("RS: %s exited via another signal manager\n", 844 srv_to_string(rp)); 845 846 /* The slot is still there. This means RS is not the signal 847 * manager assigned to the process. Ignore the event but 848 * free slots for all the service instances and send a late 849 * reply if necessary. 850 */ 851 get_service_instances(rp, &rps, &nr_rps); 852 for(i=0;i<nr_rps;i++) { 853 if(rupdate.flags & RS_UPDATING) { 854 rupdate.flags &= ~RS_UPDATING; 855 } 856 free_slot(rps[i]); 857 } 858 } 859 } 860 } 861 862 /*===========================================================================* 863 * do_getsysinfo * 864 *===========================================================================*/ 865 int do_getsysinfo(m_ptr) 866 message *m_ptr; 867 { 868 vir_bytes src_addr, dst_addr; 869 int dst_proc; 870 size_t len; 871 int s; 872 873 /* Check if the call can be allowed. */ 874 if((s = check_call_permission(m_ptr->m_source, 0, NULL)) != OK) 875 return s; 876 877 switch(m_ptr->m_lsys_getsysinfo.what) { 878 case SI_PROC_TAB: 879 src_addr = (vir_bytes) rproc; 880 len = sizeof(struct rproc) * NR_SYS_PROCS; 881 break; 882 case SI_PROCPUB_TAB: 883 src_addr = (vir_bytes) rprocpub; 884 len = sizeof(struct rprocpub) * NR_SYS_PROCS; 885 break; 886 default: 887 return(EINVAL); 888 } 889 890 if (len != m_ptr->m_lsys_getsysinfo.size) 891 return(EINVAL); 892 893 dst_proc = m_ptr->m_source; 894 dst_addr = m_ptr->m_lsys_getsysinfo.where; 895 return sys_datacopy(SELF, src_addr, dst_proc, dst_addr, len); 896 } 897 898 /*===========================================================================* 899 * do_lookup * 900 *===========================================================================*/ 901 int do_lookup(m_ptr) 902 message *m_ptr; 903 { 904 static char namebuf[100]; 905 int len, r; 906 struct rproc *rrp; 907 struct rprocpub *rrpub; 908 909 len = m_ptr->m_rs_req.name_len; 910 911 if(len < 2 || len >= sizeof(namebuf)) { 912 printf("RS: len too weird (%d)\n", len); 913 return EINVAL; 914 } 915 916 if((r=sys_datacopy(m_ptr->m_source, (vir_bytes) m_ptr->m_rs_req.name, 917 SELF, (vir_bytes) namebuf, len)) != OK) { 918 printf("RS: name copy failed\n"); 919 return r; 920 921 } 922 923 namebuf[len] = '\0'; 924 925 rrp = lookup_slot_by_label(namebuf); 926 if(!rrp) { 927 return ESRCH; 928 } 929 rrpub = rrp->r_pub; 930 m_ptr->m_rs_req.endpoint = rrpub->endpoint; 931 932 return OK; 933 } 934 935 /*===========================================================================* 936 * check_request * 937 *===========================================================================*/ 938 static int check_request(struct rs_start *rs_start) 939 { 940 /* Verify scheduling parameters */ 941 if (rs_start->rss_scheduler != KERNEL && 942 (rs_start->rss_scheduler < 0 || 943 rs_start->rss_scheduler > LAST_SPECIAL_PROC_NR)) { 944 printf("RS: check_request: invalid scheduler %d\n", 945 rs_start->rss_scheduler); 946 return EINVAL; 947 } 948 if (rs_start->rss_priority >= NR_SCHED_QUEUES) { 949 printf("RS: check_request: priority %u out of range\n", 950 rs_start->rss_priority); 951 return EINVAL; 952 } 953 if (rs_start->rss_quantum <= 0) { 954 printf("RS: check_request: quantum %u out of range\n", 955 rs_start->rss_quantum); 956 return EINVAL; 957 } 958 959 if (rs_start->rss_cpu == RS_CPU_BSP) 960 rs_start->rss_cpu = machine.bsp_id; 961 else if (rs_start->rss_cpu == RS_CPU_DEFAULT) { 962 /* keep the default value */ 963 } else if (rs_start->rss_cpu < 0) 964 return EINVAL; 965 else if (rs_start->rss_cpu > machine.processors_count) { 966 printf("RS: cpu number %d out of range 0-%d, using BSP\n", 967 rs_start->rss_cpu, machine.processors_count); 968 rs_start->rss_cpu = machine.bsp_id; 969 } 970 971 /* Verify signal manager. */ 972 if (rs_start->rss_sigmgr != SELF && 973 (rs_start->rss_sigmgr < 0 || 974 rs_start->rss_sigmgr > LAST_SPECIAL_PROC_NR)) { 975 printf("RS: check_request: invalid signal manager %d\n", 976 rs_start->rss_sigmgr); 977 return EINVAL; 978 } 979 980 return OK; 981 } 982 983