1 /* $OpenBSD: job.c,v 1.136 2015/07/28 14:22:26 espie Exp $ */ 2 /* $NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $ */ 3 4 /* 5 * Copyright (c) 2012 Marc Espie. 6 * 7 * Extensive code modifications for the OpenBSD project. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD 22 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 /* 31 * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. 32 * Copyright (c) 1988, 1989 by Adam de Boor 33 * Copyright (c) 1989 by Berkeley Softworks 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Adam de Boor. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /*- 65 * job.c -- 66 * handle the creation etc. of our child processes. 67 * 68 * Interface: 69 * Job_Make Start the creation of the given target. 70 * 71 * Job_Init Called to initialize this module. 72 * 73 * Job_Begin execute commands attached to the .BEGIN target 74 * if any. 75 * 76 * can_start_job Return true if we can start job 77 * 78 * Job_Empty Return true if the job table is completely 79 * empty. 80 * 81 * Job_Finish Perform any final processing which needs doing. 82 * This includes the execution of any commands 83 * which have been/were attached to the .END 84 * target. 85 * 86 * Job_AbortAll Abort all current jobs. It doesn't 87 * handle output or do anything for the jobs, 88 * just kills them. 89 * 90 * Job_Wait Wait for all running jobs to finish. 91 */ 92 93 #include <sys/types.h> 94 #include <sys/wait.h> 95 #include <ctype.h> 96 #include <errno.h> 97 #include <fcntl.h> 98 #include <signal.h> 99 #include <stdarg.h> 100 #include <stdio.h> 101 #include <stdlib.h> 102 #include <string.h> 103 #include <unistd.h> 104 #include "config.h" 105 #include "defines.h" 106 #include "job.h" 107 #include "engine.h" 108 #include "pathnames.h" 109 #include "var.h" 110 #include "targ.h" 111 #include "error.h" 112 #include "extern.h" 113 #include "lst.h" 114 #include "gnode.h" 115 #include "memory.h" 116 #include "make.h" 117 #include "buf.h" 118 119 static int aborting = 0; /* why is the make aborting? */ 120 #define ABORT_ERROR 1 /* Because of an error */ 121 #define ABORT_INTERRUPT 2 /* Because it was interrupted */ 122 #define ABORT_WAIT 3 /* Waiting for jobs to finish */ 123 124 static int maxJobs; /* The most children we can run at once */ 125 static int nJobs; /* Number of jobs already allocated */ 126 static bool no_new_jobs; /* Mark recursive shit so we shouldn't start 127 * something else at the same time 128 */ 129 Job *runningJobs; /* Jobs currently running a process */ 130 Job *errorJobs; /* Jobs in error at end */ 131 static Job *heldJobs; /* Jobs not running yet because of expensive */ 132 static pid_t mypid; /* Used for printing debugging messages */ 133 134 static volatile sig_atomic_t got_fatal; 135 136 static volatile sig_atomic_t got_SIGINT, got_SIGHUP, got_SIGQUIT, got_SIGTERM, 137 got_SIGINFO; 138 139 static sigset_t sigset, emptyset; 140 141 static void handle_fatal_signal(int); 142 static void handle_siginfo(void); 143 static void postprocess_job(Job *, bool); 144 static Job *prepare_job(GNode *); 145 static void determine_job_next_step(Job *); 146 static void remove_job(Job *, bool); 147 static void may_continue_job(Job *); 148 static void continue_job(Job *); 149 static Job *reap_finished_job(pid_t); 150 static bool reap_jobs(void); 151 152 static void loop_handle_running_jobs(void); 153 static bool expensive_job(Job *); 154 static bool expensive_command(const char *); 155 static void setup_signal(int); 156 static void notice_signal(int); 157 static void setup_all_signals(void); 158 static const char *really_kill(Job *, int); 159 static void kill_with_doas_maybe(pid_t, int, const char *); 160 static void debug_kill_printf(const char *, ...); 161 static void debug_vprintf(const char *, va_list); 162 static void may_remove_target(Job *); 163 static const char *really_kill(Job *, int); 164 static void print_error(Job *); 165 static void internal_print_errors(void); 166 167 static int dying_signal = 0; 168 169 const char * basedirectory = NULL; 170 171 static void 172 kill_with_doas_maybe(pid_t pid, int signo, const char *p) 173 { 174 char buf[32]; /* largely enough */ 175 int sudo; 176 177 for (;*p != '\0'; p++) { 178 if (*p == 's') 179 sudo = 1; 180 else if (*p == 'd') 181 sudo = 0; 182 else 183 continue; 184 if (sudo && p[1] != 'u' || !sudo && p[1] != 'o') 185 continue; 186 p++; 187 if (sudo && p[1] != 'd' || !sudo && p[1] != 'a') 188 continue; 189 p++; 190 if (sudo && p[1] != 'o' || !sudo && p[1] != 's') 191 continue; 192 snprintf(buf, sizeof buf, "%s -n /bin/kill -%d %ld", 193 sudo ? "sudo" : "doas", 194 signo, (long)pid); 195 debug_kill_printf("trying to kill with %s", buf); 196 system(buf); 197 return; 198 } 199 200 } 201 202 static const char * 203 really_kill(Job *job, int signo) 204 { 205 pid_t pid = job->pid; 206 if (getpgid(pid) != getpgrp()) { 207 if (killpg(pid, signo) == 0) 208 return "group got signal"; 209 pid = -pid; 210 } else { 211 if (kill(pid, signo) == 0) 212 return "process got signal"; 213 } 214 if (errno == ESRCH) { 215 job->flags |= JOB_LOST; 216 return "not found"; 217 } else if (errno == EPERM) { 218 kill_with_doas_maybe(pid, signo, job->cmd); 219 return ""; 220 } else 221 return "should not happen"; 222 } 223 224 static void 225 may_remove_target(Job *j) 226 { 227 int dying = check_dying_signal(); 228 229 if (dying && !noExecute && !Targ_Precious(j->node)) { 230 const char *file = Var(TARGET_INDEX, j->node); 231 int r = eunlink(file); 232 233 if (DEBUG(JOB) && r == -1) 234 fprintf(stderr, " *** would unlink %s\n", file); 235 if (r != -1) 236 fprintf(stderr, " *** %s removed\n", file); 237 } 238 } 239 240 static void 241 buf_addcurdir(BUFFER *buf) 242 { 243 const char *v = Var_Value(".CURDIR"); 244 if (basedirectory != NULL) { 245 size_t len = strlen(basedirectory); 246 if (strncmp(basedirectory, v, len) == 0 && 247 v[len] == '/') { 248 v += len+1; 249 } else if (strcmp(basedirectory, v) == 0) { 250 Buf_AddString(buf, "."); 251 return; 252 } 253 } 254 Buf_AddString(buf, v); 255 } 256 257 static const char * 258 shortened_curdir(void) 259 { 260 static BUFFER buf; 261 bool first = true; 262 if (first) { 263 Buf_Init(&buf, 0); 264 buf_addcurdir(&buf); 265 first = false; 266 } 267 return Buf_Retrieve(&buf); 268 } 269 270 static void 271 quick_error(Job *j, int signo, bool first) 272 { 273 if (first) { 274 fprintf(stderr, "*** Signal SIG%s", sys_signame[signo]); 275 fprintf(stderr, " in %s (", shortened_curdir()); 276 } else 277 fprintf(stderr, " "); 278 279 fprintf(stderr, "%s", j->node->name); 280 free(j->cmd); 281 } 282 283 static void 284 print_error(Job *j) 285 { 286 static bool first = true; 287 BUFFER buf; 288 289 Buf_Init(&buf, 0); 290 291 if (j->exit_type == JOB_EXIT_BAD) 292 Buf_printf(&buf, "*** Error %d", j->code); 293 else if (j->exit_type == JOB_SIGNALED) { 294 if (j->code < NSIG) 295 Buf_printf(&buf, "*** Signal SIG%s", 296 sys_signame[j->code]); 297 else 298 Buf_printf(&buf, "*** unknown signal %d", j->code); 299 } else 300 Buf_printf(&buf, "*** Should not happen %d/%d", 301 j->exit_type, j->code); 302 if (DEBUG(KILL) && (j->flags & JOB_LOST)) 303 Buf_AddChar(&buf, '!'); 304 if (first) { 305 Buf_AddString(&buf, " in "); 306 buf_addcurdir(&buf); 307 first = false; 308 } 309 Buf_printf(&buf, " (%s:%lu", j->location->fname, j->location->lineno); 310 Buf_printf(&buf, " '%s'", j->node->name); 311 if ((j->flags & (JOB_SILENT | JOB_IS_EXPENSIVE)) == JOB_SILENT 312 && Buf_Size(&buf) < 140-2) { 313 size_t len = strlen(j->cmd); 314 Buf_AddString(&buf, ": "); 315 if (len + Buf_Size(&buf) < 140) 316 Buf_AddString(&buf, j->cmd); 317 else { 318 Buf_AddChars(&buf, 140 - Buf_Size(&buf), j->cmd); 319 Buf_AddString(&buf, "..."); 320 } 321 } 322 fprintf(stderr, "%s)\n", Buf_Retrieve(&buf)); 323 Buf_Destroy(&buf); 324 free(j->cmd); 325 } 326 static void 327 quick_summary(int signo) 328 { 329 Job *j, *k, *jnext; 330 bool first = true; 331 332 k = errorJobs; 333 errorJobs = NULL; 334 for (j = k; j != NULL; j = jnext) { 335 jnext = j->next; 336 if ((j->exit_type == JOB_EXIT_BAD && j->code == signo+128) || 337 (j->exit_type == JOB_SIGNALED && j->code == signo)) { 338 quick_error(j, signo, first); 339 first = false; 340 } else { 341 j->next = errorJobs; 342 errorJobs = j; 343 } 344 } 345 if (!first) 346 fprintf(stderr, ")\n"); 347 } 348 349 static void 350 internal_print_errors() 351 { 352 Job *j, *k, *jnext; 353 int dying; 354 355 if (!errorJobs) 356 fprintf(stderr, "Stop in %s\n", shortened_curdir()); 357 358 for (j = errorJobs; j != NULL; j = j->next) 359 may_remove_target(j); 360 dying = check_dying_signal(); 361 if (dying) 362 quick_summary(dying); 363 while (errorJobs != NULL) { 364 k = errorJobs; 365 errorJobs = NULL; 366 for (j = k; j != NULL; j = jnext) { 367 jnext = j->next; 368 if (j->location->fname == k->location->fname) 369 print_error(j); 370 else { 371 j->next = errorJobs; 372 errorJobs = j; 373 } 374 } 375 } 376 } 377 378 void 379 print_errors(void) 380 { 381 handle_all_signals(); 382 internal_print_errors(); 383 } 384 385 static void 386 setup_signal(int sig) 387 { 388 if (signal(sig, SIG_IGN) != SIG_IGN) { 389 (void)signal(sig, notice_signal); 390 sigaddset(&sigset, sig); 391 } 392 } 393 394 static void 395 notice_signal(int sig) 396 { 397 398 switch(sig) { 399 case SIGINT: 400 got_SIGINT++; 401 got_fatal = 1; 402 break; 403 case SIGHUP: 404 got_SIGHUP++; 405 got_fatal = 1; 406 break; 407 case SIGQUIT: 408 got_SIGQUIT++; 409 got_fatal = 1; 410 break; 411 case SIGTERM: 412 got_SIGTERM++; 413 got_fatal = 1; 414 break; 415 case SIGINFO: 416 got_SIGINFO++; 417 break; 418 case SIGCHLD: 419 break; 420 } 421 } 422 423 static void 424 setup_all_signals(void) 425 { 426 sigemptyset(&sigset); 427 sigemptyset(&emptyset); 428 /* 429 * Catch the four signals that POSIX specifies if they aren't ignored. 430 * handle_signal will take care of calling JobInterrupt if appropriate. 431 */ 432 setup_signal(SIGINT); 433 setup_signal(SIGHUP); 434 setup_signal(SIGQUIT); 435 setup_signal(SIGTERM); 436 /* Display running jobs on SIGINFO */ 437 setup_signal(SIGINFO); 438 /* Have to see SIGCHLD */ 439 setup_signal(SIGCHLD); 440 got_fatal = 0; 441 } 442 443 static void 444 handle_siginfo(void) 445 { 446 static BUFFER buf; 447 static size_t length = 0; 448 449 Job *job; 450 bool first = true; 451 452 got_SIGINFO = 0; 453 /* we have to store the info in a buffer, because status from all 454 * makes running would get intermixed otherwise 455 */ 456 457 if (length == 0) { 458 Buf_Init(&buf, 0); 459 Buf_printf(&buf, "%s in ", Var_Value("MAKE")); 460 buf_addcurdir(&buf); 461 Buf_AddString(&buf, ": "); 462 length = Buf_Size(&buf); 463 } else 464 Buf_Truncate(&buf, length); 465 466 for (job = runningJobs; job != NULL ; job = job->next) { 467 if (!first) 468 Buf_puts(&buf, ", "); 469 first = false; 470 Buf_puts(&buf, job->node->name); 471 } 472 Buf_puts(&buf, first ? "nothing running\n" : "\n"); 473 474 fputs(Buf_Retrieve(&buf), stderr); 475 } 476 477 int 478 check_dying_signal(void) 479 { 480 sigset_t set; 481 if (dying_signal) 482 return dying_signal; 483 sigpending(&set); 484 if (got_SIGINT || sigismember(&set, SIGINT)) 485 return dying_signal = SIGINT; 486 if (got_SIGHUP || sigismember(&set, SIGHUP)) 487 return dying_signal = SIGHUP; 488 if (got_SIGQUIT || sigismember(&set, SIGQUIT)) 489 return dying_signal = SIGQUIT; 490 if (got_SIGTERM || sigismember(&set, SIGTERM)) 491 return dying_signal = SIGTERM; 492 return 0; 493 } 494 495 void 496 handle_all_signals(void) 497 { 498 if (got_SIGINFO) 499 handle_siginfo(); 500 while (got_fatal) { 501 got_fatal = 0; 502 aborting = ABORT_INTERRUPT; 503 504 if (got_SIGINT) { 505 got_SIGINT=0; 506 handle_fatal_signal(SIGINT); 507 } 508 if (got_SIGHUP) { 509 got_SIGHUP=0; 510 handle_fatal_signal(SIGHUP); 511 } 512 if (got_SIGQUIT) { 513 got_SIGQUIT=0; 514 handle_fatal_signal(SIGQUIT); 515 } 516 if (got_SIGTERM) { 517 got_SIGTERM=0; 518 handle_fatal_signal(SIGTERM); 519 } 520 } 521 } 522 523 static void 524 debug_vprintf(const char *fmt, va_list va) 525 { 526 (void)printf("[%ld] ", (long)mypid); 527 (void)vprintf(fmt, va); 528 fflush(stdout); 529 } 530 531 void 532 debug_job_printf(const char *fmt, ...) 533 { 534 if (DEBUG(JOB)) { 535 va_list va; 536 va_start(va, fmt); 537 debug_vprintf(fmt, va); 538 va_end(va); 539 } 540 } 541 542 static void 543 debug_kill_printf(const char *fmt, ...) 544 { 545 if (DEBUG(KILL)) { 546 va_list va; 547 va_start(va, fmt); 548 debug_vprintf(fmt, va); 549 va_end(va); 550 } 551 } 552 553 /*- 554 *----------------------------------------------------------------------- 555 * postprocess_job -- 556 * Do final processing for the given job including updating 557 * parents and starting new jobs as available/necessary. 558 * 559 * Side Effects: 560 * If we got an error and are aborting (aborting == ABORT_ERROR) and 561 * the job list is now empty, we are done for the day. 562 * If we recognized an error we set the aborting flag 563 * to ABORT_ERROR so no more jobs will be started. 564 *----------------------------------------------------------------------- 565 */ 566 /*ARGSUSED*/ 567 568 static void 569 postprocess_job(Job *job, bool okay) 570 { 571 if (okay && 572 aborting != ABORT_ERROR && 573 aborting != ABORT_INTERRUPT) { 574 /* As long as we aren't aborting and the job didn't return a 575 * non-zero status that we shouldn't ignore, we call 576 * Make_Update to update the parents. */ 577 job->node->built_status = MADE; 578 Make_Update(job->node); 579 free(job); 580 } 581 582 if (errorJobs != NULL && !keepgoing && 583 aborting != ABORT_INTERRUPT) 584 aborting = ABORT_ERROR; 585 586 if (aborting == ABORT_ERROR && DEBUG(QUICKDEATH)) 587 handle_fatal_signal(SIGINT); 588 if (aborting == ABORT_ERROR && Job_Empty()) 589 Finish(); 590 } 591 592 /* expensive jobs handling: in order to avoid forking an exponential number 593 * of jobs, make tries to figure out "recursive make" configurations. 594 * It may err on the side of caution. 595 * Basically, a command is "expensive" if it's likely to fork an extra 596 * level of make: either by looking at the command proper, or if it has 597 * some specific qualities ('+cmd' are likely to be recursive, as are 598 * .MAKE: commands). It's possible to explicitly say some targets are 599 * expensive or cheap with .EXPENSIVE or .CHEAP. 600 * 601 * While an expensive command is running, no_new_jobs 602 * is set, so jobs that would fork new processes are accumulated in the 603 * heldJobs list instead. 604 * 605 * This heuristics is also used on error exit: we display silent commands 606 * that failed, unless those ARE expensive commands: expensive commands 607 * are likely to not be failing by themselves, but to be the result of 608 * a cascade of failures in descendant makes. 609 */ 610 void 611 determine_expensive_job(Job *job) 612 { 613 if (expensive_job(job)) { 614 job->flags |= JOB_IS_EXPENSIVE; 615 no_new_jobs = true; 616 } else 617 job->flags &= ~JOB_IS_EXPENSIVE; 618 if (DEBUG(EXPENSIVE)) 619 fprintf(stderr, "[%ld] Target %s running %.50s: %s\n", 620 (long)mypid, job->node->name, job->cmd, 621 job->flags & JOB_IS_EXPENSIVE ? "expensive" : "cheap"); 622 } 623 624 static bool 625 expensive_job(Job *job) 626 { 627 if (job->node->type & OP_CHEAP) 628 return false; 629 if (job->node->type & (OP_EXPENSIVE | OP_MAKE)) 630 return true; 631 return expensive_command(job->cmd); 632 } 633 634 static bool 635 expensive_command(const char *s) 636 { 637 const char *p; 638 bool include = false; 639 bool expensive = false; 640 641 /* okay, comments are cheap, always */ 642 if (*s == '#') 643 return false; 644 /* and commands we always execute are expensive */ 645 if (*s == '+') 646 return true; 647 648 for (p = s; *p != '\0'; p++) { 649 if (*p == ' ' || *p == '\t') { 650 include = false; 651 if (p[1] == '-' && p[2] == 'I') 652 include = true; 653 } 654 if (include) 655 continue; 656 /* KMP variant, avoid looking twice at the same 657 * letter. 658 */ 659 if (*p != 'm') 660 continue; 661 if (p[1] != 'a') 662 continue; 663 p++; 664 if (p[1] != 'k') 665 continue; 666 p++; 667 if (p[1] != 'e') 668 continue; 669 p++; 670 expensive = true; 671 while (p[1] != '\0' && p[1] != ' ' && p[1] != '\t') { 672 if (p[1] == '.' || p[1] == '/') { 673 expensive = false; 674 break; 675 } 676 p++; 677 } 678 if (expensive) 679 return true; 680 } 681 return false; 682 } 683 684 static Job * 685 prepare_job(GNode *gn) 686 { 687 /* a new job is prepared unless its commands are bogus (we don't 688 * have anything for it), or if we're in touch mode. 689 * 690 * Note that even in noexec mode, some commands may still run 691 * thanks to the +cmd construct. 692 */ 693 if (node_find_valid_commands(gn)) { 694 if (touchFlag) { 695 Job_Touch(gn); 696 return NULL; 697 } else { 698 Job *job; 699 700 job = emalloc(sizeof(Job)); 701 if (job == NULL) 702 Punt("can't create job: out of memory"); 703 704 job_attach_node(job, gn); 705 return job; 706 } 707 } else { 708 node_failure(gn); 709 return NULL; 710 } 711 } 712 713 static void 714 may_continue_job(Job *job) 715 { 716 if (no_new_jobs) { 717 if (DEBUG(EXPENSIVE)) 718 fprintf(stderr, "[%ld] expensive -> hold %s\n", 719 (long)mypid, job->node->name); 720 job->next = heldJobs; 721 heldJobs = job; 722 } else 723 continue_job(job); 724 } 725 726 static void 727 continue_job(Job *job) 728 { 729 bool finished = job_run_next(job); 730 if (finished) 731 remove_job(job, true); 732 else 733 determine_expensive_job(job); 734 } 735 736 /*- 737 *----------------------------------------------------------------------- 738 * Job_Make -- 739 * Start a target-creation process going for the target described 740 * by the graph node gn. 741 * 742 * Side Effects: 743 * A new Job node is created and its commands continued, which 744 * may fork the first command of that job. 745 *----------------------------------------------------------------------- 746 */ 747 void 748 Job_Make(GNode *gn) 749 { 750 Job *job; 751 752 job = prepare_job(gn); 753 if (!job) 754 return; 755 nJobs++; 756 may_continue_job(job); 757 } 758 759 static void 760 determine_job_next_step(Job *job) 761 { 762 bool okay; 763 if (job->flags & JOB_IS_EXPENSIVE) { 764 no_new_jobs = false; 765 if (DEBUG(EXPENSIVE)) 766 fprintf(stderr, "[%ld] " 767 "Returning from expensive target %s, " 768 "allowing new jobs\n", (long)mypid, 769 job->node->name); 770 } 771 772 okay = job->exit_type == JOB_EXIT_OKAY; 773 if (!okay || job->next_cmd == NULL) 774 remove_job(job, okay); 775 else 776 may_continue_job(job); 777 } 778 779 static void 780 remove_job(Job *job, bool okay) 781 { 782 nJobs--; 783 postprocess_job(job, okay); 784 while (!no_new_jobs) { 785 if (heldJobs != NULL) { 786 job = heldJobs; 787 heldJobs = heldJobs->next; 788 if (DEBUG(EXPENSIVE)) 789 fprintf(stderr, "[%ld] cheap -> release %s\n", 790 (long)mypid, job->node->name); 791 continue_job(job); 792 } else 793 break; 794 } 795 } 796 797 /* 798 * job = reap_finished_job(pid): 799 * retrieve and remove a job from runningJobs, based on its pid 800 * 801 * Note that we remove it right away, so that handle_signals() 802 * is accurate. 803 */ 804 static Job * 805 reap_finished_job(pid_t pid) 806 { 807 Job **j, *job; 808 809 for (j = &runningJobs; *j != NULL; j = &((*j)->next)) 810 if ((*j)->pid == pid) { 811 job = *j; 812 *j = job->next; 813 return job; 814 } 815 816 return NULL; 817 } 818 819 /* 820 * classic waitpid handler: retrieve as many dead children as possible. 821 * returns true if succesful 822 */ 823 static bool 824 reap_jobs(void) 825 { 826 pid_t pid; /* pid of dead child */ 827 int status; /* Exit/termination status */ 828 bool reaped = false; 829 Job *job; 830 831 while ((pid = waitpid(WAIT_ANY, &status, WNOHANG)) > 0) { 832 reaped = true; 833 job = reap_finished_job(pid); 834 835 if (job == NULL) { 836 Punt("Child (%ld) not in table?", (long)pid); 837 } else { 838 job_handle_status(job, status); 839 determine_job_next_step(job); 840 } 841 } 842 /* sanity check, should not happen */ 843 if (pid == -1 && errno == ECHILD && runningJobs != NULL) 844 Punt("Process has no children, but runningJobs is not empty ?"); 845 return reaped; 846 } 847 848 void 849 handle_running_jobs(void) 850 { 851 sigset_t old; 852 /* reaping children in the presence of caught signals */ 853 854 /* first, we make sure to hold on new signals, to synchronize 855 * reception of new stuff on sigsuspend 856 */ 857 sigprocmask(SIG_BLOCK, &sigset, &old); 858 /* note this will NOT loop until runningJobs == NULL. 859 * It's merely an optimisation, namely that we don't need to go 860 * through the logic if no job is present. As soon as a job 861 * gets reaped, we WILL exit the loop through the break. 862 */ 863 while (runningJobs != NULL) { 864 /* did we already have pending stuff that advances things ? 865 * then handle_all_signals() will not return 866 * or reap_jobs() will reap_jobs() 867 */ 868 handle_all_signals(); 869 if (reap_jobs()) 870 break; 871 /* okay, so it's safe to suspend, we have nothing to do but 872 * wait... 873 */ 874 sigsuspend(&emptyset); 875 } 876 sigprocmask(SIG_SETMASK, &old, NULL); 877 } 878 879 void 880 handle_one_job(Job *job) 881 { 882 int stat; 883 int status; 884 sigset_t old; 885 886 sigprocmask(SIG_BLOCK, &sigset, &old); 887 while (1) { 888 handle_all_signals(); 889 stat = waitpid(job->pid, &status, WNOHANG); 890 if (stat == job->pid) 891 break; 892 sigsuspend(&emptyset); 893 } 894 runningJobs = NULL; 895 job_handle_status(job, status); 896 sigprocmask(SIG_SETMASK, &old, NULL); 897 } 898 899 static void 900 loop_handle_running_jobs() 901 { 902 while (runningJobs != NULL) 903 handle_running_jobs(); 904 } 905 906 void 907 Job_Init(int maxproc) 908 { 909 runningJobs = NULL; 910 heldJobs = NULL; 911 errorJobs = NULL; 912 maxJobs = maxproc; 913 mypid = getpid(); 914 915 nJobs = 0; 916 917 aborting = 0; 918 setup_all_signals(); 919 } 920 921 bool 922 can_start_job(void) 923 { 924 if (aborting || nJobs >= maxJobs) 925 return false; 926 else 927 return true; 928 } 929 930 bool 931 Job_Empty(void) 932 { 933 return runningJobs == NULL; 934 } 935 936 /*- 937 *----------------------------------------------------------------------- 938 * handle_fatal_signal -- 939 * Handle the receipt of a fatal interrupt 940 * 941 * Side Effects: 942 * All children are killed. Another job may be started if there 943 * is an interrupt target and the signal was SIGINT. 944 *----------------------------------------------------------------------- 945 */ 946 static void 947 handle_fatal_signal(int signo) 948 { 949 Job *job; 950 951 debug_kill_printf("handle_fatal_signal(%d) called.\n", signo); 952 953 dying_signal = signo; 954 for (job = runningJobs; job != NULL; job = job->next) { 955 debug_kill_printf("passing to " 956 "child %ld running %s: %s\n", (long)job->pid, 957 job->node->name, really_kill(job, signo)); 958 may_remove_target(job); 959 } 960 961 if (signo == SIGINT && !touchFlag) { 962 if ((interrupt_node->type & OP_DUMMY) == 0) { 963 ignoreErrors = false; 964 965 Job_Make(interrupt_node); 966 } 967 } 968 loop_handle_running_jobs(); 969 internal_print_errors(); 970 971 /* die by that signal */ 972 sigprocmask(SIG_BLOCK, &sigset, NULL); 973 signal(signo, SIG_DFL); 974 kill(getpid(), signo); 975 sigprocmask(SIG_SETMASK, &emptyset, NULL); 976 /*NOTREACHED*/ 977 fprintf(stderr, "This should never happen\n"); 978 exit(1); 979 } 980 981 /* 982 *----------------------------------------------------------------------- 983 * Job_Finish -- 984 * Do final processing such as the running of the commands 985 * attached to the .END target. 986 * 987 * return true if fatal errors have happened. 988 *----------------------------------------------------------------------- 989 */ 990 bool 991 Job_Finish(void) 992 { 993 bool problem = errorJobs != NULL; 994 995 if ((end_node->type & OP_DUMMY) == 0) { 996 if (problem) { 997 Error("Errors reported so .END ignored"); 998 } else { 999 Job_Make(end_node); 1000 loop_handle_running_jobs(); 1001 } 1002 } 1003 return problem; 1004 } 1005 1006 void 1007 Job_Begin(void) 1008 { 1009 if ((begin_node->type & OP_DUMMY) == 0) { 1010 Job_Make(begin_node); 1011 loop_handle_running_jobs(); 1012 } 1013 } 1014 1015 /*- 1016 *----------------------------------------------------------------------- 1017 * Job_Wait -- 1018 * Waits for all running jobs to finish and returns. Sets 'aborting' 1019 * to ABORT_WAIT to prevent other jobs from starting. 1020 * 1021 * Side Effects: 1022 * Currently running jobs finish. 1023 * 1024 *----------------------------------------------------------------------- 1025 */ 1026 void 1027 Job_Wait(void) 1028 { 1029 aborting = ABORT_WAIT; 1030 loop_handle_running_jobs(); 1031 aborting = 0; 1032 } 1033 1034 /*- 1035 *----------------------------------------------------------------------- 1036 * Job_AbortAll -- 1037 * Abort all currently running jobs without handling output or anything. 1038 * This function is to be called only in the event of a major 1039 * error. 1040 * 1041 * Side Effects: 1042 * All children are killed 1043 *----------------------------------------------------------------------- 1044 */ 1045 void 1046 Job_AbortAll(void) 1047 { 1048 Job *job; /* the job descriptor in that element */ 1049 int foo; 1050 1051 aborting = ABORT_ERROR; 1052 1053 for (job = runningJobs; job != NULL; job = job->next) { 1054 killpg(job->pid, SIGINT); 1055 killpg(job->pid, SIGKILL); 1056 } 1057 1058 /* 1059 * Catch as many children as want to report in at first, then give up 1060 */ 1061 while (waitpid(WAIT_ANY, &foo, WNOHANG) > 0) 1062 continue; 1063 } 1064