xref: /openbsd/usr.bin/make/job.c (revision d415bd75)
1 /*	$OpenBSD: job.c,v 1.165 2023/09/04 11:35:11 espie Exp $	*/
2 /*	$NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Marc Espie.
6  *
7  * Extensive code modifications for the OpenBSD project.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
22  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
32  * Copyright (c) 1988, 1989 by Adam de Boor
33  * Copyright (c) 1989 by Berkeley Softworks
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Adam de Boor.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 /*-
65  * job.c --
66  *	handle the creation etc. of our child processes.
67  *
68  * Interface:
69  *	Job_Make		Start the creation of the given target.
70  *
71  *	Job_Init		Called to initialize this module.
72  *
73  *	can_start_job		Return true if we can start job
74  *
75  *	Job_Empty		Return true if the job table is completely
76  *				empty.
77  *
78  *	Job_AbortAll		Abort all current jobs. It doesn't
79  *				handle output or do anything for the jobs,
80  *				just kills them.
81  *
82  *	Job_Wait		Wait for all running jobs to finish.
83  */
84 
85 #include <sys/types.h>
86 #include <sys/wait.h>
87 #include <ctype.h>
88 #include <errno.h>
89 #include <fcntl.h>
90 #include <signal.h>
91 #include <stdarg.h>
92 #include <stdio.h>
93 #include <stdlib.h>
94 #include <string.h>
95 #include <unistd.h>
96 #include "defines.h"
97 #include "job.h"
98 #include "engine.h"
99 #include "pathnames.h"
100 #include "var.h"
101 #include "targ.h"
102 #include "error.h"
103 #include "extern.h"
104 #include "lst.h"
105 #include "gnode.h"
106 #include "memory.h"
107 #include "buf.h"
108 #include "enginechoice.h"
109 
/*
 * Module state.  `aborting' is 0 while the build proceeds normally and
 * holds one of the ABORT_* values once new jobs must no longer be started.
 */
static int	aborting = 0;	    /* why is the make aborting? */
#define ABORT_ERROR	1	    /* Because of an error */
#define ABORT_INTERRUPT 2	    /* Because it was interrupted */
#define ABORT_WAIT	3	    /* Waiting for jobs to finish */

static bool	no_new_jobs;	/* Set while an expensive (probably
				 * recursive) job is running, so we don't
				 * start something else at the same time
				 */
bool sequential;		/* Set by Job_Init() when maxJobs == 1 */
Job *runningJobs;		/* Jobs currently running a process */
Job *errorJobs;			/* Jobs in error at end */
Job *availableJobs;		/* Pool of available jobs */
static Job *heldJobs;		/* Jobs held back while no_new_jobs is set */
static pid_t mypid;		/* Used for printing debugging messages */
static Job *extra_job;		/* Needed for .INTERRUPT */

/* Set by notice_signal() for SIGINT/SIGHUP/SIGQUIT/SIGTERM; consumed by
 * handle_all_signals().
 */
static volatile sig_atomic_t got_fatal;

/* Per-signal counters bumped by notice_signal(). */
static volatile sig_atomic_t got_SIGINT, got_SIGHUP, got_SIGQUIT, got_SIGTERM,
    got_SIGINFO;

/* sigset:   signals for which setup_signal() installed notice_signal
 * emptyset: the empty set, handed to sigsuspend()
 * origset:  signal mask recorded by Sigset_Init() and restored by
 *	     reset_signal_mask()
 */
static sigset_t sigset, emptyset, origset;

static void handle_fatal_signal(int);
static void handle_siginfo(void);
static void postprocess_job(Job *);
static void determine_job_next_step(Job *);
static void may_continue_job(Job *);
static Job *reap_finished_job(pid_t);
static bool reap_jobs(void);
static void may_continue_heldback_jobs(void);

static bool expensive_job(Job *);
static bool expensive_command(const char *);
static void setup_signal(int);
static void notice_signal(int);
static void setup_all_signals(void);
static const char *really_kill(Job *, int);
static void debug_kill_printf(const char *, ...);
static void debug_vprintf(const char *, va_list);
static void may_remove_target(Job *);
static void print_error(Job *);
static void internal_print_errors(void);

/* Fatal signal we are dying from, 0 if none; set by handle_fatal_signal()
 * and cached by check_dying_signal().
 */
static int dying_signal = 0;

/* When non-NULL, buf_addcurdir() strips this prefix from .CURDIR. */
const char *	basedirectory = NULL;
157 
158 static const char *
159 really_kill(Job *job, int signo)
160 {
161 	pid_t pid = job->pid;
162 	if (getpgid(pid) != getpgrp()) {
163 		if (killpg(pid, signo) == 0)
164 			return "group got signal";
165 	} else {
166 		if (kill(pid, signo) == 0)
167 			return "process got signal";
168 	}
169 	if (errno == ESRCH)
170 		job->flags |= JOB_LOST;
171 	return strerror(errno);
172 }
173 
174 static void
175 may_remove_target(Job *j)
176 {
177 	int dying = check_dying_signal();
178 
179 	if (dying && !noExecute && !Targ_Precious(j->node)) {
180 		const char *file = Var(TARGET_INDEX, j->node);
181 		int r = eunlink(file);
182 
183 		if (DEBUG(JOB) && r == -1)
184 			fprintf(stderr, " *** would unlink %s\n", file);
185 		if (r != -1)
186 			fprintf(stderr, " *** %s removed\n", file);
187 	}
188 }
189 
190 static void
191 buf_addcurdir(BUFFER *buf)
192 {
193 	const char *v = Var_Value(".CURDIR");
194 	if (basedirectory != NULL) {
195 		size_t len = strlen(basedirectory);
196 		if (strncmp(basedirectory, v, len) == 0 &&
197 		    v[len] == '/') {
198 			v += len+1;
199 		} else if (strcmp(basedirectory, v) == 0) {
200 			Buf_AddString(buf, ".");
201 			return;
202 		}
203 	}
204 	Buf_AddString(buf, v);
205 }
206 
207 static const char *
208 shortened_curdir(void)
209 {
210 	static BUFFER buf;
211 	static bool first = true;
212 	if (first) {
213 		Buf_Init(&buf, 0);
214 		buf_addcurdir(&buf);
215 		first = false;
216 	}
217 	return Buf_Retrieve(&buf);
218 }
219 
220 static void
221 quick_error(Job *j, int signo, bool first)
222 {
223 	if (first) {
224 		fprintf(stderr, "*** Signal SIG%s", sys_signame[signo]);
225 		fprintf(stderr, " in %s (", shortened_curdir());
226 	} else
227 		fprintf(stderr, " ");
228 
229 	fprintf(stderr, "%s", j->node->name);
230 	free(j->cmd);
231 }
232 
233 static void
234 print_error(Job *j)
235 {
236 	static bool first = true;
237 	BUFFER buf;
238 
239 	Buf_Init(&buf, 0);
240 
241 	if (j->exit_type == JOB_EXIT_BAD)
242 		Buf_printf(&buf, "*** Error %d", j->code);
243 	else if (j->exit_type == JOB_SIGNALED) {
244 		if (j->code < NSIG)
245 			Buf_printf(&buf, "*** Signal SIG%s",
246 			    sys_signame[j->code]);
247 		else
248 			Buf_printf(&buf, "*** unknown signal %d", j->code);
249 	} else
250 		Buf_printf(&buf, "*** Should not happen %d/%d",
251 		    j->exit_type, j->code);
252 	if (DEBUG(KILL) && (j->flags & JOB_LOST))
253 		Buf_AddChar(&buf, '!');
254 	if (first) {
255 		Buf_AddString(&buf, " in ");
256 		buf_addcurdir(&buf);
257 		first = false;
258 	}
259 	Buf_printf(&buf, " (%s:%lu", j->location->fname, j->location->lineno);
260 	Buf_printf(&buf, " '%s'", j->node->name);
261 	if ((j->flags & (JOB_SILENT | JOB_IS_EXPENSIVE)) == JOB_SILENT
262 	    && Buf_Size(&buf) < 140-2) {
263 		size_t len = strlen(j->cmd);
264 		Buf_AddString(&buf, ": ");
265 		if (len + Buf_Size(&buf) < 140)
266 			Buf_AddString(&buf, j->cmd);
267 		else {
268 			Buf_AddChars(&buf, 140 - Buf_Size(&buf), j->cmd);
269 			Buf_AddString(&buf, "...");
270 		}
271 	}
272 	fprintf(stderr, "%s)\n", Buf_Retrieve(&buf));
273 	Buf_Destroy(&buf);
274 	free(j->cmd);
275 }
276 static void
277 quick_summary(int signo)
278 {
279 	Job *j, *k, *jnext;
280 	bool first = true;
281 
282 	k = errorJobs;
283 	errorJobs = NULL;
284 	for (j = k; j != NULL; j = jnext) {
285 		jnext = j->next;
286 		if ((j->exit_type == JOB_EXIT_BAD && j->code == signo+128) ||
287 		    (j->exit_type == JOB_SIGNALED && j->code == signo)) {
288 			quick_error(j, signo, first);
289 			first = false;
290 		} else {
291 			j->next = errorJobs;
292 			errorJobs = j;
293 		}
294 	}
295 	if (!first)
296 		fprintf(stderr, ")\n");
297 }
298 
299 static void
300 internal_print_errors()
301 {
302 	Job *j, *k, *jnext;
303 	int dying;
304 
305 	if (!errorJobs)
306 		fprintf(stderr, "Stop in %s\n", shortened_curdir());
307 
308 	for (j = errorJobs; j != NULL; j = j->next)
309 		may_remove_target(j);
310 	dying = check_dying_signal();
311 	if (dying)
312 		quick_summary(dying);
313 	/* Print errors grouped by file name. */
314 	while (errorJobs != NULL) {
315 		/* Select the first job. */
316 		k = errorJobs;
317 		errorJobs = NULL;
318 		for (j = k; j != NULL; j = jnext) {
319 			jnext = j->next;
320 			if (j->location->fname == k->location->fname)
321 				/* Print errors with the same filename. */
322 				print_error(j);
323 			else {
324 				/* Keep others for the next iteration. */
325 				j->next = errorJobs;
326 				errorJobs = j;
327 			}
328 		}
329 	}
330 }
331 
/*
 * print_errors --
 *	Process any signals noticed so far, then report the jobs queued
 *	on errorJobs through internal_print_errors().
 */
void
print_errors(void)
{
	handle_all_signals();
	internal_print_errors();
}
338 
339 static void
340 setup_signal(int sig)
341 {
342 	if (signal(sig, SIG_IGN) != SIG_IGN) {
343 		(void)signal(sig, notice_signal);
344 		sigaddset(&sigset, sig);
345 	}
346 }
347 
348 static void
349 notice_signal(int sig)
350 {
351 
352 	switch(sig) {
353 	case SIGINT:
354 		got_SIGINT++;
355 		got_fatal = 1;
356 		break;
357 	case SIGHUP:
358 		got_SIGHUP++;
359 		got_fatal = 1;
360 		break;
361 	case SIGQUIT:
362 		got_SIGQUIT++;
363 		got_fatal = 1;
364 		break;
365 	case SIGTERM:
366 		got_SIGTERM++;
367 		got_fatal = 1;
368 		break;
369 	case SIGINFO:
370 		got_SIGINFO++;
371 		break;
372 	case SIGCHLD:
373 		break;
374 	}
375 }
376 
377 void
378 Sigset_Init()
379 {
380 	sigemptyset(&emptyset);
381 	sigprocmask(SIG_BLOCK, &emptyset, &origset);
382 }
383 
384 static void
385 setup_all_signals(void)
386 {
387 	sigemptyset(&sigset);
388 	/*
389 	 * Catch the four signals that POSIX specifies if they aren't ignored.
390 	 * handle_signal will take care of calling JobInterrupt if appropriate.
391 	 */
392 	setup_signal(SIGINT);
393 	setup_signal(SIGHUP);
394 	setup_signal(SIGQUIT);
395 	setup_signal(SIGTERM);
396 	/* Display running jobs on SIGINFO */
397 	setup_signal(SIGINFO);
398 	/* Have to see SIGCHLD */
399 	setup_signal(SIGCHLD);
400 	got_fatal = 0;
401 }
402 
403 static void
404 handle_siginfo(void)
405 {
406 	static BUFFER buf;
407 	static size_t length = 0;
408 
409 	Job *job;
410 	bool first = true;
411 
412 	got_SIGINFO = 0;
413 	/* we have to store the info in a buffer, because status from all
414 	 * makes running would get intermixed otherwise
415 	 */
416 
417 	if (length == 0) {
418 		Buf_Init(&buf, 0);
419 		Buf_printf(&buf, "%s in ", Var_Value("MAKE"));
420 		buf_addcurdir(&buf);
421 		Buf_AddString(&buf, ": ");
422 		length = Buf_Size(&buf);
423 	} else
424 		Buf_Truncate(&buf, length);
425 
426 	for (job = runningJobs; job != NULL ; job = job->next) {
427 		if (!first)
428 			Buf_puts(&buf, ", ");
429 		first = false;
430 		Buf_puts(&buf, job->node->name);
431 	}
432 	Buf_puts(&buf, first ? "nothing running\n" : "\n");
433 
434 	fputs(Buf_Retrieve(&buf), stderr);
435 }
436 
437 int
438 check_dying_signal(void)
439 {
440 	sigset_t set;
441 	if (dying_signal)
442 		return dying_signal;
443 	sigpending(&set);
444 	if (got_SIGINT || sigismember(&set, SIGINT))
445 		return dying_signal = SIGINT;
446 	if (got_SIGHUP || sigismember(&set, SIGHUP))
447 		return dying_signal = SIGHUP;
448 	if (got_SIGQUIT || sigismember(&set, SIGQUIT))
449 		return dying_signal = SIGQUIT;
450 	if (got_SIGTERM || sigismember(&set, SIGTERM))
451 		return dying_signal = SIGTERM;
452 	return 0;
453 }
454 
455 void
456 handle_all_signals(void)
457 {
458 	if (got_SIGINFO)
459 		handle_siginfo();
460 	while (got_fatal) {
461 		got_fatal = 0;
462 		aborting = ABORT_INTERRUPT;
463 
464 		if (got_SIGINT) {
465 			got_SIGINT=0;
466 			handle_fatal_signal(SIGINT);
467 		}
468 		if (got_SIGHUP) {
469 			got_SIGHUP=0;
470 			handle_fatal_signal(SIGHUP);
471 		}
472 		if (got_SIGQUIT) {
473 			got_SIGQUIT=0;
474 			handle_fatal_signal(SIGQUIT);
475 		}
476 		if (got_SIGTERM) {
477 			got_SIGTERM=0;
478 			handle_fatal_signal(SIGTERM);
479 		}
480 	}
481 }
482 
483 static void
484 debug_vprintf(const char *fmt, va_list va)
485 {
486 	(void)printf("[%ld] ", (long)mypid);
487 	(void)vprintf(fmt, va);
488 	fflush(stdout);
489 }
490 
491 void
492 debug_job_printf(const char *fmt, ...)
493 {
494 	if (DEBUG(JOB)) {
495 		va_list va;
496 		va_start(va, fmt);
497 		debug_vprintf(fmt, va);
498 		va_end(va);
499 	}
500 }
501 
502 static void
503 debug_kill_printf(const char *fmt, ...)
504 {
505 	if (DEBUG(KILL)) {
506 		va_list va;
507 		va_start(va, fmt);
508 		debug_vprintf(fmt, va);
509 		va_end(va);
510 	}
511 }
512 
513 /*-
514  *-----------------------------------------------------------------------
515  * postprocess_job  --
516  *	Do final processing for the given job including updating
517  *	parents and starting new jobs as available/necessary.
518  *
519  * Side Effects:
520  *	If we got an error and are aborting (aborting == ABORT_ERROR) and
521  *	the job list is now empty, we are done for the day.
522  *	If we recognized an error we set the aborting flag
523  *	to ABORT_ERROR so no more jobs will be started.
524  *-----------------------------------------------------------------------
525  */
526 
527 static void
528 postprocess_job(Job *job)
529 {
530 	if (job->exit_type == JOB_EXIT_OKAY &&
531 	    aborting != ABORT_ERROR &&
532 	    aborting != ABORT_INTERRUPT) {
533 		/* As long as we aren't aborting and the job didn't return a
534 		 * non-zero status that we shouldn't ignore, we call
535 		 * Make_Update to update the parents. */
536 		job->node->built_status = REBUILT;
537 		engine_node_updated(job->node);
538 	}
539 	if (job->flags & JOB_KEEPERROR) {
540 		job->next = errorJobs;
541 		errorJobs = job;
542 	} else {
543 		job->next = availableJobs;
544 		availableJobs = job;
545 	}
546 
547 	if (errorJobs != NULL && aborting != ABORT_INTERRUPT)
548 		aborting = ABORT_ERROR;
549 
550 	if (aborting == ABORT_ERROR && DEBUG(QUICKDEATH))
551 		handle_fatal_signal(SIGINT);
552 	if (aborting == ABORT_ERROR && Job_Empty())
553 		Finish();
554 }
555 
556 /* expensive jobs handling: in order to avoid forking an exponential number
557  * of jobs, make tries to figure out "recursive make" configurations.
558  * It may err on the side of caution.
559  * Basically, a command is "expensive" if it's likely to fork an extra
560  * level of make: either by looking at the command proper, or if it has
561  * some specific qualities ('+cmd' are likely to be recursive, as are
562  * .MAKE: commands).  It's possible to explicitly say some targets are
563  * expensive or cheap with .EXPENSIVE or .CHEAP.
564  *
565  * While an expensive command is running, no_new_jobs
566  * is set, so jobs that would fork new processes are accumulated in the
567  * heldJobs list instead.
568  *
569  * XXX This heuristics is also used on error exit: we display silent commands
570  * that failed, unless those ARE expensive commands: expensive commands are
571  * likely to not be failing by themselves, but to be the result of a cascade of
572  * failures in descendant makes.
573  */
574 void
575 determine_expensive_job(Job *job)
576 {
577 	if (expensive_job(job)) {
578 		job->flags |= JOB_IS_EXPENSIVE;
579 		no_new_jobs = true;
580 	} else
581 		job->flags &= ~JOB_IS_EXPENSIVE;
582 	if (DEBUG(EXPENSIVE))
583 		fprintf(stderr, "[%ld] Target %s running %.50s: %s\n",
584 		    (long)mypid, job->node->name, job->cmd,
585 		    job->flags & JOB_IS_EXPENSIVE ? "expensive" : "cheap");
586 }
587 
588 static bool
589 expensive_job(Job *job)
590 {
591 	if (job->node->type & OP_CHEAP)
592 		return false;
593 	if (job->node->type & (OP_EXPENSIVE | OP_MAKE))
594 		return true;
595 	return expensive_command(job->cmd);
596 }
597 
/*
 * expensive_command --
 *	Guess from its text whether shell command s runs another make.
 *
 *	Commands starting with '#' are never expensive and commands
 *	starting with '+' always are.  Otherwise the command is expensive
 *	if it contains "make" such that the rest of that word (up to the
 *	next blank, tab or end of string) contains neither '.' nor '/'.
 *	Words introduced by a blank or tab followed by "-I" are skipped
 *	entirely.
 */
static bool
expensive_command(const char *s)
{
	const char *cursor;
	bool in_include = false;

	/* okay, comments are cheap, always */
	if (*s == '#')
		return false;
	/* and commands we always execute are expensive */
	if (*s == '+')
		return true;

	for (cursor = s; *cursor != '\0'; cursor++) {
		const char *tail;

		/* each blank or tab decides anew whether a -I word follows */
		if (*cursor == ' ' || *cursor == '\t')
			in_include = cursor[1] == '-' && cursor[2] == 'I';
		if (in_include || strncmp(cursor, "make", 4) != 0)
			continue;

		/* look at what follows "make" within the same word */
		tail = cursor + 4;
		while (*tail != '\0' && *tail != ' ' && *tail != '\t' &&
		    *tail != '.' && *tail != '/')
			tail++;
		if (*tail != '.' && *tail != '/')
			return true;
		/* file name or path component: resume at the '.' or '/' */
		cursor = tail - 1;
	}
	return false;
}
647 
648 static void
649 may_continue_job(Job *job)
650 {
651 	if (no_new_jobs) {
652 		if (DEBUG(EXPENSIVE))
653 			fprintf(stderr, "[%ld] expensive -> hold %s\n",
654 			    (long)mypid, job->node->name);
655 		job->next = heldJobs;
656 		heldJobs = job;
657 	} else {
658 		bool finished = job_run_next(job);
659 		if (finished)
660 			postprocess_job(job);
661 		else if (!sequential)
662 			determine_expensive_job(job);
663 	}
664 }
665 
666 static void
667 may_continue_heldback_jobs()
668 {
669 	while (!no_new_jobs) {
670 		if (heldJobs != NULL) {
671 			Job *job = heldJobs;
672 			heldJobs = heldJobs->next;
673 			if (DEBUG(EXPENSIVE))
674 				fprintf(stderr, "[%ld] cheap -> release %s\n",
675 				    (long)mypid, job->node->name);
676 			may_continue_job(job);
677 		} else
678 			break;
679 	}
680 }
681 
682 /*-
683  *-----------------------------------------------------------------------
684  * Job_Make  --
685  *	Start a target-creation process going for the target described
686  *	by the graph node gn.
687  *
688  * Side Effects:
689  *	A new Job node is created and  its commands continued, which
690  *	may fork the first command of that job.
691  *-----------------------------------------------------------------------
692  */
693 void
694 Job_Make(GNode *gn)
695 {
696 	Job *job = availableJobs;
697 
698 	assert(job != NULL);
699 	availableJobs = availableJobs->next;
700 	job_attach_node(job, gn);
701 	may_continue_job(job);
702 }
703 
704 static void
705 determine_job_next_step(Job *job)
706 {
707 	if (job->flags & JOB_IS_EXPENSIVE) {
708 		no_new_jobs = false;
709 		if (DEBUG(EXPENSIVE))
710 			fprintf(stderr, "[%ld] "
711 			    "Returning from expensive target %s, "
712 			    "allowing new jobs\n", (long)mypid,
713 			    job->node->name);
714 	}
715 
716 	if (job->exit_type != JOB_EXIT_OKAY || job->next_cmd == NULL)
717 		postprocess_job(job);
718 	else
719 		may_continue_job(job);
720 }
721 
722 /*
723  * job = reap_finished_job(pid):
724  * 	retrieve and remove a job from runningJobs, based on its pid
725  *
726  *	Note that we remove it right away, so that handle_signals()
727  *	is accurate.
728  */
729 static Job *
730 reap_finished_job(pid_t pid)
731 {
732 	Job **j, *job;
733 
734 	for (j = &runningJobs; *j != NULL; j = &((*j)->next))
735 		if ((*j)->pid == pid) {
736 			job = *j;
737 			*j = job->next;
738 			return job;
739 		}
740 
741 	return NULL;
742 }
743 
744 /*
745  * classic waitpid handler: retrieve as many dead children as possible.
746  * returns true if successful
747  */
748 static bool
749 reap_jobs(void)
750 {
751  	pid_t pid;	/* pid of dead child */
752  	int status;	/* Exit/termination status */
753 	bool reaped = false;
754 	Job *job;
755 
756 	while ((pid = waitpid(WAIT_ANY, &status, WNOHANG)) > 0) {
757 		if (WIFSTOPPED(status))
758 			continue;
759 		reaped = true;
760 		job = reap_finished_job(pid);
761 
762 		if (job == NULL) {
763 			Punt("Child (%ld) with status %d not in table?",
764 			    (long)pid, status);
765 		} else {
766 			handle_job_status(job, status);
767 			determine_job_next_step(job);
768 		}
769 		may_continue_heldback_jobs();
770 	}
771 	/* sanity check, should not happen */
772 	if (pid == -1 && errno == ECHILD && runningJobs != NULL)
773 		Punt("Process has no children, but runningJobs is not empty ?");
774 	return reaped;
775 }
776 
777 void
778 reset_signal_mask()
779 {
780 	sigprocmask(SIG_SETMASK, &origset, NULL);
781 }
782 
783 void
784 handle_running_jobs(void)
785 {
786 	/* reaping children in the presence of caught signals */
787 
788 	/* first, we make sure to hold on new signals, to synchronize
789 	 * reception of new stuff on sigsuspend
790 	 */
791 	sigprocmask(SIG_BLOCK, &sigset, NULL);
792 	/* note this will NOT loop until runningJobs == NULL.
793 	 * It's merely an optimisation, namely that we don't need to go
794 	 * through the logic if no job is present. As soon as a job
795 	 * gets reaped, we WILL exit the loop through the break.
796 	 */
797 	while (runningJobs != NULL) {
798 		/* did we already have pending stuff that advances things ?
799 		 * then handle_all_signals() will not return
800 		 * or reap_jobs() will reap_jobs()
801 		 */
802 		handle_all_signals();
803 		if (reap_jobs())
804 			break;
805 		/* okay, so it's safe to suspend, we have nothing to do but
806 		 * wait...
807 		 */
808 		sigsuspend(&emptyset);
809 	}
810 	reset_signal_mask();
811 }
812 
813 void
814 loop_handle_running_jobs()
815 {
816 	while (runningJobs != NULL)
817 		handle_running_jobs();
818 }
819 
820 void
821 Job_Init(int maxJobs)
822 {
823 	Job *j;
824 	int i;
825 
826 	runningJobs = NULL;
827 	heldJobs = NULL;
828 	errorJobs = NULL;
829 	availableJobs = NULL;
830 	sequential = maxJobs == 1;
831 
832 	/* we allocate n+1 jobs, since we may need an extra job for
833 	 * running .INTERRUPT.  */
834 	j = ereallocarray(NULL, sizeof(Job), maxJobs+1);
835 	for (i = 0; i != maxJobs; i++) {
836 		j[i].next = availableJobs;
837 		availableJobs = &j[i];
838 	}
839 	extra_job = &j[maxJobs];
840 	mypid = getpid();
841 
842 	aborting = 0;
843 	setup_all_signals();
844 }
845 
846 bool
847 can_start_job(void)
848 {
849 	if (aborting || availableJobs == NULL)
850 		return false;
851 	else
852 		return true;
853 }
854 
/*
 * Job_Empty --
 *	Return true if no job is currently running, i.e. the runningJobs
 *	list is empty.
 */
bool
Job_Empty(void)
{
	return runningJobs == NULL;
}
860 
861 /*-
862  *-----------------------------------------------------------------------
863  * handle_fatal_signal --
864  *	Handle the receipt of a fatal interrupt
865  *
866  * Side Effects:
867  *	All children are killed. Another job may be started if there
868  *	is an interrupt target and the signal was SIGINT.
869  *-----------------------------------------------------------------------
870  */
871 static void
872 handle_fatal_signal(int signo)
873 {
874 	Job *job;
875 
876 	debug_kill_printf("handle_fatal_signal(%d) called.\n", signo);
877 
878 	dying_signal = signo;
879 	for (job = runningJobs; job != NULL; job = job->next) {
880 		debug_kill_printf("passing to "
881 		    "child %ld running %s: %s\n", (long)job->pid,
882 		    job->node->name, really_kill(job, signo));
883 		may_remove_target(job);
884 	}
885 
886 	if (signo == SIGINT && !touchFlag) {
887 		if ((interrupt_node->type & OP_DUMMY) == 0) {
888 			ignoreErrors = false;
889 			extra_job->next = availableJobs;
890 			availableJobs = extra_job;
891 			Job_Make(interrupt_node);
892 		}
893 	}
894 	loop_handle_running_jobs();
895 	internal_print_errors();
896 
897 	/* die by that signal */
898 	sigprocmask(SIG_BLOCK, &sigset, NULL);
899 	signal(signo, SIG_DFL);
900 	kill(getpid(), signo);
901 	sigprocmask(SIG_SETMASK, &emptyset, NULL);
902 	/*NOTREACHED*/
903 	fprintf(stderr, "This should never happen\n");
904 	exit(1);
905 }
906 
/*-
 *-----------------------------------------------------------------------
 * Job_Wait --
 *	Waits for all running jobs to finish and returns. 'aborting' is
 *	set to ABORT_WAIT for the duration of the wait, which keeps
 *	can_start_job() from allowing new jobs, and is reset to 0
 *	afterwards.
 *
 * Side Effects:
 *	Currently running jobs finish.
 *
 *-----------------------------------------------------------------------
 */
void
Job_Wait(void)
{
	aborting = ABORT_WAIT;
	loop_handle_running_jobs();
	aborting = 0;
}
925 
926 /*-
927  *-----------------------------------------------------------------------
928  * Job_AbortAll --
929  *	Abort all currently running jobs without handling output or anything.
930  *	This function is to be called only in the event of a major
931  *	error.
932  *
933  * Side Effects:
934  *	All children are killed
935  *-----------------------------------------------------------------------
936  */
937 void
938 Job_AbortAll(void)
939 {
940 	Job *job;	/* the job descriptor in that element */
941 	int foo;
942 
943 	aborting = ABORT_ERROR;
944 
945 	for (job = runningJobs; job != NULL; job = job->next) {
946 		debug_kill_printf("abort: send SIGINT to "
947 		    "child %ld running %s: %s\n",
948 		    (long)job->pid, job->node->name, really_kill(job, SIGINT));
949 		debug_kill_printf("abort: send SIGKILL to "
950 		    "child %ld running %s: %s\n",
951 		    (long)job->pid, job->node->name, really_kill(job, SIGKILL));
952 	}
953 
954 	/*
955 	 * Catch as many children as want to report in at first, then give up
956 	 */
957 	while (waitpid(WAIT_ANY, &foo, WNOHANG) > 0)
958 		continue;
959 }
960