xref: /openbsd/usr.bin/make/job.c (revision fc61954a)
1 /*	$OpenBSD: job.c,v 1.136 2015/07/28 14:22:26 espie Exp $	*/
2 /*	$NetBSD: job.c,v 1.16 1996/11/06 17:59:08 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Marc Espie.
6  *
7  * Extensive code modifications for the OpenBSD project.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
22  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
32  * Copyright (c) 1988, 1989 by Adam de Boor
33  * Copyright (c) 1989 by Berkeley Softworks
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Adam de Boor.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 /*-
65  * job.c --
66  *	handle the creation etc. of our child processes.
67  *
68  * Interface:
69  *	Job_Make		Start the creation of the given target.
70  *
71  *	Job_Init		Called to initialize this module.
72  *
73  *	Job_Begin		execute commands attached to the .BEGIN target
74  *				if any.
75  *
76  *	can_start_job		Return true if we can start job
77  *
78  *	Job_Empty		Return true if the job table is completely
79  *				empty.
80  *
81  *	Job_Finish		Perform any final processing which needs doing.
82  *				This includes the execution of any commands
83  *				which have been/were attached to the .END
84  *				target.
85  *
86  *	Job_AbortAll		Abort all current jobs. It doesn't
87  *				handle output or do anything for the jobs,
88  *				just kills them.
89  *
90  *	Job_Wait		Wait for all running jobs to finish.
91  */
92 
93 #include <sys/types.h>
94 #include <sys/wait.h>
95 #include <ctype.h>
96 #include <errno.h>
97 #include <fcntl.h>
98 #include <signal.h>
99 #include <stdarg.h>
100 #include <stdio.h>
101 #include <stdlib.h>
102 #include <string.h>
103 #include <unistd.h>
104 #include "config.h"
105 #include "defines.h"
106 #include "job.h"
107 #include "engine.h"
108 #include "pathnames.h"
109 #include "var.h"
110 #include "targ.h"
111 #include "error.h"
112 #include "extern.h"
113 #include "lst.h"
114 #include "gnode.h"
115 #include "memory.h"
116 #include "make.h"
117 #include "buf.h"
118 
119 static int	aborting = 0;	    /* why is the make aborting? */
120 #define ABORT_ERROR	1	    /* Because of an error */
121 #define ABORT_INTERRUPT 2	    /* Because it was interrupted */
122 #define ABORT_WAIT	3	    /* Waiting for jobs to finish */
123 
124 static int	maxJobs;	/* The most children we can run at once */
125 static int	nJobs;		/* Number of jobs already allocated */
126 static bool	no_new_jobs;	/* Mark recursive shit so we shouldn't start
127 				 * something else at the same time
128 				 */
129 Job *runningJobs;		/* Jobs currently running a process */
130 Job *errorJobs;			/* Jobs in error at end */
131 static Job *heldJobs;		/* Jobs not running yet because of expensive */
132 static pid_t mypid;		/* Used for printing debugging messages */
133 
134 static volatile sig_atomic_t got_fatal;
135 
136 static volatile sig_atomic_t got_SIGINT, got_SIGHUP, got_SIGQUIT, got_SIGTERM,
137     got_SIGINFO;
138 
139 static sigset_t sigset, emptyset;
140 
141 static void handle_fatal_signal(int);
142 static void handle_siginfo(void);
143 static void postprocess_job(Job *, bool);
144 static Job *prepare_job(GNode *);
145 static void determine_job_next_step(Job *);
146 static void remove_job(Job *, bool);
147 static void may_continue_job(Job *);
148 static void continue_job(Job *);
149 static Job *reap_finished_job(pid_t);
150 static bool reap_jobs(void);
151 
152 static void loop_handle_running_jobs(void);
153 static bool expensive_job(Job *);
154 static bool expensive_command(const char *);
155 static void setup_signal(int);
156 static void notice_signal(int);
157 static void setup_all_signals(void);
158 static const char *really_kill(Job *, int);
159 static void kill_with_doas_maybe(pid_t, int, const char *);
160 static void debug_kill_printf(const char *, ...);
161 static void debug_vprintf(const char *, va_list);
162 static void may_remove_target(Job *);
163 static const char *really_kill(Job *, int);
164 static void print_error(Job *);
165 static void internal_print_errors(void);
166 
167 static int dying_signal = 0;
168 
169 const char *	basedirectory = NULL;
170 
/*
 * kill_with_doas_maybe(pid, signo, p):
 *	fallback for really_kill() when sending the signal was refused.
 *	If the command line p mentions "sudo" or "doas", run
 *	"<tool> -n /bin/kill -<signo> <pid>" through system(3), using
 *	whichever of the two words shows up first in p.  Nothing happens
 *	if neither word is present.  pid is printed as given, so the
 *	negated pid really_kill() passes for process groups goes through
 *	unchanged.
 */
static void
kill_with_doas_maybe(pid_t pid, int signo, const char *p)
{
	char buf[32]; /* largely enough */
	const char *sudo_at, *doas_at, *tool;
	int len;

	sudo_at = strstr(p, "sudo");
	doas_at = strstr(p, "doas");
	if (sudo_at == NULL && doas_at == NULL)
		return;
	/* the earliest occurrence decides which tool we try */
	if (doas_at == NULL || (sudo_at != NULL && sudo_at < doas_at))
		tool = "sudo";
	else
		tool = "doas";

	len = snprintf(buf, sizeof buf, "%s -n /bin/kill -%d %ld",
	    tool, signo, (long)pid);
	/* a truncated command line could signal the wrong pid: give up */
	if (len < 0 || (size_t)len >= sizeof buf)
		return;
	debug_kill_printf("trying to kill with %s\n", buf);
	(void)system(buf);
}
201 
202 static const char *
203 really_kill(Job *job, int signo)
204 {
205 	pid_t pid = job->pid;
206 	if (getpgid(pid) != getpgrp()) {
207 		if (killpg(pid, signo) == 0)
208 			return "group got signal";
209 		pid = -pid;
210 	} else {
211 		if (kill(pid, signo) == 0)
212 			return "process got signal";
213 	}
214 	if (errno == ESRCH) {
215 		job->flags |= JOB_LOST;
216 		return "not found";
217 	} else if (errno == EPERM) {
218 		kill_with_doas_maybe(pid, signo, job->cmd);
219 		return "";
220 	} else
221 		return "should not happen";
222 }
223 
224 static void
225 may_remove_target(Job *j)
226 {
227 	int dying = check_dying_signal();
228 
229 	if (dying && !noExecute && !Targ_Precious(j->node)) {
230 		const char *file = Var(TARGET_INDEX, j->node);
231 		int r = eunlink(file);
232 
233 		if (DEBUG(JOB) && r == -1)
234 			fprintf(stderr, " *** would unlink %s\n", file);
235 		if (r != -1)
236 			fprintf(stderr, " *** %s removed\n", file);
237 	}
238 }
239 
240 static void
241 buf_addcurdir(BUFFER *buf)
242 {
243 	const char *v = Var_Value(".CURDIR");
244 	if (basedirectory != NULL) {
245 		size_t len = strlen(basedirectory);
246 		if (strncmp(basedirectory, v, len) == 0 &&
247 		    v[len] == '/') {
248 			v += len+1;
249 		} else if (strcmp(basedirectory, v) == 0) {
250 			Buf_AddString(buf, ".");
251 			return;
252 		}
253 	}
254 	Buf_AddString(buf, v);
255 }
256 
257 static const char *
258 shortened_curdir(void)
259 {
260 	static BUFFER buf;
261 	bool first = true;
262 	if (first) {
263 		Buf_Init(&buf, 0);
264 		buf_addcurdir(&buf);
265 		first = false;
266 	}
267 	return Buf_Retrieve(&buf);
268 }
269 
270 static void
271 quick_error(Job *j, int signo, bool first)
272 {
273 	if (first) {
274 		fprintf(stderr, "*** Signal SIG%s", sys_signame[signo]);
275 		fprintf(stderr, " in %s (", shortened_curdir());
276 	} else
277 		fprintf(stderr, " ");
278 
279 	fprintf(stderr, "%s", j->node->name);
280 	free(j->cmd);
281 }
282 
283 static void
284 print_error(Job *j)
285 {
286 	static bool first = true;
287 	BUFFER buf;
288 
289 	Buf_Init(&buf, 0);
290 
291 	if (j->exit_type == JOB_EXIT_BAD)
292 		Buf_printf(&buf, "*** Error %d", j->code);
293 	else if (j->exit_type == JOB_SIGNALED) {
294 		if (j->code < NSIG)
295 			Buf_printf(&buf, "*** Signal SIG%s",
296 			    sys_signame[j->code]);
297 		else
298 			Buf_printf(&buf, "*** unknown signal %d", j->code);
299 	} else
300 		Buf_printf(&buf, "*** Should not happen %d/%d",
301 		    j->exit_type, j->code);
302 	if (DEBUG(KILL) && (j->flags & JOB_LOST))
303 		Buf_AddChar(&buf, '!');
304 	if (first) {
305 		Buf_AddString(&buf, " in ");
306 		buf_addcurdir(&buf);
307 		first = false;
308 	}
309 	Buf_printf(&buf, " (%s:%lu", j->location->fname, j->location->lineno);
310 	Buf_printf(&buf, " '%s'", j->node->name);
311 	if ((j->flags & (JOB_SILENT | JOB_IS_EXPENSIVE)) == JOB_SILENT
312 	    && Buf_Size(&buf) < 140-2) {
313 		size_t len = strlen(j->cmd);
314 		Buf_AddString(&buf, ": ");
315 		if (len + Buf_Size(&buf) < 140)
316 			Buf_AddString(&buf, j->cmd);
317 		else {
318 			Buf_AddChars(&buf, 140 - Buf_Size(&buf), j->cmd);
319 			Buf_AddString(&buf, "...");
320 		}
321 	}
322 	fprintf(stderr, "%s)\n", Buf_Retrieve(&buf));
323 	Buf_Destroy(&buf);
324 	free(j->cmd);
325 }
326 static void
327 quick_summary(int signo)
328 {
329 	Job *j, *k, *jnext;
330 	bool first = true;
331 
332 	k = errorJobs;
333 	errorJobs = NULL;
334 	for (j = k; j != NULL; j = jnext) {
335 		jnext = j->next;
336 		if ((j->exit_type == JOB_EXIT_BAD && j->code == signo+128) ||
337 		    (j->exit_type == JOB_SIGNALED && j->code == signo)) {
338 			quick_error(j, signo, first);
339 			first = false;
340 		} else {
341 			j->next = errorJobs;
342 			errorJobs = j;
343 		}
344 	}
345 	if (!first)
346 		fprintf(stderr, ")\n");
347 }
348 
349 static void
350 internal_print_errors()
351 {
352 	Job *j, *k, *jnext;
353 	int dying;
354 
355 	if (!errorJobs)
356 		fprintf(stderr, "Stop in %s\n", shortened_curdir());
357 
358 	for (j = errorJobs; j != NULL; j = j->next)
359 		may_remove_target(j);
360 	dying = check_dying_signal();
361 	if (dying)
362 		quick_summary(dying);
363 	while (errorJobs != NULL) {
364 		k = errorJobs;
365 		errorJobs = NULL;
366 		for (j = k; j != NULL; j = jnext) {
367 			jnext = j->next;
368 			if (j->location->fname == k->location->fname)
369 				print_error(j);
370 			else {
371 				j->next = errorJobs;
372 				errorJobs = j;
373 			}
374 		}
375 	}
376 }
377 
/*
 * print_errors():
 *	public entry point for the error report: process the signals
 *	noticed so far, then print the errors.
 */
void
print_errors(void)
{
	/* may not return if a fatal signal was caught */
	handle_all_signals();

	internal_print_errors();
}
384 
385 static void
386 setup_signal(int sig)
387 {
388 	if (signal(sig, SIG_IGN) != SIG_IGN) {
389 		(void)signal(sig, notice_signal);
390 		sigaddset(&sigset, sig);
391 	}
392 }
393 
394 static void
395 notice_signal(int sig)
396 {
397 
398 	switch(sig) {
399 	case SIGINT:
400 		got_SIGINT++;
401 		got_fatal = 1;
402 		break;
403 	case SIGHUP:
404 		got_SIGHUP++;
405 		got_fatal = 1;
406 		break;
407 	case SIGQUIT:
408 		got_SIGQUIT++;
409 		got_fatal = 1;
410 		break;
411 	case SIGTERM:
412 		got_SIGTERM++;
413 		got_fatal = 1;
414 		break;
415 	case SIGINFO:
416 		got_SIGINFO++;
417 		break;
418 	case SIGCHLD:
419 		break;
420 	}
421 }
422 
423 static void
424 setup_all_signals(void)
425 {
426 	sigemptyset(&sigset);
427 	sigemptyset(&emptyset);
428 	/*
429 	 * Catch the four signals that POSIX specifies if they aren't ignored.
430 	 * handle_signal will take care of calling JobInterrupt if appropriate.
431 	 */
432 	setup_signal(SIGINT);
433 	setup_signal(SIGHUP);
434 	setup_signal(SIGQUIT);
435 	setup_signal(SIGTERM);
436 	/* Display running jobs on SIGINFO */
437 	setup_signal(SIGINFO);
438 	/* Have to see SIGCHLD */
439 	setup_signal(SIGCHLD);
440 	got_fatal = 0;
441 }
442 
443 static void
444 handle_siginfo(void)
445 {
446 	static BUFFER buf;
447 	static size_t length = 0;
448 
449 	Job *job;
450 	bool first = true;
451 
452 	got_SIGINFO = 0;
453 	/* we have to store the info in a buffer, because status from all
454 	 * makes running would get intermixed otherwise
455 	 */
456 
457 	if (length == 0) {
458 		Buf_Init(&buf, 0);
459 		Buf_printf(&buf, "%s in ", Var_Value("MAKE"));
460 		buf_addcurdir(&buf);
461 		Buf_AddString(&buf, ": ");
462 		length = Buf_Size(&buf);
463 	} else
464 		Buf_Truncate(&buf, length);
465 
466 	for (job = runningJobs; job != NULL ; job = job->next) {
467 		if (!first)
468 			Buf_puts(&buf, ", ");
469 		first = false;
470 		Buf_puts(&buf, job->node->name);
471 	}
472 	Buf_puts(&buf, first ? "nothing running\n" : "\n");
473 
474 	fputs(Buf_Retrieve(&buf), stderr);
475 }
476 
477 int
478 check_dying_signal(void)
479 {
480 	sigset_t set;
481 	if (dying_signal)
482 		return dying_signal;
483 	sigpending(&set);
484 	if (got_SIGINT || sigismember(&set, SIGINT))
485 		return dying_signal = SIGINT;
486 	if (got_SIGHUP || sigismember(&set, SIGHUP))
487 		return dying_signal = SIGHUP;
488 	if (got_SIGQUIT || sigismember(&set, SIGQUIT))
489 		return dying_signal = SIGQUIT;
490 	if (got_SIGTERM || sigismember(&set, SIGTERM))
491 		return dying_signal = SIGTERM;
492 	return 0;
493 }
494 
495 void
496 handle_all_signals(void)
497 {
498 	if (got_SIGINFO)
499 		handle_siginfo();
500 	while (got_fatal) {
501 		got_fatal = 0;
502 		aborting = ABORT_INTERRUPT;
503 
504 		if (got_SIGINT) {
505 			got_SIGINT=0;
506 			handle_fatal_signal(SIGINT);
507 		}
508 		if (got_SIGHUP) {
509 			got_SIGHUP=0;
510 			handle_fatal_signal(SIGHUP);
511 		}
512 		if (got_SIGQUIT) {
513 			got_SIGQUIT=0;
514 			handle_fatal_signal(SIGQUIT);
515 		}
516 		if (got_SIGTERM) {
517 			got_SIGTERM=0;
518 			handle_fatal_signal(SIGTERM);
519 		}
520 	}
521 }
522 
523 static void
524 debug_vprintf(const char *fmt, va_list va)
525 {
526 	(void)printf("[%ld] ", (long)mypid);
527 	(void)vprintf(fmt, va);
528 	fflush(stdout);
529 }
530 
531 void
532 debug_job_printf(const char *fmt, ...)
533 {
534 	if (DEBUG(JOB)) {
535 		va_list va;
536 		va_start(va, fmt);
537 		debug_vprintf(fmt, va);
538 		va_end(va);
539 	}
540 }
541 
542 static void
543 debug_kill_printf(const char *fmt, ...)
544 {
545 	if (DEBUG(KILL)) {
546 		va_list va;
547 		va_start(va, fmt);
548 		debug_vprintf(fmt, va);
549 		va_end(va);
550 	}
551 }
552 
553 /*-
554  *-----------------------------------------------------------------------
555  * postprocess_job  --
556  *	Do final processing for the given job including updating
557  *	parents and starting new jobs as available/necessary.
558  *
559  * Side Effects:
560  *	If we got an error and are aborting (aborting == ABORT_ERROR) and
561  *	the job list is now empty, we are done for the day.
562  *	If we recognized an error we set the aborting flag
563  *	to ABORT_ERROR so no more jobs will be started.
564  *-----------------------------------------------------------------------
565  */
566 /*ARGSUSED*/
567 
568 static void
569 postprocess_job(Job *job, bool okay)
570 {
571 	if (okay &&
572 	    aborting != ABORT_ERROR &&
573 	    aborting != ABORT_INTERRUPT) {
574 		/* As long as we aren't aborting and the job didn't return a
575 		 * non-zero status that we shouldn't ignore, we call
576 		 * Make_Update to update the parents. */
577 		job->node->built_status = MADE;
578 		Make_Update(job->node);
579 		free(job);
580 	}
581 
582 	if (errorJobs != NULL && !keepgoing &&
583 	    aborting != ABORT_INTERRUPT)
584 		aborting = ABORT_ERROR;
585 
586 	if (aborting == ABORT_ERROR && DEBUG(QUICKDEATH))
587 		handle_fatal_signal(SIGINT);
588 	if (aborting == ABORT_ERROR && Job_Empty())
589 		Finish();
590 }
591 
592 /* expensive jobs handling: in order to avoid forking an exponential number
593  * of jobs, make tries to figure out "recursive make" configurations.
594  * It may err on the side of caution.
595  * Basically, a command is "expensive" if it's likely to fork an extra
596  * level of make: either by looking at the command proper, or if it has
597  * some specific qualities ('+cmd' are likely to be recursive, as are
598  * .MAKE: commands).  It's possible to explicitly say some targets are
599  * expensive or cheap with .EXPENSIVE or .CHEAP.
600  *
601  * While an expensive command is running, no_new_jobs
602  * is set, so jobs that would fork new processes are accumulated in the
603  * heldJobs list instead.
604  *
605  * This heuristics is also used on error exit: we display silent commands
606  * that failed, unless those ARE expensive commands: expensive commands
607  * are likely to not be failing by themselves, but to be the result of
608  * a cascade of failures in descendant makes.
609  */
610 void
611 determine_expensive_job(Job *job)
612 {
613 	if (expensive_job(job)) {
614 		job->flags |= JOB_IS_EXPENSIVE;
615 		no_new_jobs = true;
616 	} else
617 		job->flags &= ~JOB_IS_EXPENSIVE;
618 	if (DEBUG(EXPENSIVE))
619 		fprintf(stderr, "[%ld] Target %s running %.50s: %s\n",
620 		    (long)mypid, job->node->name, job->cmd,
621 		    job->flags & JOB_IS_EXPENSIVE ? "expensive" : "cheap");
622 }
623 
624 static bool
625 expensive_job(Job *job)
626 {
627 	if (job->node->type & OP_CHEAP)
628 		return false;
629 	if (job->node->type & (OP_EXPENSIVE | OP_MAKE))
630 		return true;
631 	return expensive_command(job->cmd);
632 }
633 
/*
 * expensive_command(s):
 *	guess from its text whether command s is going to run make.
 *	- a command starting with '#' is never expensive,
 *	- a command starting with '+' always is,
 *	- otherwise, the command is expensive if it contains "make" in a
 *	word where no '.' or '/' follows (so "gmake" counts, but
 *	"makefile.in" or "make/rules" don't).  Words starting with -I
 *	right after a blank are not looked at.
 */
static bool
expensive_command(const char *s)
{
	bool in_include = false;
	size_t i;

	if (s[0] == '#')
		return false;
	if (s[0] == '+')
		return true;

	for (i = 0; s[i] != '\0'; i++) {
		const char *tail;
		size_t n;

		/* each blank starts a new word: is it a -I option ? */
		if (s[i] == ' ' || s[i] == '\t')
			in_include = s[i + 1] == '-' && s[i + 2] == 'I';
		if (in_include || strncmp(s + i, "make", 4) != 0)
			continue;

		/* found "make": see what ends the rest of that word */
		tail = s + i + 4;
		n = strcspn(tail, " \t./");
		if (tail[n] != '.' && tail[n] != '/')
			return true;
		/* looks like a file name: go on from the '.' or '/' */
		i += 4 + n - 1;
	}
	return false;
}
683 
684 static Job *
685 prepare_job(GNode *gn)
686 {
687 	/* a new job is prepared unless its commands are bogus (we don't
688 	 * have anything for it), or if we're in touch mode.
689 	 *
690 	 * Note that even in noexec mode, some commands may still run
691 	 * thanks to the +cmd construct.
692 	 */
693 	if (node_find_valid_commands(gn)) {
694 		if (touchFlag) {
695 			Job_Touch(gn);
696 			return NULL;
697 		} else {
698 			Job *job;
699 
700 			job = emalloc(sizeof(Job));
701 			if (job == NULL)
702 				Punt("can't create job: out of memory");
703 
704 			job_attach_node(job, gn);
705 			return job;
706 		}
707 	} else {
708 		node_failure(gn);
709 		return NULL;
710 	}
711 }
712 
713 static void
714 may_continue_job(Job *job)
715 {
716 	if (no_new_jobs) {
717 		if (DEBUG(EXPENSIVE))
718 			fprintf(stderr, "[%ld] expensive -> hold %s\n",
719 			    (long)mypid, job->node->name);
720 		job->next = heldJobs;
721 		heldJobs = job;
722 	} else
723 		continue_job(job);
724 }
725 
726 static void
727 continue_job(Job *job)
728 {
729 	bool finished = job_run_next(job);
730 	if (finished)
731 		remove_job(job, true);
732 	else
733 		determine_expensive_job(job);
734 }
735 
736 /*-
737  *-----------------------------------------------------------------------
738  * Job_Make  --
739  *	Start a target-creation process going for the target described
740  *	by the graph node gn.
741  *
742  * Side Effects:
743  *	A new Job node is created and  its commands continued, which
744  *	may fork the first command of that job.
745  *-----------------------------------------------------------------------
746  */
747 void
748 Job_Make(GNode *gn)
749 {
750 	Job *job;
751 
752 	job = prepare_job(gn);
753 	if (!job)
754 		return;
755 	nJobs++;
756 	may_continue_job(job);
757 }
758 
759 static void
760 determine_job_next_step(Job *job)
761 {
762 	bool okay;
763 	if (job->flags & JOB_IS_EXPENSIVE) {
764 		no_new_jobs = false;
765 		if (DEBUG(EXPENSIVE))
766 			fprintf(stderr, "[%ld] "
767 			    "Returning from expensive target %s, "
768 			    "allowing new jobs\n", (long)mypid,
769 			    job->node->name);
770 	}
771 
772 	okay = job->exit_type == JOB_EXIT_OKAY;
773 	if (!okay || job->next_cmd == NULL)
774 		remove_job(job, okay);
775 	else
776 		may_continue_job(job);
777 }
778 
779 static void
780 remove_job(Job *job, bool okay)
781 {
782 	nJobs--;
783 	postprocess_job(job, okay);
784 	while (!no_new_jobs) {
785 		if (heldJobs != NULL) {
786 			job = heldJobs;
787 			heldJobs = heldJobs->next;
788 			if (DEBUG(EXPENSIVE))
789 				fprintf(stderr, "[%ld] cheap -> release %s\n",
790 				    (long)mypid, job->node->name);
791 			continue_job(job);
792 		} else
793 			break;
794 	}
795 }
796 
797 /*
798  * job = reap_finished_job(pid):
799  * 	retrieve and remove a job from runningJobs, based on its pid
800  *
801  *	Note that we remove it right away, so that handle_signals()
802  *	is accurate.
803  */
804 static Job *
805 reap_finished_job(pid_t pid)
806 {
807 	Job **j, *job;
808 
809 	for (j = &runningJobs; *j != NULL; j = &((*j)->next))
810 		if ((*j)->pid == pid) {
811 			job = *j;
812 			*j = job->next;
813 			return job;
814 		}
815 
816 	return NULL;
817 }
818 
819 /*
820  * classic waitpid handler: retrieve as many dead children as possible.
821  * returns true if succesful
822  */
823 static bool
824 reap_jobs(void)
825 {
826  	pid_t pid;	/* pid of dead child */
827  	int status;	/* Exit/termination status */
828 	bool reaped = false;
829 	Job *job;
830 
831 	while ((pid = waitpid(WAIT_ANY, &status, WNOHANG)) > 0) {
832 		reaped = true;
833 		job = reap_finished_job(pid);
834 
835 		if (job == NULL) {
836 			Punt("Child (%ld) not in table?", (long)pid);
837 		} else {
838 			job_handle_status(job, status);
839 			determine_job_next_step(job);
840 		}
841 	}
842 	/* sanity check, should not happen */
843 	if (pid == -1 && errno == ECHILD && runningJobs != NULL)
844 		Punt("Process has no children, but runningJobs is not empty ?");
845 	return reaped;
846 }
847 
848 void
849 handle_running_jobs(void)
850 {
851 	sigset_t old;
852 	/* reaping children in the presence of caught signals */
853 
854 	/* first, we make sure to hold on new signals, to synchronize
855 	 * reception of new stuff on sigsuspend
856 	 */
857 	sigprocmask(SIG_BLOCK, &sigset, &old);
858 	/* note this will NOT loop until runningJobs == NULL.
859 	 * It's merely an optimisation, namely that we don't need to go
860 	 * through the logic if no job is present. As soon as a job
861 	 * gets reaped, we WILL exit the loop through the break.
862 	 */
863 	while (runningJobs != NULL) {
864 		/* did we already have pending stuff that advances things ?
865 		 * then handle_all_signals() will not return
866 		 * or reap_jobs() will reap_jobs()
867 		 */
868 		handle_all_signals();
869 		if (reap_jobs())
870 			break;
871 		/* okay, so it's safe to suspend, we have nothing to do but
872 		 * wait...
873 		 */
874 		sigsuspend(&emptyset);
875 	}
876 	sigprocmask(SIG_SETMASK, &old, NULL);
877 }
878 
879 void
880 handle_one_job(Job *job)
881 {
882 	int stat;
883 	int status;
884 	sigset_t old;
885 
886 	sigprocmask(SIG_BLOCK, &sigset, &old);
887 	while (1) {
888 		handle_all_signals();
889 		stat = waitpid(job->pid, &status, WNOHANG);
890 		if (stat == job->pid)
891 			break;
892 		sigsuspend(&emptyset);
893 	}
894 	runningJobs = NULL;
895 	job_handle_status(job, status);
896 	sigprocmask(SIG_SETMASK, &old, NULL);
897 }
898 
899 static void
900 loop_handle_running_jobs()
901 {
902 	while (runningJobs != NULL)
903 		handle_running_jobs();
904 }
905 
906 void
907 Job_Init(int maxproc)
908 {
909 	runningJobs = NULL;
910 	heldJobs = NULL;
911 	errorJobs = NULL;
912 	maxJobs = maxproc;
913 	mypid = getpid();
914 
915 	nJobs = 0;
916 
917 	aborting = 0;
918 	setup_all_signals();
919 }
920 
921 bool
922 can_start_job(void)
923 {
924 	if (aborting || nJobs >= maxJobs)
925 		return false;
926 	else
927 		return true;
928 }
929 
930 bool
931 Job_Empty(void)
932 {
933 	return runningJobs == NULL;
934 }
935 
936 /*-
937  *-----------------------------------------------------------------------
938  * handle_fatal_signal --
939  *	Handle the receipt of a fatal interrupt
940  *
941  * Side Effects:
942  *	All children are killed. Another job may be started if there
943  *	is an interrupt target and the signal was SIGINT.
944  *-----------------------------------------------------------------------
945  */
946 static void
947 handle_fatal_signal(int signo)
948 {
949 	Job *job;
950 
951 	debug_kill_printf("handle_fatal_signal(%d) called.\n", signo);
952 
953 	dying_signal = signo;
954 	for (job = runningJobs; job != NULL; job = job->next) {
955 		debug_kill_printf("passing to "
956 		    "child %ld running %s: %s\n", (long)job->pid,
957 		    job->node->name, really_kill(job, signo));
958 		may_remove_target(job);
959 	}
960 
961 	if (signo == SIGINT && !touchFlag) {
962 		if ((interrupt_node->type & OP_DUMMY) == 0) {
963 			ignoreErrors = false;
964 
965 			Job_Make(interrupt_node);
966 		}
967 	}
968 	loop_handle_running_jobs();
969 	internal_print_errors();
970 
971 	/* die by that signal */
972 	sigprocmask(SIG_BLOCK, &sigset, NULL);
973 	signal(signo, SIG_DFL);
974 	kill(getpid(), signo);
975 	sigprocmask(SIG_SETMASK, &emptyset, NULL);
976 	/*NOTREACHED*/
977 	fprintf(stderr, "This should never happen\n");
978 	exit(1);
979 }
980 
981 /*
982  *-----------------------------------------------------------------------
983  * Job_Finish --
984  *	Do final processing such as the running of the commands
985  *	attached to the .END target.
986  *
987  *	return true if fatal errors have happened.
988  *-----------------------------------------------------------------------
989  */
990 bool
991 Job_Finish(void)
992 {
993 	bool problem = errorJobs != NULL;
994 
995 	if ((end_node->type & OP_DUMMY) == 0) {
996 		if (problem) {
997 			Error("Errors reported so .END ignored");
998 		} else {
999 			Job_Make(end_node);
1000 			loop_handle_running_jobs();
1001 		}
1002 	}
1003 	return problem;
1004 }
1005 
1006 void
1007 Job_Begin(void)
1008 {
1009 	if ((begin_node->type & OP_DUMMY) == 0) {
1010 		Job_Make(begin_node);
1011 		loop_handle_running_jobs();
1012 	}
1013 }
1014 
1015 /*-
1016  *-----------------------------------------------------------------------
1017  * Job_Wait --
1018  *	Waits for all running jobs to finish and returns. Sets 'aborting'
1019  *	to ABORT_WAIT to prevent other jobs from starting.
1020  *
1021  * Side Effects:
1022  *	Currently running jobs finish.
1023  *
1024  *-----------------------------------------------------------------------
1025  */
1026 void
1027 Job_Wait(void)
1028 {
1029 	aborting = ABORT_WAIT;
1030 	loop_handle_running_jobs();
1031 	aborting = 0;
1032 }
1033 
1034 /*-
1035  *-----------------------------------------------------------------------
1036  * Job_AbortAll --
1037  *	Abort all currently running jobs without handling output or anything.
1038  *	This function is to be called only in the event of a major
1039  *	error.
1040  *
1041  * Side Effects:
1042  *	All children are killed
1043  *-----------------------------------------------------------------------
1044  */
1045 void
1046 Job_AbortAll(void)
1047 {
1048 	Job *job;	/* the job descriptor in that element */
1049 	int foo;
1050 
1051 	aborting = ABORT_ERROR;
1052 
1053 	for (job = runningJobs; job != NULL; job = job->next) {
1054 		killpg(job->pid, SIGINT);
1055 		killpg(job->pid, SIGKILL);
1056 	}
1057 
1058 	/*
1059 	 * Catch as many children as want to report in at first, then give up
1060 	 */
1061 	while (waitpid(WAIT_ANY, &foo, WNOHANG) > 0)
1062 		continue;
1063 }
1064