1 /* -*- c-file-style: "java"; indent-tabs-mode: nil; tab-width: 4; fill-column: 78 -*-
2 *
3 * lsdistcc -- A simple distcc server discovery program
4 * Assumes all distcc servers are in DNS and are named distcc1...distccN.
5 *
6 * Copyright 2005 Google Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
21 * USA.
22 */
23 /* Program to autodetect listening distcc servers by looking in DNS
24 * for hosts named according to a given format.
25 * hosts are considered good servers based solely on whether their
26 * name fits the format and whether they are listening on the right port
27 * (and optionally whether they respond when you send them a compile job).
28 * Stops looking for servers after the first one it doesn't find in DNS.
29 * Prints results to stdout.
30 * Terminates with error status if no servers found.
31 *
32 * Examples:
33 *
34 * In your build script, add the lines
35 * DISTCC_HOSTS=`lsdistcc`
36 * export DISTCC_HOSTS
37 * before the line that invokes make.
38 *
39 * Or, in your Makefile, add the lines
40 * export DISTCC_HOSTS = $(shell lsdistcc)
41 *
42 * Changelog:
43 *
44 * Wed Jun 20 2007 - Manos Renieris, Google
45 * Added -P option.
46 *
47 * Mon Jun 4 2007 - Manos Renieris, Google
48 * Reformatted in 80 columns.
49 *
50 * Tue Jan 31 2006 - Dan Kegel, Google
51 * Added -x option to list down hosts with ,down suffix (since
52 * in sharded server cache mode, the hash space is partitioned
53 * over all servers regardless of whether they're up or down at the moment)
54 *
55 * Thu Jan 5 2006 - Dan Kegel, Google
56 * Actually read the output from the server and partially parse it.
57 *
58 * Sat Nov 26 2005 - Dan Kegel, Google
59 * Added -l option, improved -v output
60 *
61 * Tue Nov 22 2005 - Dan Kegel & Dongmin Zhang, Google
62 * added -pcc option to check that server actually responds when you send
63 * it a job
64 * added -c0 option to disable connect check
65 *
66 * Thu Oct 13 2005 - Dan Kegel, Google
67 * use rslave to do asynchronous-ish hostname lookup, do all connects
68 * in parallel
69 *
70 * Wed Oct 5 2005 - Dan Kegel, Google
71 * Added -d, -m options
72 *
73 * Fri Sep 16 2005 - Dan Kegel, Google
74 * Created
75 * Added -v option
76 --------------------------------------------------------------------------*/
77
78 #include <config.h>
79
80 #include <netdb.h>
81 #include <assert.h>
82 #include <stdio.h>
83 #include <string.h>
84 #include <unistd.h>
85 #include <stdlib.h>
86 #include <signal.h>
87 #include <errno.h>
88 #include <fcntl.h>
89 #include <sys/types.h>
90 #include <sys/socket.h>
91 #include <poll.h>
92 #include <sys/resource.h>
93 #include <sys/time.h>
94 #include <netinet/in.h>
95
96 #include "distcc.h"
97 #include "clinet.h"
98 #include "netutil.h"
99 #include "util.h"
100 #include "trace.h"
101 #include "rslave.h"
102 #include "../lzo/minilzo.h"
103
/* The descriptor-count resource limit is spelled RLIMIT_NOFILE on Linux and
 * RLIMIT_OFILE on BSD; supply the Linux spelling when it is missing.
 */
#if !defined(RLIMIT_NOFILE)
#  define RLIMIT_NOFILE RLIMIT_OFILE
#endif
108
/* Per-host position in the lookup / connect / test-compile state machine.
 * The numeric value is what the verbose trace prints as "state %d".
 */
enum status_e {
    STATE_LOOKUP       = 0,  /* waiting for a DNS answer */
    STATE_CONNECT      = 1,  /* address known, socket not yet created */
    STATE_CONNECTING   = 2,  /* nonblocking connect() in progress */
    STATE_READ_DONEPKT = 3,  /* request sent, expecting the DONE header */
    STATE_READ_STATPKT = 4,  /* expecting the STAT header */
    STATE_READ_REST    = 5,  /* reading remaining output until EOF */
    STATE_CLOSE        = 6,  /* close the socket and report the host */
    STATE_DONE         = 7   /* nothing further to do for this host */
};
117
118 struct state_s {
119 rslave_request_t req;
120 rslave_result_t res;
121 struct timeval start;
122 struct timeval deadline;
123 char curhdrbuf[12];
124 int curhdrlen;
125 enum status_e status;
126 int ntries;
127 int fd;
128 int up; /* default is 0, set to 1 on success */
129 };
130 typedef struct state_s state_t;
131
/* Default parameters; each can be overridden on the command line. */
#define DEFAULT_FORMAT          "distcc%d"  /* hostname format */
#define DEFAULT_PORT            3632        /* TCP port to connect to */
#define DEFAULT_PROTOCOL        1           /* protocol we'll try to speak */
#define DEFAULT_BIGTIMEOUT      7           /* max total runtime, seconds */
#define DEFAULT_DNSTIMEOUT_MS   500         /* individual DNS timeout, msec */
#define DEFAULT_CONNTIMEOUT_MS  900         /* individual connect timeout,
                                               msec */
#define DEFAULT_COMPTIMEOUT_MS  1500        /* individual compile timeout,
                                               msec (FIXME: should be
                                               lower) */
#define DEFAULT_OVERLAP         1           /* number of simultaneous DNS
                                               queries, minus one */
#define DEFAULT_DNSGAP          0           /* number of hosts missing from
                                               DNS tolerated before we stop
                                               looking */
#define DEFAULT_COMPILER        "none"      /* shown in the usage text */
145
146 char canned_query[1000];
147 size_t canned_query_len = 0;
148
149 int opt_latency = 0;
150 int opt_numeric = 0;
151 int opt_overlap = DEFAULT_OVERLAP;
152 int opt_dnsgap = DEFAULT_DNSGAP;
153 int opt_port = DEFAULT_PORT;
154 int opt_protocol = DEFAULT_PROTOCOL;
155 int opt_bigtimeout_sec = DEFAULT_BIGTIMEOUT;
156 int opt_conntimeout_ms = DEFAULT_CONNTIMEOUT_MS;
157 int opt_comptimeout_ms = DEFAULT_COMPTIMEOUT_MS;
158 int opt_dnstimeout_ms = DEFAULT_DNSTIMEOUT_MS;
159 int opt_verbose = 0;
160 int opt_domain = 0;
161 int opt_match = 0;
162 int opt_bang_down = 0;
163 const char *opt_compiler = NULL;
164
165
166 const char *protocol_suffix[] = { NULL, /* to make the rest 1-based */
167 "",
168 ",lzo",
169 ",lzo,cpp" };
170
#define MAXHOSTS 500            /* capacity of the per-host state table */
#define MAXTRIES 5              /* this constant can't be changed without
                                   changing some code */
#define MAXFDS (MAXHOSTS+2)     /* poll slots: one per host plus the two
                                   DNS slave pipes */

/* just plain globals */

/* Maps a socket fd to the index of the host state that owns it. */
int fd2state[MAXHOSTS+1000];    /* kludge - fragile */

/* Counter incremented by server_handle_event() in STATE_CLOSE; its value is
 * what detect_distcc_servers() returns.
 */
int nok;

/* Number of hosts that have reached STATE_DONE. */
int ndone;

/* globals used by other compilation units */
const char *rs_program_name = "lsdistcc";
183
184 /* Forward declarations (solely to prevent compiler warnings) */
185 void usage(void);
186 int bitcompare(const unsigned char *a, const unsigned char *b, int nbits);
187 void timeout_handler(int x);
188 void get_thename(const char**sformat, const char *domain_name,
189 int i, char *thename);
190 int detect_distcc_servers(const char **argv, int argc, int opti,
191 int bigtimeout, int dnstimeout, int matchbits,
192 int overlap, int dnsgap);
193 void server_read_packet_header(state_t *sp);
194 void server_handle_event(state_t *sp);
195
usage(void)196 void usage(void) {
197 printf("Usage: lsdistcc [-tTIMEOUT] [-mBITS] [-nvd] [format]\n\
198 Uses 'for i=1... sprintf(format, i)' to construct names of servers,\n\
199 stops after %d seconds or at second server that doesn't resolve,\n\
200 prints the names of all such servers listening on distcc's port.\n\
201 Default format is %s. \n\
202 If a list of host names are given in the command line,\n\
203 lsdistcc will only check those hosts. \n\
204 Options:\n\
205 -l Output latency in milliseconds after each hostname\n\
206 (not including DNS latency)\n\
207 -n Print IP address rather than name\n\
208 -x Append ,down to down hosts in host list\n\
209 -tTIMEOUT Set number of seconds to stop searching after [%d]\n\
210 -hHTIMEOUT Set number of milliseconds before retrying gethostbyname [%d]\n\
211 -cCTIMEOUT Set number of milliseconds before giving up on connect [%d]\n\
212 (0 to inhibit connect)\n\
213 -kKTIMEOUT Set number of milliseconds before giving up on compile [%d]\n\
214 (0 to inhibit compile)\n\
215 -mBITS Set number of bits of address that must match first host found [0]\n\
216 -oOVERLAP Set number of extra DNS requests to send [%d]\n\
217 -gDNSGAP Set number of missing DNS entries to tolerate [%d]\n\
218 -rPORT Port to connect to [%d]\n\
219 -PPROTOCOL Protocol version to use (1-3) [%d]\n\
220 -pCOMPILER Name of compiler to use [%s]\n\
221 -d Append DNS domain name to format\n\
222 -v Verbose\n\
223 \n\
224 Example:\n\
225 lsdistcc -l -p$COMPILER\n\
226 lsdistcc -p$COMPILER hosta somehost hostx hosty\n\
227 ", DEFAULT_BIGTIMEOUT,
228 DEFAULT_FORMAT,
229 DEFAULT_BIGTIMEOUT,
230 DEFAULT_DNSTIMEOUT_MS,
231 DEFAULT_CONNTIMEOUT_MS,
232 DEFAULT_COMPTIMEOUT_MS,
233 DEFAULT_OVERLAP,
234 DEFAULT_DNSGAP,
235 DEFAULT_PORT,
236 DEFAULT_PROTOCOL,
237 DEFAULT_COMPILER);
238 exit(1);
239 }
240
241
/* Compare the leading nbits bits of a[] and b[], most significant bit of
 * byte 0 first.  With nbits == 1 only the MSB of a[0] and b[0] is examined.
 *
 * Returns 0 when the prefixes are equal and a nonzero value otherwise (the
 * memcmp() result if a whole byte differs, else the differing masked bits).
 */
int bitcompare(const unsigned char *a, const unsigned char *b, int nbits)
{
    const int whole = nbits / 8;    /* bytes compared in full */
    const int partial = nbits & 7;  /* extra high bits of the next byte */

    if (whole != 0) {
        const int diff = memcmp(a, b, (size_t) whole);
        if (diff != 0)
            return diff;
    }

    if (partial == 0)
        return 0;

    /* 0xFF00 >> partial leaves exactly the top 'partial' bits set in the
     * low byte, e.g. partial == 3 gives 0xE0.
     */
    const int mask = (0xFF00 >> partial) & 0xFF;
    return (a[whole] ^ b[whole]) & mask;
}
268
#if 0
/* Disabled self-test for bitcompare().  It stays compiled out because it
 * defines main(); enable it only when building this check on its own.
 */
#include <assert.h>
int main(void)
{
    /* '0' is 0x30 and '1' is 0x31: they differ only in the lowest bit. */
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"0", 8) == 0);
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"1", 8) != 0);
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"1", 7) == 0);
    return 0;
}
#endif
278
279
280 /* On timeout, silently terminate program */
timeout_handler(int x)281 void timeout_handler(int x)
282 {
283 (void) x;
284
285 if (opt_verbose > 0)
286 fprintf(stderr, "Timeout!\n");
287
288 /* FIXME: is it legal to call exit here? */
289 exit(0);
290 }
291
generate_query(void)292 static void generate_query(void)
293 {
294 const char* program = "int foo(){return 0;}";
295 unsigned char lzod_program[1000];
296 unsigned char lzo_work_mem[LZO1X_1_MEM_COMPRESS];
297 lzo_uint lzod_program_len;
298
299 lzo1x_1_compress((const unsigned char *)program, strlen(program),
300 lzod_program, &lzod_program_len,
301 lzo_work_mem);
302
303 switch (opt_protocol) {
304 case 1: {
305 static const char canned_query_fmt_protocol_1[]=
306 "DIST00000001"
307 "ARGC00000005"
308 "ARGV%08x%s"
309 "ARGV00000002-c"
310 "ARGV00000007hello.c"
311 "ARGV00000002-o"
312 "ARGV00000007hello.o"
313 "DOTI%08x%s";
314 sprintf(canned_query,
315 canned_query_fmt_protocol_1,
316 (unsigned)strlen(opt_compiler), opt_compiler,
317 (unsigned)strlen(program), program);
318 canned_query_len = strlen(canned_query);
319 break;
320 }
321
322 case 2: {
323 static const char canned_query_fmt_protocol_2[]=
324 "DIST00000002"
325 "ARGC00000005"
326 "ARGV%08x%s"
327 "ARGV00000002-c"
328 "ARGV00000007hello.c"
329 "ARGV00000002-o"
330 "ARGV00000007hello.o"
331 "DOTI%08x";
332 sprintf(canned_query,
333 canned_query_fmt_protocol_2,
334 (unsigned)strlen(opt_compiler),
335 opt_compiler,
336 (unsigned)lzod_program_len);
337
338 canned_query_len = strlen(canned_query) + lzod_program_len;
339 memcpy(canned_query + strlen(canned_query),
340 lzod_program, lzod_program_len);
341
342 break;
343 }
344
345 case 3: {
346 static const char canned_query_fmt_protocol_3[]=
347 "DIST00000003"
348 "CDIR00000001/"
349 "ARGC00000005"
350 "ARGV%08x%s"
351 "ARGV00000002-c"
352 "ARGV00000007hello.c"
353 "ARGV00000002-o"
354 "ARGV00000007hello.o"
355 "NFIL00000001"
356 "NAME00000008/hello.c"
357 "FILE%08x";
358
359 sprintf(canned_query,
360 canned_query_fmt_protocol_3,
361 (unsigned)strlen(opt_compiler),
362 opt_compiler,
363 (unsigned)lzod_program_len);
364
365 canned_query_len = strlen(canned_query) + lzod_program_len;
366 memcpy(canned_query + strlen(canned_query),
367 lzod_program, lzod_program_len);
368 break;
369 }
370 }
371 }
372
373 /* Try reading a protocol packet header */
server_read_packet_header(state_t * sp)374 void server_read_packet_header(state_t *sp)
375 {
376 int arg;
377 int nread;
378
379 nread = read(sp->fd, sp->curhdrbuf + sp->curhdrlen,
380 (size_t)(12 - sp->curhdrlen));
381 if (nread == 0) {
382 /* A nonblocking read returning zero bytes means EOF.
383 * FIXME: it may mean this only on the first read after poll said
384 * bytes were ready, so beware of false EOFs here?
385 */
386 if (opt_verbose > 0)
387 fprintf(stderr, "lsdistcc: premature EOF while waiting for "
388 "result from server %s\n",
389 sp->req.hname);
390 sp->status = STATE_CLOSE;
391 return;
392 }
393
394 if (nread > 0)
395 sp->curhdrlen += nread;
396
397 if (sp->curhdrlen < 12)
398 return;
399
400 arg = (int)strtol(sp->curhdrbuf+4, NULL, 16);
401
402 if (opt_verbose > 2) {
403 int i;
404 printf("Got hdr '%12.12s' = ", sp->curhdrbuf);
405 for (i=0; i < sp->curhdrlen; i++)
406 printf("%2x", sp->curhdrbuf[i]);
407 printf("\n");
408 }
409
410 /* Parse and validate the packet header, move on to next state */
411 switch (sp->status) {
412 case STATE_READ_DONEPKT:
413 if (memcmp(sp->curhdrbuf, "DONE", 4) != 0) {
414 if (opt_verbose > 1)
415 fprintf(stderr,
416 "%s wrong protocol; expected DONE, got %4.4s!\n",
417 sp->req.hname, sp->curhdrbuf);
418 sp->status = STATE_CLOSE;
419 break;
420 }
421 if (arg != opt_protocol) {
422 if (opt_verbose > 1)
423 fprintf(stderr,
424 "%s wrong protocol, expected %d got %d!\n",
425 sp->req.hname,
426 opt_protocol,
427 arg);
428 sp->status = STATE_CLOSE;
429 break;
430 }
431 /* No body to this type. Read next packet. */
432 sp->curhdrlen = 0;
433 sp->status = STATE_READ_STATPKT;
434 break;
435
436 case STATE_READ_STATPKT:
437 if (memcmp(sp->curhdrbuf, "STAT", 4) != 0) {
438 if (opt_verbose > 1)
439 fprintf(stderr,
440 "%s wrong protocol! Expected STAT, got %4.4s\n",
441 sp->req.hname, sp->curhdrbuf);
442 sp->status = STATE_CLOSE;
443 break;
444 }
445 if (arg != 0) {
446 if (opt_verbose > 1) {
447 /* FIXME: only conditional because my server uses load shedding */
448 fprintf(stderr,
449 "lsdistcc: warning: test compile on %s failed! "
450 "status 0x%x\n",
451 sp->req.hname, arg);
452 }
453 sp->status = STATE_CLOSE;
454 break;
455 }
456 /* No body to this type. Read next packet. */
457 sp->curhdrlen = 0;
458 sp->status = STATE_READ_REST;
459 break;
460
461 default:
462 fprintf(stderr, "bug\n");
463 exit(1);
464 }
465 }
466
467 /* Grind state machine for a single server */
468 /* Take one transition through the state machine, unless that takes you
469 to STATE_CLOSE, in which case go through that state too, into STATE_DONE
470 */
471
server_handle_event(state_t * sp)472 void server_handle_event(state_t *sp)
473 {
474 struct timeval now;
475 gettimeofday(&now, 0);
476
477 do {
478 struct sockaddr_in sa;
479
480 if (opt_verbose > 2)
481 fprintf(stderr,
482 "now %lld %ld: server_handle_event: %s: state %d\n",
483 (long long) now.tv_sec, (long) now.tv_usec/1000,
484 sp->req.hname, sp->status);
485
486 switch (sp->status) {
487 case STATE_CONNECT:
488 if (opt_conntimeout_ms == 0) {
489 sp->fd = -1;
490 sp->up = 1;
491 sp->status = STATE_CLOSE;
492 break;
493 }
494
495 /* Now do a nonblocking connect to that address */
496 memset(&sa, 0, sizeof sa);
497 sa.sin_family = AF_INET;
498 sa.sin_port = htons(opt_port);
499 memcpy(&sa.sin_addr, sp->res.addr, 4);
500
501 if ((sp->fd = socket(sa.sin_family, SOCK_STREAM, 0)) == -1) {
502 fprintf(stderr, "failed to create socket: %s", strerror(errno));
503 sp->status = STATE_DONE;
504 } else {
505 dcc_set_nonblocking(sp->fd);
506 /* start the nonblocking connect... */
507 if (opt_verbose > 0)
508 fprintf(stderr,
509 "now %lld %ld: Connecting to %s\n",
510 (long long) now.tv_sec, (long) now.tv_usec/1000,
511 sp->req.hname);
512 if (connect(sp->fd, (struct sockaddr *)&sa, sizeof(sa))
513 && errno != EINPROGRESS) {
514 if (opt_verbose > 0)
515 fprintf(stderr, "failed to connect socket: %s",
516 strerror(errno));
517 sp->status = STATE_CLOSE;
518 } else {
519 sp->status = STATE_CONNECTING;
520 fd2state[sp->fd] = sp->res.id;
521 gettimeofday(&now, 0);
522 sp->start = now;
523 sp->deadline = now;
524 sp->deadline.tv_usec += 1000 * opt_conntimeout_ms;
525 sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
526 sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
527 }
528 }
529 break;
530 case STATE_CONNECTING:
531 {
532 int connecterr;
533 socklen_t len = sizeof(connecterr);
534 int nsend;
535 int nsent;
536
537 if (getsockopt(sp->fd, SOL_SOCKET, SO_ERROR,
538 (char *)&connecterr, &len) < 0) {
539 fprintf(stderr, "getsockopt SO_ERROR failed?!");
540 sp->status = STATE_CLOSE;
541 break;
542 }
543 if (connecterr) {
544 if (opt_verbose > 0)
545 fprintf(stderr,
546 "now %lld %ld: Connecting to %s failed "
547 "with errno %d = %s\n",
548 (long long) now.tv_sec, (long) now.tv_usec/1000,
549 sp->req.hname, connecterr, strerror(connecterr));
550 sp->status = STATE_CLOSE; /* not listening */
551 break;
552 }
553 if (opt_comptimeout_ms == 0 || !opt_compiler) {
554 /* connect succeeded, don't need to compile */
555 sp->up = 1;
556 sp->status = STATE_CLOSE;
557 break;
558 }
559 if (opt_verbose > 0)
560 fprintf(stderr,
561 "now %lld %ld: %s: sending compile request\n",
562 (long long) now.tv_sec, (long) now.tv_usec/1000,
563 sp->req.hname);
564 nsend = canned_query_len;
565 nsent = write(sp->fd, canned_query, nsend);
566 if (nsent != nsend) {
567 if (opt_verbose > 1) {
568 if (nsent == -1)
569 fprintf(stderr,
570 "now %lld %ld: Sending to %s failed, "
571 "errno %d\n",
572 (long long) now.tv_sec, (long) now.tv_usec/1000,
573 sp->req.hname, connecterr);
574 else
575 fprintf(stderr,
576 "now %lld %ld: Sending to %s failed, "
577 "nsent %d != nsend %d\n",
578 (long long) now.tv_sec, (long) now.tv_usec/1000,
579 sp->req.hname, nsent, nsend);
580 }
581 /* ??? remote disconnect? Buffer too small? */
582 sp->status = STATE_CLOSE;
583 break;
584 }
585 sp->status=STATE_READ_DONEPKT;
586 sp->curhdrlen = 0;
587 sp->deadline = now;
588 sp->deadline.tv_usec += 1000 * opt_comptimeout_ms;
589 sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
590 sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
591 }
592 break;
593
594 case STATE_READ_DONEPKT:
595 case STATE_READ_STATPKT:
596 server_read_packet_header(sp);
597 break;
598
599 case STATE_READ_REST:
600 {
601 char buf[1000];
602 int nread;
603 nread = read(sp->fd, buf, sizeof(buf));
604 if (nread == 0) {
605 /* A nonblocking read returning zero bytes means EOF.
606 * FIXME: it may mean this only on the first read after
607 * poll said bytes were ready, so beware of false EOFs here?
608 */
609 sp->up = 1;
610 sp->status = STATE_CLOSE;
611 }
612 }
613 break;
614
615 case STATE_CLOSE:
616 if (sp->fd != -1) {
617 close(sp->fd);
618 sp->fd = -1;
619 }
620
621 if (opt_bang_down || sp->up) {
622 if (opt_numeric)
623 printf("%d.%d.%d.%d", sp->res.addr[0], sp->res.addr[1],
624 sp->res.addr[2], sp->res.addr[3]);
625 else
626 printf("%s", sp->req.hname);
627
628 if (opt_port != DEFAULT_PORT)
629 printf(":%d", opt_port);
630
631 printf("%s", protocol_suffix[opt_protocol]);
632
633 if (opt_bang_down && !sp->up)
634 printf(",down");
635
636 if (opt_latency) {
637 int latency_ms;
638 gettimeofday(&now, 0);
639 latency_ms = (now.tv_usec - sp->start.tv_usec) /
640 1000 + 1000 * (now.tv_sec - sp->start.tv_sec);
641 printf(" %d", latency_ms);
642 }
643 putchar('\n');
644 if (opt_verbose)
645 fflush(stdout);
646 }
647 nok++;
648 sp->status = STATE_DONE;
649 ndone++;
650 break;
651
652 case STATE_DONE:
653 ;
654 default:
655 ;
656 }
657 } while (sp->status == STATE_CLOSE);
658 }
659
660 /* A helper function for detecting all listening distcc servers: this
661 * routine makes one pass through the poll() loop and analyzes what it
662 * sees.
663 */
one_poll_loop(struct rslave_s * rs,struct state_s states[],int start_state,int end_state,int nwithtries[],int * ngotaddr,int * nbaddns,unsigned char firstipaddr[4],int dnstimeout_usec,int matchbits,int overlap,int dnsgap)664 static int one_poll_loop(struct rslave_s* rs, struct state_s states[],
665 int start_state, int end_state,
666 int nwithtries[], int* ngotaddr, int* nbaddns,
667 unsigned char firstipaddr[4], int dnstimeout_usec,
668 int matchbits, int overlap, int dnsgap)
669 {
670 int i;
671 int nfds;
672 struct state_s *sp;
673 int nready;
674 int found;
675 struct timeval now;
676 struct pollfd pollfds[MAXFDS];
677
678 /* See which sockets have any events */
679 nfds = 0;
680 memset(pollfds, 0, sizeof(pollfds));
681 pollfds[nfds].fd = rslave_getfd_fromSlaves(rs);
682 pollfds[nfds++].events = POLLIN;
683 pollfds[nfds].fd = rslave_getfd_toSlaves(rs);
684 /* Decide if we want to be notified if slaves are ready to handle
685 * a DNS request.
686 * To avoid sending too many DNS requests, we avoid sending more if
687 * the number of first tries is greater than 'overlap'
688 * or the number of outstanding DNS requests plus the number of
689 * already satisfied ones would be greater than or equal to the max
690 * number of hosts we're looking for.
691 */
692 pollfds[nfds++].events = ((nwithtries[1] <= overlap) &&
693 (nwithtries[1]+
694 nwithtries[2]+
695 nwithtries[3]+
696 nwithtries[4]+
697 *ngotaddr < end_state)) ? POLLOUT : 0;
698 /* Set interest bits.
699 * When connecting, we want to know if we can write (aka if the
700 * connect has finished); when waiting for a compile to finish,
701 * we want to know if we can read.
702 */
703 for (i=start_state; i<=end_state; i++) {
704 switch (states[i].status) {
705 case STATE_CONNECTING:
706 pollfds[nfds].fd = states[i].fd;
707 pollfds[nfds++].events = POLLOUT;
708 break;
709 case STATE_READ_DONEPKT:
710 case STATE_READ_STATPKT:
711 case STATE_READ_REST:
712 pollfds[nfds].fd = states[i].fd;
713 pollfds[nfds++].events = POLLIN;
714 break;
715 default: ;
716 }
717 }
718 /* When polling, wait for no more than 50 milliseconds.
719 * Anything lower doesn't help performance much.
720 * Anything higher would inflate all our timeouts,
721 * cause retries not to be sent as soon as they should,
722 * and make the program take longer than it should.
723 */
724 nready = poll(pollfds, (unsigned)nfds, 50);
725 if (nready == -1) {
726 fprintf(stderr, "lsdistcc: poll failed: %s\n", strerror(errno));
727 exit(1);
728 }
729 gettimeofday(&now, 0);
730
731
732 /***** Check for timeout events *****/
733 sp = NULL;
734 found = FALSE;
735 for (i=start_state; i<=end_state; i++) {
736 sp = &states[i];
737 if (sp->status == STATE_LOOKUP
738 && sp->ntries > 0 && sp->ntries < MAXTRIES
739 && (sp->deadline.tv_sec < now.tv_sec ||
740 (sp->deadline.tv_sec == now.tv_sec &&
741 sp->deadline.tv_usec < now.tv_usec))) {
742 found = TRUE;
743 nwithtries[sp->ntries]--;
744 sp->ntries++;
745 nwithtries[sp->ntries]++;
746 if (opt_verbose > 0)
747 fprintf(stderr,
748 "now %lld %ld: Resending %s because "
749 "deadline was %lld %ld\n",
750 (long long) now.tv_sec, (long) now.tv_usec/1000,
751 sp->req.hname, (long long) sp->deadline.tv_sec,
752 (long) sp->deadline.tv_usec/1000);
753 break;
754 }
755
756 if (sp->status == STATE_CONNECTING
757 && (sp->deadline.tv_sec < now.tv_sec ||
758 (sp->deadline.tv_sec == now.tv_sec &&
759 sp->deadline.tv_usec < now.tv_usec))) {
760 sp->status = STATE_CLOSE;
761 server_handle_event(sp);
762 if (opt_verbose > 0)
763 fprintf(stderr,
764 "now %lld %ld: %s timed out while connecting\n",
765 (long long) now.tv_sec, (long) now.tv_usec/1000,
766 sp->req.hname);
767 }
768 if ((sp->status == STATE_READ_DONEPKT ||
769 sp->status == STATE_READ_STATPKT ||
770 sp->status == STATE_READ_REST)
771 && (sp->deadline.tv_sec < now.tv_sec ||
772 (sp->deadline.tv_sec == now.tv_sec &&
773 sp->deadline.tv_usec < now.tv_usec))) {
774 sp->status = STATE_CLOSE;
775 server_handle_event(sp);
776 if (opt_verbose > 0)
777 fprintf(stderr,
778 "now %lld %ld: %s timed out while compiling\n",
779 (long long) now.tv_sec, (long) now.tv_usec/1000,
780 sp->req.hname);
781 }
782 }
783 if (!found && (nwithtries[1] <= overlap) &&
784 (pollfds[1].revents & POLLOUT)) {
785 /* Look for a fresh record to send */
786 for (i=start_state; i<=end_state; i++) {
787 sp = &states[i];
788 if (sp->status == STATE_LOOKUP && sp->ntries == 0) {
789 found = TRUE;
790 nwithtries[sp->ntries]--;
791 sp->ntries++;
792 nwithtries[sp->ntries]++;
793 break;
794 }
795 }
796 }
797 /* If we found a record to send or resend, send it,
798 and mark its timeout. */
799 if (found) {
800 if (opt_verbose)
801 fprintf(stderr, "now %lld %ld: Looking up %s\n",
802 (long long) now.tv_sec, (long) now.tv_usec/1000,
803 sp->req.hname);
804 rslave_writeRequest(rs, &sp->req);
805 sp->deadline = now;
806 sp->deadline.tv_usec += dnstimeout_usec;
807 sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
808 sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
809 }
810
811 /***** Check poll results for DNS results *****/
812 if (pollfds[0].revents & POLLIN) {
813 /* A reply is ready, huzzah! */
814 rslave_result_t result;
815 if (rslave_readResult(rs, &result)) {
816 printf("bug: can't read from pipe\n");
817 } else {
818 /* Find the matching state_t, save the result,
819 and mark it as done */
820 /* printf("result.id %d\n", result.id); fflush(stdout); */
821 assert(result.id >= start_state && result.id <= end_state);
822 sp = &states[result.id];
823 if (sp->status == STATE_LOOKUP) {
824 nwithtries[sp->ntries]--;
825 sp->res = result;
826 (*ngotaddr)++;
827 if (matchbits > 0) {
828 if (*ngotaddr == 1) {
829 memcpy(firstipaddr, result.addr, 4);
830 } else {
831 /* break if new server on a 'different network'
832 than first server */
833 if (bitcompare(firstipaddr, result.addr, matchbits))
834 result.err = -1;
835 }
836 }
837
838 if (result.err) {
839 if (opt_verbose)
840 fprintf(stderr, "now %lld %ld: %s not found\n",
841 (long long) now.tv_sec, (long) now.tv_usec/1000,
842 sp->req.hname);
843 sp->status = STATE_DONE;
844 ndone++;
845 (*nbaddns)++;
846 if (*nbaddns > dnsgap) {
847 int highest = 0;
848 /* start no more lookups */
849 for (i=start_state; i <= end_state; i++)
850 if (states[i].ntries > 0)
851 highest = i;
852 assert(highest <= end_state);
853 if (opt_verbose && end_state != highest)
854 fprintf(stderr,
855 "Already searching up to host %d, "
856 "won't search any higher\n",
857 highest);
858 end_state = highest;
859 assert(end_state <= MAXHOSTS);
860 }
861 } else {
862 sp->status = STATE_CONNECT;
863 server_handle_event(sp);
864 }
865 }
866 }
867 }
868
869 /***** Grind state machine for each remote server *****/
870 for (i=2; i<nfds && i < MAXFDS; i++) {
871 sp = states + fd2state[pollfds[i].fd]; /* FIXME */
872 if (pollfds[i].revents)
873 server_handle_event(sp);
874 }
875 return end_state;
876 }
877
878 /* Get the name based on the sformat. If the first element in sformat is a
879 * format, ignore the rest, and use the format to generate the series of names;
880 * otherwise, copy the name from sformat. Attach domain_name if needed.
881 */
get_thename(const char ** sformat,const char * domain_name,int i,char * thename)882 void get_thename(const char**sformat, const char *domain_name, int i,
883 char *thename)
884 {
885 if (strstr(sformat[0], "%d") != NULL)
886 sprintf(thename, sformat[0], i);
887 else
888 strcpy(thename, sformat[i-1]);
889 if (opt_domain) {
890 strcat(thename, ".");
891 strcat(thename, domain_name);
892 }
893 }
894
895
896 /* Detect all listening distcc servers and print their names to stdout.
897 * Looks for servers numbered 1 through infinity, stops at
898 * first server that doesn't resolve in DNS, or after 'timeout' seconds,
899 * whichever comes first.
900 * On entry:
901 * sformat: format of names of distcc servers to check
902 * bigtimeout: how many seconds to terminate slow run after
903 * dnstimeout: how many milliseconds before retrying a particular
904 * gethostbyname call
905 * matchbits: top matchbits of address must match first host found,
906 else stop searching
907 * overlap: how many extra DNS queries to keep in flight normally
908 * dnsgap: how many missing DNS entries to tolerate
909 * On exit:
910 * returns number of servers found.
911 */
detect_distcc_servers(const char ** argv,int argc,int opti,int bigtimeout,int dnstimeout,int matchbits,int overlap,int dnsgap)912 int detect_distcc_servers(const char **argv, int argc, int opti,
913 int bigtimeout, int dnstimeout,
914 int matchbits, int overlap, int dnsgap)
915 {
916 unsigned char firstipaddr[4];
917 int dnstimeout_usec = dnstimeout * 1000; /* how long before
918 resending gethostbyname */
919 int i;
920 int n = MAXHOSTS;
921 int maxfds = MAXHOSTS + 10;
922 char thename[256];
923
924 struct state_s states[MAXHOSTS+1];
925 int start_state, end_state;
926 int ngotaddr;
927 int nbaddns;
928 int nwithtries[MAXTRIES+1];
929
930 struct rslave_s rs;
931
932 const char *default_format = DEFAULT_FORMAT;
933 const char **sformat = &default_format;
934 const char *domain_name;
935 if (opt_domain) {
936 if (dcc_get_dns_domain(&domain_name)) {
937 fprintf(stderr, "Can't get domain name\n");
938 exit(1);
939 }
940 }
941 if (opti < argc) {
942 if (strstr(argv[opti], "%d") != NULL) {
943 sformat = &argv[opti++];
944 } else {
945 /* A list of host names can be given in the command line */
946 n = argc-opti;
947 sformat = &argv[opti++];
948 }
949 }
950
951 /* Figure out the limit on the number of fd's we can open, as per
952 * the OS. We allow 8 fds for uses other than this in the program
953 * (eg stdin, stdout). If possible, ask the OS for more fds.
954 * We'll ideally use n + 2 fds in our poll loop, so ask for n + 10
955 * fds total.
956 */
957 struct rlimit rlim = {0, 0};
958 getrlimit(RLIMIT_NOFILE, &rlim);
959 if (rlim.rlim_cur < (rlim_t)n + 10) {
960 rlim.rlim_cur = (rlim_t)n + 10;
961 if (rlim.rlim_cur > rlim.rlim_max)
962 rlim.rlim_cur = rlim.rlim_max;
963 setrlimit(RLIMIT_NOFILE, &rlim);
964 getrlimit(RLIMIT_NOFILE, &rlim);
965 if (rlim.rlim_cur > 14)
966 maxfds = (int)(rlim.rlim_cur - 10);
967 }
968
969 /* Don't run longer than bigtimeout seconds */
970 signal(SIGALRM, timeout_handler);
971 alarm((unsigned) bigtimeout);
972
973 if (rslave_init(&rs))
974 return 0;
975
976 ngotaddr = 0;
977 memset(nwithtries, 0, sizeof(nwithtries));
978 memset(states, 0, sizeof(states));
979
980 /* all hosts start off in state 'sent 0' */
981 for (i=1; i<=n; i++) {
982 rslave_request_t *req = &states[i].req;
983 get_thename(sformat, domain_name, i, thename);
984 rslave_request_init(req, thename, i);
985 states[i].status = STATE_LOOKUP;
986 states[i].ntries = 0;
987 nwithtries[0]++;
988 }
989
990 ndone = 0;
991 nok = 0;
992 nbaddns = 0;
993 /* Loop until we're done finding distcc servers. We have to do
994 * this loop in groups, with each group using no more than maxfds
995 * fd's. One call to one_poll_loop uses n + 2 fds.
996 */
997 for (start_state = 1; start_state <= n; start_state = end_state + 1) {
998 int orig_end_state;
999 end_state = start_state + maxfds-2;
1000 if (end_state > n)
1001 end_state = n;
1002 orig_end_state = end_state;
1003 while (ndone < end_state) {
1004 end_state = one_poll_loop(&rs, states, start_state, end_state,
1005 nwithtries, &ngotaddr, &nbaddns,
1006 firstipaddr, dnstimeout_usec,
1007 matchbits, overlap, dnsgap);
1008 }
1009 if (end_state < orig_end_state) {
1010 /* If we lowered end_state, it means we decided to stop
1011 * searching early.
1012 */
1013 break;
1014 }
1015 }
1016 return nok;
1017 }
1018
main(int argc,char ** argv)1019 int main(int argc, char **argv)
1020 {
1021 int opti;
1022 int nfound;
1023
1024 for (opti = 1; opti < argc && argv[opti][0] == '-'; opti++) {
1025 switch (argv[opti][1]) {
1026 case 'm':
1027 opt_match = atoi(argv[opti]+2);
1028 if (opt_match > 31 || opt_match < 0)
1029 usage();
1030 break;
1031 case 't':
1032 opt_bigtimeout_sec = atoi(argv[opti]+2);
1033 if (opt_bigtimeout_sec < 0)
1034 usage();
1035 break;
1036 case 'h':
1037 opt_dnstimeout_ms = atoi(argv[opti]+2);
1038 if (opt_dnstimeout_ms < 0)
1039 usage();
1040 break;
1041 case 'c':
1042 opt_conntimeout_ms = atoi(argv[opti]+2);
1043 if (opt_conntimeout_ms < 0)
1044 usage();
1045 break;
1046 case 'k':
1047 opt_comptimeout_ms = atoi(argv[opti]+2);
1048 if (opt_comptimeout_ms < 0)
1049 usage();
1050 break;
1051 case 'o':
1052 opt_overlap = atoi(argv[opti]+2);
1053 if (opt_overlap < 0)
1054 usage();
1055 break;
1056 case 'g':
1057 opt_dnsgap = atoi(argv[opti]+2);
1058 if (opt_dnsgap < 0)
1059 usage();
1060 break;
1061 case 'P':
1062 opt_protocol = atoi(argv[opti]+2);
1063 if (opt_protocol <= 0 || opt_protocol > 3) {
1064 usage();
1065 }
1066 break;
1067 case 'p':
1068 opt_compiler = argv[opti]+2;
1069 if (! *opt_compiler)
1070 usage();
1071 break;
1072 case 'r':
1073 opt_port = atoi(argv[opti]+2);
1074 if (opt_port <= 0)
1075 usage();
1076 break;
1077 case 'l':
1078 opt_latency = 1;
1079 break;
1080 case 'n':
1081 opt_numeric = 1;
1082 break;
1083 case 'x':
1084 opt_bang_down = 1;
1085 break;
1086 case 'v':
1087 opt_verbose++;
1088 break;
1089 case 'd':
1090 opt_domain++;
1091 break;
1092 default:
1093 usage();
1094 }
1095 }
1096
1097 if (opt_compiler)
1098 generate_query();
1099
1100 nfound = detect_distcc_servers((const char **)argv, argc, opti,
1101 opt_bigtimeout_sec,
1102 opt_dnstimeout_ms,
1103 opt_match,
1104 opt_overlap,
1105 opt_dnsgap);
1106
1107 /* return failure if no servers found */
1108 return (nfound > 0) ? 0 : 1;
1109 }
1110