1 /* -*- c-file-style: "java"; indent-tabs-mode: nil; tab-width: 4; fill-column: 78 -*-
2  *
3  * lsdistcc -- A simple distcc server discovery program
4  * Assumes all distcc servers are in DNS and are named distcc1...distccN.
5  *
6  * Copyright 2005 Google Inc.
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version 2
11  * of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
21  * USA.
22  */
23 /* Program to autodetect listening distcc servers by looking in DNS
24  * for hosts named according to a given format.
25  * hosts are considered good servers based solely on whether their
26  * name fits the format and whether they are listening on the right port
27  * (and optionally whether they respond when you send them a compile job).
28  * Stops looking for servers after the first one it doesn't find in DNS.
29  * Prints results to stdout.
30  * Terminates with error status if no servers found.
31  *
32  * Examples:
33  *
34  * In your build script, add the lines
35  *   DISTCC_HOSTS=`lsdistcc`
36  *   export DISTCC_HOSTS
37  * before the line that invokes make.
38  *
39  * Or, in your Makefile, add the lines
40  *   export DISTCC_HOSTS = $(shell lsdistcc)
41  *
42  * Changelog:
43  *
44  * Wed Jun 20 2007 - Manos Renieris, Google
45  * Added -P option.
46  *
47  * Mon Jun  4 2007 - Manos Renieris, Google
48  * Reformatted in 80 columns.
49  *
50  * Tue Jan 31 2006 - Dan Kegel, Google
51  * Added -x option to list down hosts with ,down suffix (since
52  * in sharded server cache mode, the hash space is partitioned
53  * over all servers regardless of whether they're up or down at the moment)
54  *
55  * Thu Jan 5 2006 - Dan Kegel, Google
56  * Actually read the output from the server and partially parse it.
57  *
58  * Sat Nov 26 2005 - Dan Kegel, Google
59  * Added -l option, improved -v output
60  *
61  * Tue Nov 22 2005 - Dan Kegel & Dongmin Zhang, Google
62  *  added -pcc option to check that server actually responds when you send
63  *      it a job
64  *  added -c0 option to disable connect check
65  *
66  * Thu Oct 13 2005 - Dan Kegel, Google
67  *  use rslave to do asynchronous-ish hostname lookup, do all connects
68  *      in parallel
69  *
70  * Wed Oct  5 2005 - Dan Kegel, Google
71  *  Added -d, -m options
72  *
73  * Fri Sep 16 2005 - Dan Kegel, Google
74  *  Created
75  *  Added -v option
76 --------------------------------------------------------------------------*/
77 
78 #include <config.h>
79 
80 #include <netdb.h>
81 #include <assert.h>
82 #include <stdio.h>
83 #include <string.h>
84 #include <unistd.h>
85 #include <stdlib.h>
86 #include <signal.h>
87 #include <errno.h>
88 #include <fcntl.h>
89 #include <sys/types.h>
90 #include <sys/socket.h>
91 #include <poll.h>
92 #include <sys/resource.h>
93 #include <sys/time.h>
94 #include <netinet/in.h>
95 
96 #include "distcc.h"
97 #include "clinet.h"
98 #include "netutil.h"
99 #include "util.h"
100 #include "trace.h"
101 #include "rslave.h"
102 #include "../lzo/minilzo.h"
103 
104 /* Linux calls this setrlimit() argument NOFILE; bsd calls it OFILE */
105 #ifndef RLIMIT_NOFILE
106 #define RLIMIT_NOFILE  RLIMIT_OFILE
107 #endif
108 
/* Steps a single host goes through while being probed.  The numeric
 * values are what the verbose trace prints as "state %d".
 */
enum status_e {
    STATE_LOOKUP       = 0,   /* waiting for the DNS lookup result */
    STATE_CONNECT      = 1,   /* address known; connect not started yet */
    STATE_CONNECTING   = 2,   /* nonblocking connect in progress */
    STATE_READ_DONEPKT = 3,   /* request sent; expecting the DONE header */
    STATE_READ_STATPKT = 4,   /* expecting the STAT header */
    STATE_READ_REST    = 5,   /* reading the remaining reply until EOF */
    STATE_CLOSE        = 6,   /* close the socket and report the host */
    STATE_DONE         = 7    /* nothing more to do for this host */
};
117 
118 struct state_s {
119     rslave_request_t req;
120     rslave_result_t res;
121     struct timeval start;
122     struct timeval deadline;
123     char curhdrbuf[12];
124     int curhdrlen;
125     enum status_e status;
126     int ntries;
127     int fd;
128     int up;     /* default is 0, set to 1 on success */
129 };
130 typedef struct state_s state_t;
131 
/* Default parameters (each can be overridden on the command line) */

/* hostname format */
#define DEFAULT_FORMAT "distcc%d"

/* TCP port to connect to */
#define DEFAULT_PORT 3632

/* protocol we'll try to speak */
#define DEFAULT_PROTOCOL 1

/* max total runtime, seconds */
#define DEFAULT_BIGTIMEOUT 7

/* individual DNS timeout, msec */
#define DEFAULT_DNSTIMEOUT_MS 500

/* individual connect timeout, msec */
#define DEFAULT_CONNTIMEOUT_MS 900

/* individual compile timeout, msec (FIXME: should be lower) */
#define DEFAULT_COMPTIMEOUT_MS 1500

/* number of simultaneous DNS queries, minus one */
#define DEFAULT_OVERLAP 1

/* number of missing hosts in DNS before we stop looking */
#define DEFAULT_DNSGAP 0

/* compiler name shown as the default in the usage text */
#define DEFAULT_COMPILER "none"
145 
146 char canned_query[1000];
147 size_t canned_query_len = 0;
148 
149 int opt_latency = 0;
150 int opt_numeric = 0;
151 int opt_overlap = DEFAULT_OVERLAP;
152 int opt_dnsgap = DEFAULT_DNSGAP;
153 int opt_port = DEFAULT_PORT;
154 int opt_protocol = DEFAULT_PROTOCOL;
155 int opt_bigtimeout_sec = DEFAULT_BIGTIMEOUT;
156 int opt_conntimeout_ms = DEFAULT_CONNTIMEOUT_MS;
157 int opt_comptimeout_ms = DEFAULT_COMPTIMEOUT_MS;
158 int opt_dnstimeout_ms = DEFAULT_DNSTIMEOUT_MS;
159 int opt_verbose = 0;
160 int opt_domain = 0;
161 int opt_match = 0;
162 int opt_bang_down = 0;
163 const char *opt_compiler = NULL;
164 
165 
/* Suffix printed after a host name, indexed by protocol version.
 * Slot 0 is unused so that the table is 1-based.
 */
const char *protocol_suffix[] = {
    NULL,
    "",
    ",lzo",
    ",lzo,cpp"
};

#define MAXHOSTS 500

/* This constant can't be changed without changing some code: the poll
 * loop adds up nwithtries[1] through nwithtries[4] by hand.
 */
#define MAXTRIES 5

#define MAXFDS (MAXHOSTS+2)

/* just plain globals */

/* Indexed by socket descriptor; server_handle_event() stores the host's
 * res.id here.  kludge - fragile: descriptors are assumed to stay below
 * MAXHOSTS+1000.
 */
int fd2state[MAXHOSTS+1000];

int nok;        /* tally kept by server_handle_event() in STATE_CLOSE */
int ndone;      /* hosts that have reached STATE_DONE */

/* globals used by other compilation units */
const char *rs_program_name = "lsdistcc";
183 
184 /* Forward declarations (solely to prevent compiler warnings) */
185 void usage(void);
186 int bitcompare(const unsigned char *a, const unsigned char *b, int nbits);
187 void timeout_handler(int x);
188 void get_thename(const char**sformat, const char *domain_name,
189                  int i, char *thename);
190 int detect_distcc_servers(const char **argv, int argc, int opti,
191                           int bigtimeout, int dnstimeout, int matchbits,
192                           int overlap, int dnsgap);
193 void server_read_packet_header(state_t *sp);
194 void server_handle_event(state_t *sp);
195 
usage(void)196 void usage(void) {
197         printf("Usage: lsdistcc [-tTIMEOUT] [-mBITS] [-nvd] [format]\n\
198 Uses 'for i=1... sprintf(format, i)' to construct names of servers,\n\
199 stops after %d seconds or at second server that doesn't resolve,\n\
200 prints the names of all such servers listening on distcc's port.\n\
201 Default format is %s. \n\
202 If a list of host names are given in the command line,\n\
203 lsdistcc will only check those hosts. \n\
204 Options:\n\
205 -l       Output latency in milliseconds after each hostname\n\
206            (not including DNS latency)\n\
207 -n       Print IP address rather than name\n\
208 -x       Append ,down to down hosts in host list\n\
209 -tTIMEOUT  Set number of seconds to stop searching after [%d]\n\
210 -hHTIMEOUT Set number of milliseconds before retrying gethostbyname [%d]\n\
211 -cCTIMEOUT Set number of milliseconds before giving up on connect [%d]\n\
212            (0 to inhibit connect)\n\
213 -kKTIMEOUT Set number of milliseconds before giving up on compile [%d]\n\
214            (0 to inhibit compile)\n\
215 -mBITS     Set number of bits of address that must match first host found [0]\n\
216 -oOVERLAP  Set number of extra DNS requests to send [%d]\n\
217 -gDNSGAP   Set number of missing DNS entries to tolerate [%d]\n\
218 -rPORT     Port to connect to [%d]\n\
219 -PPROTOCOL Protocol version to use (1-3) [%d]\n\
220 -pCOMPILER Name of compiler to use [%s]\n\
221 -d       Append DNS domain name to format\n\
222 -v       Verbose\n\
223 \n\
224 Example:\n\
225 lsdistcc -l -p$COMPILER\n\
226 lsdistcc -p$COMPILER hosta somehost hostx hosty\n\
227 ", DEFAULT_BIGTIMEOUT,
228    DEFAULT_FORMAT,
229    DEFAULT_BIGTIMEOUT,
230    DEFAULT_DNSTIMEOUT_MS,
231    DEFAULT_CONNTIMEOUT_MS,
232    DEFAULT_COMPTIMEOUT_MS,
233    DEFAULT_OVERLAP,
234    DEFAULT_DNSGAP,
235    DEFAULT_PORT,
236    DEFAULT_PROTOCOL,
237    DEFAULT_COMPILER);
238         exit(1);
239 }
240 
241 
/* Compare the leading nbits bits of a[] and b[], most significant bit of
 * byte 0 first.  With nbits == 1 only the MSB of a[0] and b[0] is
 * compared.
 *
 * Returns 0 if the prefixes are equal and nonzero otherwise.  Both arrays
 * must hold at least (nbits + 7) / 8 bytes.
 */
int bitcompare(const unsigned char *a, const unsigned char *b, int nbits)
{
    const int whole = nbits / 8;    /* bytes compared in full */
    const int extra = nbits & 7;    /* leading bits of the next byte */
    int topbits;

    if (whole != 0) {
        int diff = memcmp(a, b, (size_t) whole);
        if (diff != 0)
            return diff;
    }

    if (extra == 0)
        return 0;

    /* Keep only the `extra` most significant bits of a byte. */
    topbits = (0xff << (8 - extra)) & 0xff;
    return (a[whole] ^ b[whole]) & topbits;
}

#if 0
/* Disabled stand-alone self-test for bitcompare(). */
#include <assert.h>
int main(void)
{
    /* '0' is 0x30 and '1' is 0x31: they differ only in the lowest bit. */
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"0", 8) == 0);
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"1", 8) != 0);
    assert(bitcompare((const unsigned char *)"0",
                      (const unsigned char *)"1", 7) == 0);
    return 0;
}
#endif
278 
279 
280 /* On timeout, silently terminate program  */
timeout_handler(int x)281 void timeout_handler(int x)
282 {
283     (void) x;
284 
285     if (opt_verbose > 0)
286         fprintf(stderr, "Timeout!\n");
287 
288     /* FIXME: is it legal to call exit here? */
289     exit(0);
290 }
291 
generate_query(void)292 static void generate_query(void)
293 {
294     const char* program = "int foo(){return 0;}";
295     unsigned char lzod_program[1000];
296     unsigned char lzo_work_mem[LZO1X_1_MEM_COMPRESS];
297     lzo_uint lzod_program_len;
298 
299     lzo1x_1_compress((const unsigned char *)program, strlen(program),
300                      lzod_program, &lzod_program_len,
301                      lzo_work_mem);
302 
303     switch (opt_protocol) {
304     case 1: {
305         static const char canned_query_fmt_protocol_1[]=
306                                       "DIST00000001"
307                                       "ARGC00000005"
308                                       "ARGV%08x%s"
309                                       "ARGV00000002-c"
310                                       "ARGV00000007hello.c"
311                                       "ARGV00000002-o"
312                                       "ARGV00000007hello.o"
313                                       "DOTI%08x%s";
314         sprintf(canned_query,
315                 canned_query_fmt_protocol_1,
316                  (unsigned)strlen(opt_compiler), opt_compiler,
317                  (unsigned)strlen(program), program);
318         canned_query_len = strlen(canned_query);
319         break;
320     }
321 
322      case 2: {
323         static const char canned_query_fmt_protocol_2[]=
324                                       "DIST00000002"
325                                       "ARGC00000005"
326                                       "ARGV%08x%s"
327                                       "ARGV00000002-c"
328                                       "ARGV00000007hello.c"
329                                       "ARGV00000002-o"
330                                       "ARGV00000007hello.o"
331                                       "DOTI%08x";
332         sprintf(canned_query,
333                 canned_query_fmt_protocol_2,
334                  (unsigned)strlen(opt_compiler),
335                  opt_compiler,
336                  (unsigned)lzod_program_len);
337 
338         canned_query_len = strlen(canned_query) + lzod_program_len;
339         memcpy(canned_query + strlen(canned_query),
340                lzod_program, lzod_program_len);
341 
342         break;
343       }
344 
345      case 3: {
346         static const char canned_query_fmt_protocol_3[]=
347                                       "DIST00000003"
348                                       "CDIR00000001/"
349                                       "ARGC00000005"
350                                       "ARGV%08x%s"
351                                       "ARGV00000002-c"
352                                       "ARGV00000007hello.c"
353                                       "ARGV00000002-o"
354                                       "ARGV00000007hello.o"
355                                       "NFIL00000001"
356                                       "NAME00000008/hello.c"
357                                       "FILE%08x";
358 
359         sprintf(canned_query,
360                 canned_query_fmt_protocol_3,
361                  (unsigned)strlen(opt_compiler),
362                  opt_compiler,
363                  (unsigned)lzod_program_len);
364 
365         canned_query_len = strlen(canned_query) + lzod_program_len;
366         memcpy(canned_query + strlen(canned_query),
367                lzod_program, lzod_program_len);
368         break;
369       }
370     }
371 }
372 
373 /* Try reading a protocol packet header */
server_read_packet_header(state_t * sp)374 void server_read_packet_header(state_t *sp)
375 {
376     int arg;
377     int nread;
378 
379     nread = read(sp->fd, sp->curhdrbuf + sp->curhdrlen,
380                  (size_t)(12 - sp->curhdrlen));
381     if (nread == 0) {
382         /* A nonblocking read returning zero bytes means EOF.
383          * FIXME: it may mean this only on the first read after poll said
384          * bytes were ready, so beware of false EOFs here?
385          */
386         if (opt_verbose > 0)
387             fprintf(stderr, "lsdistcc: premature EOF while waiting for "
388                             "result from server %s\n",
389                     sp->req.hname);
390         sp->status = STATE_CLOSE;
391         return;
392     }
393 
394     if (nread > 0)
395         sp->curhdrlen += nread;
396 
397     if (sp->curhdrlen < 12)
398         return;
399 
400     arg = (int)strtol(sp->curhdrbuf+4, NULL, 16);
401 
402     if (opt_verbose > 2) {
403         int i;
404         printf("Got hdr '%12.12s' = ", sp->curhdrbuf);
405         for (i=0; i < sp->curhdrlen; i++)
406                 printf("%2x", sp->curhdrbuf[i]);
407         printf("\n");
408     }
409 
410     /* Parse and validate the packet header, move on to next state */
411     switch (sp->status) {
412     case STATE_READ_DONEPKT:
413         if (memcmp(sp->curhdrbuf, "DONE", 4) != 0) {
414             if (opt_verbose > 1)
415                 fprintf(stderr,
416                         "%s wrong protocol; expected DONE, got %4.4s!\n",
417                         sp->req.hname, sp->curhdrbuf);
418             sp->status = STATE_CLOSE;
419             break;
420         }
421         if (arg != opt_protocol) {
422             if (opt_verbose > 1)
423                 fprintf(stderr,
424                         "%s wrong protocol, expected %d got %d!\n",
425                         sp->req.hname,
426                         opt_protocol,
427                         arg);
428             sp->status = STATE_CLOSE;
429             break;
430         }
431         /* No body to this type.  Read next packet. */
432         sp->curhdrlen = 0;
433         sp->status = STATE_READ_STATPKT;
434         break;
435 
436     case STATE_READ_STATPKT:
437         if (memcmp(sp->curhdrbuf, "STAT", 4) != 0) {
438             if (opt_verbose > 1)
439                 fprintf(stderr,
440                         "%s wrong protocol!  Expected STAT, got %4.4s\n",
441                         sp->req.hname, sp->curhdrbuf);
442             sp->status = STATE_CLOSE;
443             break;
444         }
445         if (arg != 0) {
446             if (opt_verbose > 1) {
447             /* FIXME: only conditional because my server uses load shedding */
448                 fprintf(stderr,
449                         "lsdistcc: warning: test compile on %s failed! "
450                         "status 0x%x\n",
451                         sp->req.hname, arg);
452             }
453             sp->status = STATE_CLOSE;
454             break;
455         }
456         /* No body to this type.  Read next packet. */
457         sp->curhdrlen = 0;
458         sp->status = STATE_READ_REST;
459         break;
460 
461     default:
462         fprintf(stderr, "bug\n");
463         exit(1);
464     }
465 }
466 
467 /* Grind state machine for a single server */
468 /* Take one transition through the state machine, unless that takes you
469    to STATE_CLOSE, in which case go through that state too, into STATE_DONE
470  */
471 
server_handle_event(state_t * sp)472 void server_handle_event(state_t *sp)
473 {
474     struct timeval now;
475     gettimeofday(&now, 0);
476 
477     do {
478         struct sockaddr_in sa;
479 
480         if (opt_verbose > 2)
481             fprintf(stderr,
482                     "now %lld %ld: server_handle_event: %s: state %d\n",
483                     (long long) now.tv_sec, (long) now.tv_usec/1000,
484                     sp->req.hname, sp->status);
485 
486         switch (sp->status) {
487         case STATE_CONNECT:
488             if (opt_conntimeout_ms == 0) {
489                 sp->fd = -1;
490                 sp->up = 1;
491                 sp->status = STATE_CLOSE;
492                 break;
493             }
494 
495             /* Now do a nonblocking connect to that address */
496             memset(&sa, 0, sizeof sa);
497             sa.sin_family = AF_INET;
498             sa.sin_port = htons(opt_port);
499             memcpy(&sa.sin_addr, sp->res.addr, 4);
500 
501             if ((sp->fd = socket(sa.sin_family, SOCK_STREAM, 0)) == -1) {
502                 fprintf(stderr, "failed to create socket: %s", strerror(errno));
503                 sp->status = STATE_DONE;
504             } else {
505                 dcc_set_nonblocking(sp->fd);
506                 /* start the nonblocking connect... */
507                 if (opt_verbose > 0)
508                     fprintf(stderr,
509                             "now %lld %ld: Connecting to %s\n",
510                             (long long) now.tv_sec, (long) now.tv_usec/1000,
511                             sp->req.hname);
512                 if (connect(sp->fd, (struct sockaddr *)&sa, sizeof(sa))
513                     && errno != EINPROGRESS) {
514                     if (opt_verbose > 0)
515                         fprintf(stderr, "failed to connect socket: %s",
516                         strerror(errno));
517                     sp->status = STATE_CLOSE;
518                 } else {
519                     sp->status = STATE_CONNECTING;
520                     fd2state[sp->fd] = sp->res.id;
521                     gettimeofday(&now, 0);
522                     sp->start = now;
523                     sp->deadline = now;
524                     sp->deadline.tv_usec += 1000 * opt_conntimeout_ms;
525                     sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
526                     sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
527                 }
528             }
529             break;
530         case STATE_CONNECTING:
531             {
532                 int connecterr;
533                 socklen_t len = sizeof(connecterr);
534                 int nsend;
535                 int nsent;
536 
537                 if (getsockopt(sp->fd, SOL_SOCKET, SO_ERROR,
538                                (char *)&connecterr, &len) < 0) {
539                     fprintf(stderr, "getsockopt SO_ERROR failed?!");
540                     sp->status = STATE_CLOSE;
541                     break;
542                 }
543                 if (connecterr) {
544                     if (opt_verbose > 0)
545                        fprintf(stderr,
546                                "now %lld %ld: Connecting to %s failed "
547                                "with errno %d = %s\n",
548                          (long long) now.tv_sec, (long) now.tv_usec/1000,
549                          sp->req.hname, connecterr, strerror(connecterr));
550                     sp->status = STATE_CLOSE;   /* not listening */
551                     break;
552                 }
553                 if (opt_comptimeout_ms == 0 || !opt_compiler) {
554                     /* connect succeeded, don't need to compile */
555                     sp->up = 1;
556                     sp->status = STATE_CLOSE;
557                     break;
558                 }
559                 if (opt_verbose > 0)
560                     fprintf(stderr,
561                             "now %lld %ld: %s: sending compile request\n",
562                             (long long) now.tv_sec, (long) now.tv_usec/1000,
563                             sp->req.hname);
564                 nsend = canned_query_len;
565                 nsent = write(sp->fd, canned_query, nsend);
566                 if (nsent != nsend) {
567                     if (opt_verbose > 1) {
568                         if (nsent == -1)
569                             fprintf(stderr,
570                                     "now %lld %ld: Sending to %s failed, "
571                                     "errno %d\n",
572                                     (long long) now.tv_sec, (long) now.tv_usec/1000,
573                                     sp->req.hname, connecterr);
574                         else
575                             fprintf(stderr,
576                                     "now %lld %ld: Sending to %s failed, "
577                                     "nsent %d != nsend %d\n",
578                                     (long long) now.tv_sec, (long) now.tv_usec/1000,
579                                     sp->req.hname, nsent, nsend);
580                     }
581                     /* ??? remote disconnect?  Buffer too small? */
582                     sp->status = STATE_CLOSE;
583                     break;
584                 }
585                 sp->status=STATE_READ_DONEPKT;
586                 sp->curhdrlen = 0;
587                 sp->deadline = now;
588                 sp->deadline.tv_usec += 1000 * opt_comptimeout_ms;
589                 sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
590                 sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
591             }
592             break;
593 
594         case STATE_READ_DONEPKT:
595         case STATE_READ_STATPKT:
596             server_read_packet_header(sp);
597             break;
598 
599         case STATE_READ_REST:
600           {
601             char buf[1000];
602             int nread;
603             nread = read(sp->fd, buf, sizeof(buf));
604             if (nread == 0) {
605                 /* A nonblocking read returning zero bytes means EOF.
606                  * FIXME: it may mean this only on the first read after
607                  * poll said bytes were ready, so beware of false EOFs here?
608                  */
609                 sp->up = 1;
610                 sp->status = STATE_CLOSE;
611             }
612           }
613           break;
614 
615         case STATE_CLOSE:
616             if (sp->fd != -1) {
617                 close(sp->fd);
618                 sp->fd = -1;
619             }
620 
621             if (opt_bang_down || sp->up) {
622                 if (opt_numeric)
623                     printf("%d.%d.%d.%d", sp->res.addr[0], sp->res.addr[1],
624                            sp->res.addr[2], sp->res.addr[3]);
625                 else
626                     printf("%s", sp->req.hname);
627 
628                 if (opt_port != DEFAULT_PORT)
629                     printf(":%d", opt_port);
630 
631                 printf("%s", protocol_suffix[opt_protocol]);
632 
633                 if (opt_bang_down && !sp->up)
634                     printf(",down");
635 
636                 if (opt_latency) {
637                     int latency_ms;
638                     gettimeofday(&now, 0);
639                     latency_ms = (now.tv_usec - sp->start.tv_usec) /
640                                  1000 + 1000 * (now.tv_sec - sp->start.tv_sec);
641                     printf(" %d", latency_ms);
642                 }
643                 putchar('\n');
644                 if (opt_verbose)
645                     fflush(stdout);
646             }
647             nok++;
648             sp->status = STATE_DONE;
649             ndone++;
650             break;
651 
652         case STATE_DONE:
653             ;
654         default:
655             ;
656         }
657     } while (sp->status == STATE_CLOSE);
658 }
659 
660 /* A helper function for detecting all listening distcc servers: this
661  * routine makes one pass through the poll() loop and analyzes what it
662  * sees.
663  */
one_poll_loop(struct rslave_s * rs,struct state_s states[],int start_state,int end_state,int nwithtries[],int * ngotaddr,int * nbaddns,unsigned char firstipaddr[4],int dnstimeout_usec,int matchbits,int overlap,int dnsgap)664 static int one_poll_loop(struct rslave_s* rs, struct state_s states[],
665                          int start_state, int end_state,
666                          int nwithtries[], int* ngotaddr, int* nbaddns,
667                          unsigned char firstipaddr[4], int dnstimeout_usec,
668                          int matchbits, int overlap, int dnsgap)
669 {
670     int i;
671     int nfds;
672     struct state_s *sp;
673     int nready;
674     int found;
675     struct timeval now;
676     struct pollfd pollfds[MAXFDS];
677 
678     /* See which sockets have any events */
679     nfds = 0;
680     memset(pollfds, 0, sizeof(pollfds));
681     pollfds[nfds].fd = rslave_getfd_fromSlaves(rs);
682     pollfds[nfds++].events = POLLIN;
683     pollfds[nfds].fd = rslave_getfd_toSlaves(rs);
684     /* Decide if we want to be notified if slaves are ready to handle
685      * a DNS request.
686      * To avoid sending too many DNS requests, we avoid sending more if
687      * the number of first tries is greater than 'overlap'
688      * or the number of outstanding DNS requests plus the number of
689      * already satisfied ones would be greater than or equal to the max
690      * number of hosts we're looking for.
691      */
692     pollfds[nfds++].events = ((nwithtries[1] <= overlap) &&
693                               (nwithtries[1]+
694                                nwithtries[2]+
695                                nwithtries[3]+
696                                nwithtries[4]+
697                                *ngotaddr < end_state)) ? POLLOUT : 0;
698     /* Set interest bits.
699      * When connecting, we want to know if we can write (aka if the
700      * connect has finished); when waiting for a compile to finish,
701      * we want to know if we can read.
702      */
703     for (i=start_state; i<=end_state; i++) {
704         switch (states[i].status) {
705         case STATE_CONNECTING:
706             pollfds[nfds].fd = states[i].fd;
707             pollfds[nfds++].events = POLLOUT;
708             break;
709         case STATE_READ_DONEPKT:
710         case STATE_READ_STATPKT:
711         case STATE_READ_REST:
712             pollfds[nfds].fd = states[i].fd;
713             pollfds[nfds++].events = POLLIN;
714             break;
715         default: ;
716         }
717     }
718     /* When polling, wait for no more than 50 milliseconds.
719      * Anything lower doesn't help performance much.
720      * Anything higher would inflate all our timeouts,
721      * cause retries not to be sent as soon as they should,
722      * and make the program take longer than it should.
723      */
724     nready = poll(pollfds, (unsigned)nfds, 50);
725     if (nready == -1) {
726 	fprintf(stderr, "lsdistcc: poll failed: %s\n", strerror(errno));
727 	exit(1);
728     }
729     gettimeofday(&now, 0);
730 
731 
732     /***** Check for timeout events *****/
733     sp = NULL;
734     found = FALSE;
735     for (i=start_state; i<=end_state; i++) {
736         sp = &states[i];
737         if (sp->status == STATE_LOOKUP
738             && sp->ntries > 0 && sp->ntries < MAXTRIES
739             && (sp->deadline.tv_sec < now.tv_sec ||
740                 (sp->deadline.tv_sec == now.tv_sec &&
741                  sp->deadline.tv_usec < now.tv_usec))) {
742             found = TRUE;
743             nwithtries[sp->ntries]--;
744             sp->ntries++;
745             nwithtries[sp->ntries]++;
746             if (opt_verbose > 0)
747                 fprintf(stderr,
748                         "now %lld %ld: Resending %s because "
749                         "deadline was %lld %ld\n",
750                         (long long) now.tv_sec, (long) now.tv_usec/1000,
751                         sp->req.hname, (long long) sp->deadline.tv_sec,
752                         (long) sp->deadline.tv_usec/1000);
753             break;
754         }
755 
756         if (sp->status == STATE_CONNECTING
757             && (sp->deadline.tv_sec < now.tv_sec ||
758                 (sp->deadline.tv_sec == now.tv_sec &&
759                  sp->deadline.tv_usec < now.tv_usec))) {
760             sp->status = STATE_CLOSE;
761             server_handle_event(sp);
762             if (opt_verbose > 0)
763                 fprintf(stderr,
764                         "now %lld %ld: %s timed out while connecting\n",
765                         (long long) now.tv_sec, (long) now.tv_usec/1000,
766                         sp->req.hname);
767         }
768         if ((sp->status == STATE_READ_DONEPKT ||
769              sp->status == STATE_READ_STATPKT ||
770              sp->status == STATE_READ_REST)
771             && (sp->deadline.tv_sec < now.tv_sec ||
772                 (sp->deadline.tv_sec == now.tv_sec &&
773                  sp->deadline.tv_usec < now.tv_usec))) {
774             sp->status = STATE_CLOSE;
775             server_handle_event(sp);
776             if (opt_verbose > 0)
777                 fprintf(stderr,
778                         "now %lld %ld: %s timed out while compiling\n",
779                         (long long) now.tv_sec, (long) now.tv_usec/1000,
780                         sp->req.hname);
781         }
782     }
783     if (!found && (nwithtries[1] <= overlap) &&
784         (pollfds[1].revents & POLLOUT)) {
785         /* Look for a fresh record to send */
786         for (i=start_state; i<=end_state; i++) {
787             sp = &states[i];
788             if (sp->status == STATE_LOOKUP && sp->ntries == 0) {
789                 found = TRUE;
790                 nwithtries[sp->ntries]--;
791                 sp->ntries++;
792                 nwithtries[sp->ntries]++;
793                 break;
794             }
795         }
796     }
797     /* If we found a record to send or resend, send it,
798        and mark its timeout. */
799     if (found) {
800         if (opt_verbose)
801             fprintf(stderr, "now %lld %ld: Looking up %s\n",
802                     (long long) now.tv_sec, (long) now.tv_usec/1000,
803 		    sp->req.hname);
804         rslave_writeRequest(rs, &sp->req);
805         sp->deadline = now;
806         sp->deadline.tv_usec += dnstimeout_usec;
807         sp->deadline.tv_sec += sp->deadline.tv_usec / 1000000;
808         sp->deadline.tv_usec = sp->deadline.tv_usec % 1000000;
809     }
810 
811     /***** Check poll results for DNS results *****/
812     if (pollfds[0].revents & POLLIN) {
813         /* A reply is ready, huzzah! */
814         rslave_result_t result;
815         if (rslave_readResult(rs, &result)) {
816             printf("bug: can't read from pipe\n");
817         } else {
818             /* Find the matching state_t, save the result,
819                and mark it as done */
820             /* printf("result.id %d\n", result.id); fflush(stdout); */
821             assert(result.id >= start_state && result.id <= end_state);
822             sp = &states[result.id];
823             if (sp->status == STATE_LOOKUP) {
824                 nwithtries[sp->ntries]--;
825                 sp->res = result;
826                 (*ngotaddr)++;
827                 if (matchbits > 0) {
828                     if (*ngotaddr == 1) {
829                         memcpy(firstipaddr, result.addr, 4);
830                     } else {
831                         /* break if new server on a 'different network'
832                          than first server */
833                         if (bitcompare(firstipaddr, result.addr, matchbits))
834                             result.err = -1;
835                     }
836                 }
837 
838                 if (result.err) {
839                     if (opt_verbose)
840                         fprintf(stderr, "now %lld %ld: %s not found\n",
841                                 (long long) now.tv_sec, (long) now.tv_usec/1000,
842                                 sp->req.hname);
843                     sp->status = STATE_DONE;
844                     ndone++;
845                     (*nbaddns)++;
846                     if (*nbaddns > dnsgap) {
847                         int highest = 0;
848                         /* start no more lookups */
849                         for (i=start_state; i <= end_state; i++)
850                             if (states[i].ntries > 0)
851                                 highest = i;
852                         assert(highest <= end_state);
853                         if (opt_verbose && end_state != highest)
854                             fprintf(stderr,
855                                     "Already searching up to host %d, "
856                                     "won't search any higher\n",
857                                     highest);
858                         end_state = highest;
859                         assert(end_state <= MAXHOSTS);
860                     }
861                 } else {
862                     sp->status = STATE_CONNECT;
863                     server_handle_event(sp);
864                 }
865             }
866         }
867     }
868 
869     /***** Grind state machine for each remote server *****/
870     for (i=2; i<nfds && i < MAXFDS; i++) {
871         sp = states + fd2state[pollfds[i].fd];  /* FIXME */
872         if (pollfds[i].revents)
873             server_handle_event(sp);
874     }
875     return end_state;
876 }
877 
878 /* Get the name based on the sformat. If the first element in sformat is a
879  * format, ignore the rest, and use the format to generate the series of names;
880  * otherwise, copy the name from sformat. Attach domain_name if needed.
881  */
get_thename(const char ** sformat,const char * domain_name,int i,char * thename)882 void get_thename(const char**sformat, const char *domain_name, int i,
883                 char *thename)
884 {
885     if (strstr(sformat[0], "%d") != NULL)
886         sprintf(thename, sformat[0], i);
887     else
888         strcpy(thename, sformat[i-1]);
889     if (opt_domain) {
890         strcat(thename, ".");
891         strcat(thename, domain_name);
892     }
893 }
894 
895 
896 /* Detect all listening distcc servers and print their names to stdout.
897  * Looks for servers numbered 1 through infinity, stops at
898  * first server that doesn't resolve in DNS, or after 'timeout' seconds,
899  * whichever comes first.
900  * On entry:
901  *  sformat: format of names of distcc servers to check
902  *  bigtimeout: how many seconds to terminate slow run after
903  *  dnstimeout: how many milliseconds before retrying a particular
904  *            gethostbyname call
905  *  matchbits: top matchbits of address must match first host found,
906                else stop searching
907  *  overlap: how many extra DNS queries to keep in flight normally
908  *  dnsgap: how many missing DNS entries to tolerate
909  * On exit:
910  *  returns number of servers found.
911  */
detect_distcc_servers(const char ** argv,int argc,int opti,int bigtimeout,int dnstimeout,int matchbits,int overlap,int dnsgap)912 int detect_distcc_servers(const char **argv, int argc, int opti,
913                           int bigtimeout, int dnstimeout,
914                           int matchbits, int overlap, int dnsgap)
915 {
916     unsigned char firstipaddr[4];
917     int dnstimeout_usec = dnstimeout * 1000;   /* how long before
918                                                   resending gethostbyname */
919     int i;
920     int n = MAXHOSTS;
921     int maxfds = MAXHOSTS + 10;
922     char thename[256];
923 
924     struct state_s states[MAXHOSTS+1];
925     int start_state, end_state;
926     int ngotaddr;
927     int nbaddns;
928     int nwithtries[MAXTRIES+1];
929 
930     struct rslave_s rs;
931 
932     const char *default_format = DEFAULT_FORMAT;
933     const char **sformat = &default_format;
934     const char *domain_name;
935     if (opt_domain) {
936         if (dcc_get_dns_domain(&domain_name)) {
937                 fprintf(stderr, "Can't get domain name\n");
938                 exit(1);
939         }
940     }
941     if (opti < argc) {
942         if (strstr(argv[opti], "%d") != NULL) {
943             sformat = &argv[opti++];
944         } else {
945             /* A list of host names can be given in the command line */
946             n = argc-opti;
947             sformat = &argv[opti++];
948         }
949     }
950 
951     /* Figure out the limit on the number of fd's we can open, as per
952      * the OS.  We allow 8 fds for uses other than this in the program
953      * (eg stdin, stdout).  If possible, ask the OS for more fds.
954      * We'll ideally use n + 2 fds in our poll loop, so ask for n + 10
955      * fds total.
956      */
957     struct rlimit rlim = {0, 0};
958     getrlimit(RLIMIT_NOFILE, &rlim);
959     if (rlim.rlim_cur < (rlim_t)n + 10) {
960         rlim.rlim_cur = (rlim_t)n + 10;
961         if (rlim.rlim_cur > rlim.rlim_max)
962             rlim.rlim_cur = rlim.rlim_max;
963         setrlimit(RLIMIT_NOFILE, &rlim);
964         getrlimit(RLIMIT_NOFILE, &rlim);
965         if (rlim.rlim_cur > 14)
966            maxfds = (int)(rlim.rlim_cur - 10);
967     }
968 
969     /* Don't run longer than bigtimeout seconds */
970     signal(SIGALRM, timeout_handler);
971     alarm((unsigned) bigtimeout);
972 
973     if (rslave_init(&rs))
974         return 0;
975 
976     ngotaddr = 0;
977     memset(nwithtries, 0, sizeof(nwithtries));
978     memset(states, 0, sizeof(states));
979 
980     /* all hosts start off in state 'sent 0' */
981     for (i=1; i<=n; i++) {
982         rslave_request_t *req = &states[i].req;
983         get_thename(sformat, domain_name, i, thename);
984         rslave_request_init(req, thename, i);
985         states[i].status = STATE_LOOKUP;
986         states[i].ntries = 0;
987         nwithtries[0]++;
988     }
989 
990     ndone = 0;
991     nok = 0;
992     nbaddns = 0;
993     /* Loop until we're done finding distcc servers.  We have to do
994      * this loop in groups, with each group using no more than maxfds
995      * fd's.  One call to one_poll_loop uses n + 2 fds.
996      */
997     for (start_state = 1; start_state <= n; start_state = end_state + 1) {
998         int orig_end_state;
999         end_state = start_state + maxfds-2;
1000         if (end_state > n)
1001             end_state = n;
1002         orig_end_state = end_state;
1003         while (ndone < end_state) {
1004             end_state = one_poll_loop(&rs, states, start_state, end_state,
1005                                       nwithtries, &ngotaddr, &nbaddns,
1006                                       firstipaddr, dnstimeout_usec,
1007                                       matchbits, overlap, dnsgap);
1008         }
1009         if (end_state < orig_end_state) {
1010             /* If we lowered end_state, it means we decided to stop
1011              * searching early.
1012              */
1013             break;
1014         }
1015     }
1016     return nok;
1017 }
1018 
main(int argc,char ** argv)1019 int main(int argc, char **argv)
1020 {
1021     int opti;
1022     int nfound;
1023 
1024     for (opti = 1; opti < argc && argv[opti][0] == '-'; opti++) {
1025         switch (argv[opti][1]) {
1026         case 'm':
1027             opt_match = atoi(argv[opti]+2);
1028             if (opt_match > 31 || opt_match < 0)
1029                 usage();
1030             break;
1031         case 't':
1032             opt_bigtimeout_sec = atoi(argv[opti]+2);
1033             if (opt_bigtimeout_sec < 0)
1034                 usage();
1035             break;
1036         case 'h':
1037             opt_dnstimeout_ms = atoi(argv[opti]+2);
1038             if (opt_dnstimeout_ms < 0)
1039                 usage();
1040             break;
1041         case 'c':
1042             opt_conntimeout_ms = atoi(argv[opti]+2);
1043             if (opt_conntimeout_ms < 0)
1044                 usage();
1045             break;
1046         case 'k':
1047             opt_comptimeout_ms = atoi(argv[opti]+2);
1048             if (opt_comptimeout_ms < 0)
1049                 usage();
1050             break;
1051         case 'o':
1052             opt_overlap = atoi(argv[opti]+2);
1053             if (opt_overlap < 0)
1054                 usage();
1055             break;
1056         case 'g':
1057             opt_dnsgap = atoi(argv[opti]+2);
1058             if (opt_dnsgap < 0)
1059                 usage();
1060             break;
1061         case 'P':
1062             opt_protocol = atoi(argv[opti]+2);
1063             if (opt_protocol <= 0 || opt_protocol > 3) {
1064                 usage();
1065             }
1066             break;
1067         case 'p':
1068             opt_compiler = argv[opti]+2;
1069             if (! *opt_compiler)
1070                 usage();
1071             break;
1072         case 'r':
1073             opt_port = atoi(argv[opti]+2);
1074             if (opt_port <= 0)
1075                 usage();
1076             break;
1077         case 'l':
1078             opt_latency = 1;
1079             break;
1080         case 'n':
1081             opt_numeric = 1;
1082             break;
1083         case 'x':
1084             opt_bang_down = 1;
1085             break;
1086         case 'v':
1087             opt_verbose++;
1088             break;
1089         case 'd':
1090             opt_domain++;
1091             break;
1092         default:
1093             usage();
1094         }
1095     }
1096 
1097     if (opt_compiler)
1098         generate_query();
1099 
1100     nfound = detect_distcc_servers((const char **)argv, argc, opti,
1101                                    opt_bigtimeout_sec,
1102                                    opt_dnstimeout_ms,
1103                                    opt_match,
1104                                    opt_overlap,
1105                                    opt_dnsgap);
1106 
1107     /* return failure if no servers found */
1108     return (nfound > 0) ? 0 : 1;
1109 }
1110