1 /*
2 * DNS-Terror. Part of Fastresolve.
3 *
4 * Before running this, it's best to run 'unlimit'.
5 *
6 * Reads IP addresses to resolve from the standard input, one per line.
7 * Other stuff on a line after the IP address is ignored.
8 *
9 * Options:
10 * -c adns-conf ADNS conf string to use instead of /etc/resolv.conf
11 * and the various optional environment variables.
12 * One or more lines in a format like resolv.conf,
13 * with directives: nameserver, domain, search
14 * plus some additional directives:
15 * sortlist, options, clearnameservers, include
16 * One approach is to make an alternate conf file
17 * and use -c "include adns.conf".
18 * -d dbfile Save results to DB file dbfile. Defaults to
19 * ip2host.db. If given as the empty string,
20 * the DB is stored in memory, and is lost when the
21 * program exits.
22 * -f fields Skip fields blank-separated fields at the start
23 * of each line before expecting an IP address.
24 * -m marksize Print a notice every marksize input lines.
25 * -o Copy the input lines to the standard output
26 * with IP addresses resolved.
27 * -p parallel-queries Set the size of the query pipeline.
28 * -r Reresolve; do not read in negative cache entries.
29 * -s Sync the DB to disk at each mark.
 * -v Increase output verbosity.
 * -z With -o, gzip-compress the lines copied to the standard output.
31 *
32 * On SIGHUP, closes and reopens the db file (useful if it was rolled).
 * On SIGTERM or SIGINT, closes the db file and exits.
34 *
35 * Written by David MacKenzie <djm@djmnet.org>
36 * Thanks to Josh Osborne <stripes@pix.net> for ideas and an
37 * earlier implementation.
38 * Please send comments and bug reports to <fastresolve-bugs@djmnet.org>
39 *
40 ******************************************************************************
41 * Copyright 1999 UUNET, an MCI WorldCom company.
42 *
43 * This program is free software; you can redistribute it and/or modify
44 * it under the terms of the GNU General Public License as published by
45 * the Free Software Foundation; either version 2, or (at your option)
46 * any later version.
47 *
48 * This program is distributed in the hope that it will be useful,
49 * but WITHOUT ANY WARRANTY; without even the implied warranty of
50 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
51 * GNU General Public License for more details.
52 *
53 * You should have received a copy of the GNU General Public License
54 * along with this program; if not, write to the Free Software
55 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
56 * 02111-1307, USA.
57 ******************************************************************************
58 */
59
60 #include <stdio.h>
61 #include <time.h>
62 #include <unistd.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <errno.h>
66 #include <assert.h>
67 #include <ctype.h>
68 #include <signal.h>
69 #include <setjmp.h>
70 #include <adns.h>
71 #include <zlib.h>
72 #include <map>
73 #include <deque>
74 #include "BoolString.h"
75 #include "DatedStringDb.h"
76
77 using namespace std;
78
79 extern "C" int getstr(char **lineptr, size_t *n, FILE *stream,
80 char terminator, size_t offset);
81 #ifndef HAVE_FGETLN
82 extern "C" char *fgetln(FILE *stream, size_t *lenp);
83 #endif
84
85 // Default maximum number of queries outstanding in the pipeline,
86 // or with -o, 1/20 the number of buffered log lines.
87 #define DEFAULT_PARALLEL_QUERIES 1000
88
89 // 20-30 is a typical ratio of queried addresses to log lines.
90 #define COPYLINES_MULTIPLIER 20
91
92 // Where to cache the results.
93 #define DEFAULT_DBFILE "ip2host.db"
94
95 typedef map<BoolString, BoolString, less<BoolString> > BoolStringMap;
96
97 static char *program_name;
98
99 // Degree of verbosity in output. The more, the messier.
100 static int verbose = 0;
101
// Flags set by signal handlers.
// reopen is written from the SIGHUP handler and polled by the main loop,
// so it must be a volatile sig_atomic_t for those accesses to be well
// defined.
static volatile sig_atomic_t reopen = 0;
// Jump target that the terminate/interrupt handlers and fatal_errno()
// longjmp() to; main() arms it with setjmp().
static jmp_buf getback;
105
106 void
hup_handler(int)107 hup_handler(int)
108 {
109 reopen = 1;
110 }
111
112 void
term_handler(int)113 term_handler(int)
114 {
115 fprintf(stderr, "%s: received terminate signal; exiting\n", program_name);
116 longjmp(getback, 1);
117 }
118
119 void
int_handler(int)120 int_handler(int)
121 {
122 fprintf(stderr, "%s: received interrupt signal; exiting\n", program_name);
123 longjmp(getback, 1);
124 }
125
126 static void
fatal_errno(const char * what,int errnoval)127 fatal_errno(const char *what, int errnoval)
128 {
129 fprintf(stderr, "%s: fatal error: %s: %s\n",
130 program_name, what, strerror(errnoval));
131 longjmp(getback, 1);
132 }
133
134 void
set_handlers(void)135 set_handlers(void)
136 {
137 struct sigaction act;
138
139 memset(&act, '\0', sizeof act);
140
141 act.sa_handler = hup_handler;
142 sigaction(SIGHUP, &act, NULL);
143
144 act.sa_handler = term_handler;
145 sigaction(SIGTERM, &act, NULL);
146
147 act.sa_handler = int_handler;
148 sigaction(SIGINT, &act, NULL);
149 }
150
151 // Info about one query that's being made.
152 class LogEntry
153 {
154 public:
155 adns_query qu; // ADNS query ID, or NULL.
156 char *ipaddr; // Forward dotted-quad, NUL-terminated.
157 char *logbefore, *logafter; // The rest of the log entry.
158 size_t lenbefore, lenafter; // Lengths; no NUL-termination.
159 char buf[1]; // Really longer. Must be last. Holds above.
160 };
161
162 typedef deque<LogEntry *> LogEntryQue;
163
// Counters describing one run; print() writes them out as a summary.
class QueryStats
{
public:
  // All counters start at zero.
  QueryStats(void)
    : linesread(0), cached(0), submitted(0), invalid(0), successful(0)
  {
  }

  void print(void);

  long linesread;	// Input lines read.
  long cached;		// -1 if no cache DB file used.
  long submitted;	// Addresses queried with DNS.
  long invalid;		// Lines whose address was not valid.
  long successful;	// Queries that produced an answer.
};
175
176 void
print(void)177 QueryStats::print(void)
178 {
179 fprintf(stderr, "%ld lines read.\n", linesread);
180 fprintf(stderr, "%ld (%.2f%%) invalid addresses.\n",
181 invalid, linesread ? ((100.0 * invalid) / (1.0 * linesread)) : 0.0);
182
183 if (cached >= 0)
184 fprintf(stderr, "%ld (%.2f%%) cache hits from the DB file.\n",
185 cached, linesread ? ((100.0 * cached) / (1.0 * linesread)) : 0.0);
186
187 fprintf(stderr, "%ld (%.2f%%) addresses were queried with DNS;\n",
188 submitted, linesread ? ((100.0 * submitted) / (1.0 * linesread)) : 0.0);
189 fprintf(stderr, "%ld (%.2f%%) of those queries were successful.\n",
190 successful, submitted ? ((100.0 * successful) / (1.0 * submitted)) : 0.0);
191 }
192
// Maximum bytes in an ASCII IPv4 address.
#define MAX_IP_LEN 15
// The size of "zzz.yyy.xxx.www.in-addr.arpa\0"
#define MAX_PTR_SIZE (MAX_IP_LEN + 14)
// Define to do pedantic checking that domain has a valid format.
// #define CHECK_PTR_SYNTAX

// Append the digits in [first, last] to *out, advancing *out past them.
// Return their decimal value, saturated just above 255 so that a long run
// of digits cannot overflow the int.
static int
copy_octet(const char *first, const char *last, char **out)
{
  int val = 0;

  for (; first <= last; ++first) {
    if (val <= 255)
      val = val * 10 + (*first - '0');
    *(*out)++ = *first;
  }
  return val;
}

// If domain contains "www.xxx.yyy.zzz" then put in ptr
// "zzz.yyy.xxx.www.in-addr.arpa".
// ptr must be at least MAX_PTR_SIZE bytes long.
// Return 1 if ok, 0 if domain is not an IPv4 address: it is empty, too
// long to fit in ptr, or contains something other than digits and dots.
// With CHECK_PTR_SYNTAX defined, it must also have exactly four octets
// no larger than 255.
//
// We leave off the final "." because the returned answers lack it,
// and we need to compare with them, and this is more efficient than
// adding a "." to the end of each of them.
// Moreover, we're not passing the adns_qf_search flag to
// adns_submit(), so we're not searching anyway.
int
domptr(const char *domain, char *ptr)
{
  // An array rather than a pointer, so that sizeof below is the length of
  // the suffix including its NUL and not the size of a pointer.
  static const char inaddr[] = ".in-addr.arpa";
  const size_t domsize = strlen(domain);
  const char *numend = NULL;	// Last digit of the octet being scanned.
  char *p = ptr;
#ifdef CHECK_PTR_SYNTAX
  int octets = 0, dots = 0;
#endif

  // Every input byte is copied exactly once, so this bounds the output.
  if (domsize == 0 || domsize + sizeof(inaddr) > (size_t) MAX_PTR_SIZE)
    return 0;

  // Walk the address from its last character to its first, copying each
  // octet out when the dot in front of it is reached.
  for (size_t i = domsize; i-- > 0;) {
    const char *d = domain + i;

    if (isdigit((unsigned char) *d)) {
      if (!numend)
	numend = d;
    } else if (*d == '.') {
      if (numend) {
	const int val = copy_octet(d + 1, numend, &p);

	numend = NULL;
#ifdef CHECK_PTR_SYNTAX
	if (val > 255)
	  return 0;
	++octets;
#else
	(void) val;
#endif
      }
      *p++ = '.';
#ifdef CHECK_PTR_SYNTAX
      ++dots;
#endif
    } else {
      return 0;
    }
  }

  // The leading octet has no dot in front of it to trigger the copy.
  if (numend) {
    const int val = copy_octet(domain, numend, &p);

#ifdef CHECK_PTR_SYNTAX
    if (val > 255)
      return 0;
    ++octets;
#else
    (void) val;
#endif
  }
#ifdef CHECK_PTR_SYNTAX
  if (octets != 4 || dots != 3)
    return 0;
#endif
  strcpy(p, inaddr);
  return 1;
}
279
#if 0
// Debugging aid (compiled out): dump reslist to the standard error as
// key=value lines.  Unless all is true, entries whose value is "?" are
// left out.
void
print_map(BoolStringMap &reslist, bool all)
{
  BoolString k, v;

  fprintf(stderr, "MAP:\n");
  BoolStringMap::iterator it = reslist.begin();
  while (it != reslist.end()) {
    k = it->first;
    v = it->second;
    if (all || strcmp(v.get_str(), "?") != 0)
      fprintf(stderr, "%s=%s\n", k.get_str(), v.get_str());
    it++;
  }
}
#endif
296
297 enum submission { sb_invalid, sb_cached, sb_known, sb_pending, sb_submitted };
298
299 enum submission
submit_query(adns_state ads,BoolStringMap & reslist,LogEntry * lp)300 submit_query(adns_state ads, BoolStringMap &reslist, LogEntry *lp)
301 {
302 int r;
303 adns_query qu;
304 char rev[MAX_PTR_SIZE], *ipaddr, *data;
305
306 if (!domptr(lp->ipaddr, rev)) {
307 if (verbose)
308 fprintf(stderr, "%s invalid\n", lp->ipaddr);
309 return sb_invalid;
310 }
311
312 BoolString key(lp->ipaddr, false), value;
313 BoolStringMap::iterator it = reslist.find(key);
314 if (it != reslist.end()) {
315 value = (*it).second;
316 data = value.get_str();
317 if (data[0] == '?' && data[1] == '\0') {
318 if (verbose > 1)
319 fprintf(stderr, "%s pending\n", lp->ipaddr);
320 return sb_pending;
321 }
322 if (value.get_flag()) {
323 if (verbose > 1)
324 fprintf(stderr, "%s known\n", lp->ipaddr);
325 return sb_known;
326 } else {
327 if (verbose > 1)
328 fprintf(stderr, "%s cached\n", lp->ipaddr);
329 return sb_cached;
330 }
331 }
332
333 r = adns_submit(ads, rev, adns_r_ptr_raw,
334 adns_queryflags (adns_qf_quoteok_cname|adns_qf_quoteok_anshost), lp, &qu);
335 if (r)
336 fatal_errno("adns_submit", r);
337 if (verbose)
338 fprintf(stderr, "%s submitted\n", lp->ipaddr);
339
340 lp->qu = qu;
341
342 ipaddr = strdup(lp->ipaddr);
343 if (ipaddr == NULL)
344 fatal_errno("malloc", errno);
345
346 BoolString k(ipaddr, false), v("?", true);
347 reslist[k] = v;
348
349 return sb_submitted;
350 }
351
352 // Record the resource record(s) we got back.
353 // Do not free the return value, which is used in reslist.
354 char *
process_answer(adns_answer * ans,char * ipaddr,BoolStringMap & reslist)355 process_answer(adns_answer *ans, char *ipaddr, BoolStringMap &reslist)
356 {
357 const char *rrtn, *fmtn;
358 char *ptr;
359 int len;
360 adns_status ri;
361
362 ri = adns_rr_info(ans->type, &rrtn, &fmtn, &len, 0, 0);
363 if (verbose)
364 fprintf(stderr, "%s %s; nrrs=%d ",
365 ipaddr,
366 adns_strerror(ans->status),
367 ans->nrrs);
368 if (ans->nrrs) {
369 ptr = *ans->rrs.str;
370 if (verbose)
371 fprintf(stderr, "%s\n", ptr);
372 } else {
373 ptr = "";
374 if (verbose)
375 putc('\n', stderr);
376 }
377
378 ptr = strdup(ptr);
379 if (ptr == NULL)
380 fatal_errno("malloc", errno);
381
382 // Update the value from "?".
383 BoolString key(ipaddr, false), oldvalue;
384 BoolStringMap::iterator it = reslist.find(key);
385 assert(it != reslist.end());
386 key = (*it).first; // Don't lose that malloc'd string.
387 oldvalue = (*it).second;
388 char *data = oldvalue.get_str();
389 assert(data[0] == '?' && data[1] == '\0');
390 assert(oldvalue.get_flag());
391
392 BoolString value(ptr, true);
393 reslist[key] = value;
394
395 return ptr;
396 }
397
398 // Read fields space-separated fields from fp, and return the result.
399 // Store in *lenp the number of characters read, not including the
400 // null terminator.
401 char *
read_fields(FILE * fp,int fields,size_t * lenp)402 read_fields(FILE *fp, int fields, size_t *lenp)
403 {
404 static char *p = NULL;
405 static size_t psize = 0;
406 ssize_t nread;
407 size_t off;
408
409 off = 0;
410 while (fields-- > 0 &&
411 (nread = getstr(&p, &psize, fp, ' ', off)) > 0) {
412 off += nread;
413 }
414 *lenp = off;
415 return off == 0 ? NULL : p;
416 }
417
418 // Return the IP address of the next log entry, NUL terminated.
419 // The result is in static storage that will be overwritten
420 // by the next call.
421 // Return NULL on EOF.
422 // If save_line is true, save the contents of the line in the returned
423 // structure. If skip_fields is nonzero, there are that many
424 // space-separated fields before the IP address.
425 LogEntry *
read_ipaddr(FILE * fp,bool save_line,int skip_fields)426 read_ipaddr(FILE *fp, bool save_line, int skip_fields)
427 {
428 static char ipa[MAX_IP_LEN + 1];
429 char *before;
430 size_t after_len = 0, before_len = 0;
431 char *p = ipa, *after = "", *to, *from, *end;
432 int c;
433 LogEntry *lp;
434
435 if (skip_fields)
436 before = read_fields(fp, skip_fields, &before_len);
437
438 while ((c = getc(fp)) != EOF
439 && !isspace(c)
440 && p - ipa < MAX_IP_LEN) {
441 if (c) // Guard against corruption (NUL bytes).
442 *p++ = c;
443 }
444 *p = '\0';
445 if (c == EOF)
446 // Note that we throw away any IP address that is the last thing
447 // in the input stream. It must be followed by something
448 // (a newline or two other characters will do) in order to be returned.
449 return NULL;
450
451 // N.B. BSD fgetln() does not NUL terminate.
452 if (c != '\n' && (after = fgetln(fp, &after_len)) == NULL)
453 return NULL;
454
455 lp = (LogEntry *)
456 malloc(sizeof(LogEntry)
457 + (save_line ? before_len : 0) // logbefore
458 + p - ipa // ipaddr (buf already has 1 byte for NUL)
459 + (save_line ? after_len + 1 : 0) // logafter
460 );
461 if (lp == NULL)
462 fatal_errno("malloc", errno);
463
464 lp->qu = 0;
465
466 // Point ipaddr, logafter, logbefore to data in buf.
467 to = lp->buf;
468
469 // Copy the IP address into the LogEntry.
470 for (lp->ipaddr = to, from = ipa; *from;)
471 *to++ = *from++;
472 *to = '\0';
473
474 // Copy the rest of the line into the LogEntry, if requested.
475 if (save_line) {
476 lp->logafter = ++to;
477 *to++ = c;
478 end = after + after_len; // Sentinel for speed.
479 for (from = after; from != end;)
480 *to++ = *from++;
481 lp->lenafter = after_len + 1;
482
483 if (before_len) {
484 lp->logbefore = ++to;
485 end = before + before_len; // Sentinel again.
486 for (from = before; from != end;)
487 *to++ = *from++;
488 }
489 lp->lenbefore = before_len;
490 }
491
492 return lp;
493 }
494
495 // Read DB entries created by earlier runs into reslist,
496 // marked as 'not new' (false).
497 // If skipempty is true, do not read in negative cache entries.
498 void
read_db(DatedStringDb * table,BoolStringMap & reslist,bool skipempty)499 read_db(DatedStringDb *table, BoolStringMap &reslist, bool skipempty)
500 {
501 DatedStringDbCursor *iterator = new DatedStringDbCursor(table);
502 char *key, *data;
503 time_t when;
504 int inserted = 0;
505
506 while (iterator->get(&key, &data, DB_NEXT, &when) == 0)
507 {
508 if (*data || !skipempty) {
509 BoolString k(key, false), v(data, false);
510 reslist[k] = v;
511 ++inserted;
512 }
513 }
514
515 iterator->close();
516
517 if (verbose)
518 fprintf(stderr, "read %d addresses from DB file\n", inserted);
519 }
520
521 // Write out new entries to the DB file.
522 void
store_db(DatedStringDb * table,BoolStringMap & reslist)523 store_db(DatedStringDb *table, BoolStringMap &reslist)
524 {
525 BoolStringMap::iterator it;
526 char *key, *data;
527 time_t when;
528 int added = 0;
529
530 time(&when);
531
532 for (it = reslist.begin(); it != reslist.end(); it++) {
533 BoolString ipaddr = (*it).first, value = (*it).second;
534 if (value.get_flag()) {
535 if (verbose > 1)
536 fprintf(stderr, "%s new address\n", ipaddr.get_str());
537 try
538 {
539 table->put(ipaddr.get_str(), value.get_str(), &when);
540 }
541 catch (DbException &dbe)
542 {
543 fprintf(stderr, "DB error storing (%s,%s): %s\n",
544 ipaddr.get_str(), value.get_str(), dbe.what());
545 }
546 ++added;
547 value.set_flag(false);
548 }
549 }
550
551 table->sync();
552
553 if (verbose)
554 fprintf(stderr, "added %d addresses to DB file\n", added);
555 }
556
// Print the command synopsis for prog on the standard error and
// exit with status 1.  Does not return.
void
usage(char *prog)
{
  fprintf(stderr,
	  "Usage: %s [-v...] [-orsz] [-d db-file] [-c adns-conf-str] [-m mark-size] [-p parallel-queries] [-f skip-fields]\n",
	  prog);

  exit(1);
}
565
566 int
main(int argc,char * const * argv)567 main(int argc, char *const *argv)
568 {
569 adns_state ads;
570 int r;
571 adns_initflags aflags;
572 int outstanding = 0;
573 size_t qsize;
574 gzFile zout = NULL;
575 adns_query qu;
576 adns_answer *ans;
577 DatedStringDb *resolved;
578 BoolStringMap reslist;
579 LogEntryQue lq;
580 char *dbhome = NULL;
581 char *dbfile = DEFAULT_DBFILE;
582 char *adnsconf = NULL;
583 long marksize = 0, lastmark = -1;
584 bool syncmark = false;
585 bool copylines = false;
586 bool reresolve = false;
587 int skip_fields = 0;
588 int parallel_queries = DEFAULT_PARALLEL_QUERIES;
589 QueryStats stats;
590 LogEntry *lp;
591 char *dom;
592
593 program_name = argv[0];
594
595 while ((r = getopt(argc, argv, "c:d:f:m:op:rsvz")) != -1) {
596 switch (r) {
597 case 'c':
598 adnsconf = optarg;
599 break;
600
601 case 'd':
602 dbfile = optarg;
603 break;
604
605 case 'f':
606 skip_fields = atoi(optarg);
607 break;
608
609 case 'm':
610 marksize = atol(optarg);
611 break;
612
613 case 'o':
614 copylines = true;
615 break;
616
617 case 'p':
618 parallel_queries = atoi(optarg);
619 break;
620
621 case 'r':
622 reresolve = true;
623 break;
624
625 case 's':
626 syncmark = true;
627 break;
628
629 case 'v':
630 verbose++;
631 break;
632
633 case 'z':
634 zout = gzdopen(1, "wb");
635 if (!zout) {
636 fprintf(stderr, "%s: fatal error: %s: %s\n",
637 program_name, "gzdopen", strerror(errno));
638 exit(1);
639 }
640 break;
641
642 default:
643 usage(argv[0]);
644 break;
645 }
646 }
647
648 // We get back here when terminating abnormally.
649 r = setjmp(getback);
650 if (r == 1)
651 goto out;
652 else if (r == 2) {
653 if (copylines) {
654 if (zout) {
655 if (lp->lenbefore)
656 gzwrite(zout, lp->logbefore, lp->lenbefore);
657 gzputs(zout, lp->ipaddr);
658 gzwrite(zout, lp->logafter, lp->lenafter);
659 } else {
660 if (lp->lenbefore)
661 fwrite(lp->logbefore, lp->lenbefore, 1, stdout);
662 fputs(lp->ipaddr, stdout);
663 fwrite(lp->logafter, lp->lenafter, 1, stdout);
664 }
665 }
666 goto timed_out;
667 }
668
669 if (verbose > 2)
670 aflags = (adns_initflags) adns_if_debug;
671 else
672 aflags = (adns_initflags) 0;
673 if (adnsconf)
674 r = adns_init_strcfg(&ads, aflags, stderr, adnsconf);
675 else
676 r = adns_init(&ads, aflags, stderr);
677 if (r)
678 fatal_errno("adns_init", r);
679
680 try
681 {
682 resolved = new DatedStringDb(dbhome, *dbfile ? dbfile : NULL);
683
684 set_handlers();
685
686 if (*dbfile)
687 read_db(resolved, reslist, reresolve);
688 else
689 stats.cached = -1;
690 }
691 catch (DbException &dbe)
692 {
693 fprintf(stderr, "DB error opening %s: %s\n",
694 *dbfile ? dbfile : "memory DB", dbe.what());
695 exit(1);
696 }
697
698 if (copylines)
699 parallel_queries *= COPYLINES_MULTIPLIER;
700
701 while ((lp = read_ipaddr(stdin, copylines, skip_fields))) {
702 ++stats.linesread;
703 if (marksize && stats.linesread % marksize == 0) {
704 if (copylines)
705 fprintf(stderr, "On line %ld, %d queries outstanding, %d lines buffered\n",
706 stats.linesread, outstanding, lq.size());
707 else
708 fprintf(stderr, "On line %ld, %d queries outstanding\n",
709 stats.linesread, outstanding);
710 if (syncmark && *dbfile)
711 store_db(resolved, reslist);
712 }
713
714 if (reopen && *dbfile) {
715 if (verbose) {
716 fprintf(stderr, "%s: received hangup signal; reopening DB file\n", program_name);
717 }
718 try
719 {
720 delete resolved;
721 resolved = new DatedStringDb(dbhome, dbfile);
722 }
723 catch (DbException &dbe)
724 {
725 fprintf(stderr, "DB error reopening %s: %s\n",
726 dbfile, dbe.what());
727 exit(1);
728 }
729 store_db(resolved, reslist);
730 reopen = 0;
731 }
732
733 // When doing copylines:
734 // We need to save the log files for all entries,
735 // not just those that we submit. The log line must be
736 // in the deque.
737 //
738 // When we read a log file entry to enqueue it, the following
739 // situations are possible:
740 // 0. The IP address field is syntactically invalid.
741 // There is no answer cached in the map.
742 // We don't need a query ID (assume it's 0).
743 // 1. The answer is cached in the map, and is not "?".
744 // We don't need a query ID (assume it's 0).
745 // 2. The answer is cached in the map, and is "?", meaning
746 // that the query is in process for some earlier entry.
747 // By the time we dequeue this entry, the earlier entry will
748 // have been written out, so the answer will be in the map.
749 // We don't need a query ID (assume it's 0).
750 // 3. The answer is not in the map. We need to submit a
751 // new query for it and save the query ID.
752 //
753 // When we dequeue a log file entry, the following
754 // situations are possible:
755 // 0. The query ID is 0 and there's no answer in the map,
756 // so the IP address is syntactically invalid.
757 // We can write out the line.
758 // 1. The query ID is 0. We know the answer is in the map,
759 // and is not "?". We can write out the line.
760 // 2. The query ID is not 0. The query is in process.
761 // We need to wait on the query ID for it.
762
763 switch (submit_query(ads, reslist, lp)) {
764 case sb_invalid:
765 ++stats.invalid;
766 break;
767
768 case sb_pending:
769 break;
770
771 case sb_known:
772 break;
773
774 case sb_cached:
775 ++stats.cached;
776 break;
777
778 case sb_submitted:
779 ++outstanding;
780 ++stats.submitted;
781 break;
782 }
783
784 if (copylines)
785 lq.push_back(lp);
786
787 // The goal is to keep the query pipeline full, so only pick off
788 // one answer.
789
790 if (copylines)
791 assert(lq.size() <= parallel_queries);
792 else
793 assert(outstanding <= parallel_queries);
794 if (copylines ? (lq.size() == parallel_queries) : (outstanding == parallel_queries)) {
795 if (copylines) {
796 lp = lq[0];
797 lq.pop_front();
798 qu = lp->qu;
799 } else {
800 qu = 0;
801 }
802 if (qu || !copylines) {
803 // It would be best if the answer were always ready here, so the
804 // adns_wait call wouldn't block. That would happen if the time
805 // it takes to generate a full pipeline of queries were equal to
806 // (or greater than, though that wouldn't help) the maximum time it
807 // takes to get back a response (or timeout). Otherwise, the
808 // pipeline will stall.
809 //
810 // For example, with the default (-p 1000) for -o of 20,000 lines
811 // buffered, there may be about 800 queries outstanding.
812 // On a Pentium III/500, we can generate about 1400 queries per second.
813 // If we set udpmaxretries:4 for an 8 second timeout, then we
814 // may generate no more than 800/8=100 queries per second to
815 // avoid all pipeline stalls on timeouts. 8*1400=11200; round to 12000.
816 // -p 12000 will give better performance assuming you have enough RAM.
817 //
818 // Even if you get the main loop optimal, the pipeline will
819 // probably stall during the drain time at the end.
820 r = adns_wait(ads, &qu, &ans, (void **) &lp);
821 if (r)
822 fatal_errno("adns_wait", r);
823 dom = process_answer(ans, lp->ipaddr, reslist);
824 if (*dom)
825 ++stats.successful;
826 --outstanding;
827 } else {
828 // It's already in the list.
829 BoolString key(lp->ipaddr, false), value;
830 BoolStringMap::iterator it = reslist.find(key);
831 if (it != reslist.end()) {
832 value = (*it).second;
833 dom = value.get_str();
834 } else {
835 // For a syntactically invalid IP address, there's no answer.
836 // Print it unchanged.
837 dom = lp->ipaddr;
838 }
839 ans = NULL;
840 }
841 if (copylines) {
842 if (zout) {
843 if (lp->lenbefore)
844 gzwrite(zout, lp->logbefore, lp->lenbefore);
845 gzputs(zout, *dom ? dom : lp->ipaddr);
846 gzwrite(zout, lp->logafter, lp->lenafter);
847 } else {
848 if (lp->lenbefore)
849 fwrite(lp->logbefore, lp->lenbefore, 1, stdout);
850 fputs(*dom ? dom : lp->ipaddr, stdout);
851 fwrite(lp->logafter, lp->lenafter, 1, stdout);
852 }
853 }
854 if (ans)
855 free(ans);
856 free(lp);
857 }
858 }
859
860 if (verbose || marksize) {
861 fprintf(stderr, "Read last line %ld, %d queries outstanding\n",
862 stats.linesread, outstanding);
863 if (syncmark && *dbfile)
864 store_db(resolved, reslist);
865 }
866
867 // Pick up the stragglers; let the pipeline drain.
868 if (marksize) {
869 marksize = outstanding / 10;
870 if (!marksize)
871 marksize = 1;
872 }
873 while (copylines ? ((qsize = lq.size()) > 0) : (outstanding > 0)) {
874 if (marksize && outstanding % marksize == 0 && outstanding != lastmark) {
875 lastmark = outstanding;
876 if (copylines)
877 fprintf(stderr, "%d queries outstanding, %d lines buffered\n",
878 outstanding, (int) qsize);
879 else
880 fprintf(stderr, "%d queries outstanding\n", outstanding);
881 }
882 // Yes, the body of this loop is identical to the inner loop above.
883 // I can't think of an easy way to roll them together, given all
884 // the variables they use.
885 if (copylines) {
886 lp = lq[0];
887 lq.pop_front();
888 qu = lp->qu;
889 } else {
890 qu = 0;
891 }
892 if (qu || !copylines) {
893 r = adns_wait(ads, &qu, &ans, (void **) &lp);
894 if (r)
895 fatal_errno("adns_wait", r);
896 dom = process_answer(ans, lp->ipaddr, reslist);
897 if (*dom)
898 ++stats.successful;
899 --outstanding;
900 } else {
901 // It's already in the list.
902 BoolString key(lp->ipaddr, false), value;
903 BoolStringMap::iterator it = reslist.find(key);
904 if (it != reslist.end()) {
905 value = (*it).second;
906 dom = value.get_str();
907 } else {
908 dom = lp->ipaddr;
909 }
910 ans = NULL;
911 }
912 if (copylines) {
913 if (zout) {
914 if (lp->lenbefore)
915 gzwrite(zout, lp->logbefore, lp->lenbefore);
916 gzputs(zout, *dom ? dom : lp->ipaddr);
917 gzwrite(zout, lp->logafter, lp->lenafter);
918 } else {
919 if (lp->lenbefore)
920 fwrite(lp->logbefore, lp->lenbefore, 1, stdout);
921 fputs(*dom ? dom : lp->ipaddr, stdout);
922 fwrite(lp->logafter, lp->lenafter, 1, stdout);
923 }
924 }
925 if (ans)
926 free(ans);
927 free(lp);
928 }
929
930 timed_out:
931 if (*dbfile)
932 store_db(resolved, reslist);
933
934 out:
935 if (zout)
936 gzclose(zout);
937
938 try
939 {
940 resolved->close();
941 }
942 catch (DbException &dbe)
943 {
944 fprintf(stderr, "DB error closing %s: %s\n",
945 *dbfile ? dbfile : "memory DB", dbe.what());
946 }
947
948 adns_finish(ads);
949
950 stats.print();
951
952 exit (0);
953 }
954