xref: /freebsd/contrib/ntp/libntp/ntp_intres.c (revision 5b9c547c)
1 /*
2  * ntp_intres.c - Implements a generic blocking worker child or thread,
3  *		  initially to provide a nonblocking solution for DNS
4  *		  name to address lookups available with getaddrinfo().
5  *
6  * This is a new implementation as of 2009 sharing the filename and
7  * very little else with the prior implementation, which used a
8  * temporary file to receive a single set of requests from the parent,
9  * and a NTP mode 7 authenticated request to push back responses.
10  *
11  * A primary goal in rewriting this code was the need to support the
12  * pool configuration directive's requirement to retrieve multiple
13  * addresses resolving a single name, which has previously been
14  * satisfied with blocking resolver calls from the ntpd mainline code.
15  *
16  * A secondary goal is to provide a generic mechanism for other
17  * blocking operations to be delegated to a worker using a common
18  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
19  * and work_thread.c implement the generic mechanism.  This file
20  * implements the two current consumers, getaddrinfo_sometime() and the
21  * presently unused getnameinfo_sometime().
22  *
23  * Both routines deliver results to a callback and manage memory
24  * allocation, meaning there is no freeaddrinfo_sometime().
25  *
26  * The initial implementation for Unix uses a pair of unidirectional
27  * pipes, one each for requests and responses, connecting the forked
28  * blocking child worker with the ntpd mainline.  The threaded code
29  * uses arrays of pointers to queue requests and responses.
30  *
31  * The parent drives the process, including scheduling sleeps between
32  * retries.
33  *
34  * Memory is managed differently for a child process, which mallocs
35  * request buffers to read from the pipe into, whereas the threaded
36  * code mallocs a copy of the request to hand off to the worker via
37  * the queueing array.  The resulting request buffer is free()d by
38  * platform-independent code.  A wrinkle is the request needs to be
39  * available to the requestor during response processing.
40  *
41  * Response memory allocation is also platform-dependent.  With a
42  * separate process and pipes, the response is free()d after being
43  * written to the pipe.  With threads, the same memory is handed
44  * over and the requestor frees it after processing is completed.
45  *
46  * The code should be generalized to support threads on Unix using
47  * much of the same code used for Windows initially.
48  *
49  */
50 #ifdef HAVE_CONFIG_H
51 # include <config.h>
52 #endif
53 
54 #include "ntp_workimpl.h"
55 
56 #ifdef WORKER
57 
58 #include <stdio.h>
59 #include <ctype.h>
60 #include <signal.h>
61 
62 /**/
63 #ifdef HAVE_SYS_TYPES_H
64 # include <sys/types.h>
65 #endif
66 #ifdef HAVE_NETINET_IN_H
67 #include <netinet/in.h>
68 #endif
69 #include <arpa/inet.h>
70 /**/
71 #ifdef HAVE_SYS_PARAM_H
72 # include <sys/param.h>
73 #endif
74 
75 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
76 # define HAVE_RES_INIT
77 #endif
78 
79 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
80 # ifdef HAVE_ARPA_NAMESER_H
81 #  include <arpa/nameser.h> /* DNS HEADER struct */
82 # endif
83 # ifdef HAVE_NETDB_H
84 #  include <netdb.h>
85 # endif
86 # include <resolv.h>
87 # ifdef HAVE_INT32_ONLY_WITH_DNS
88 #  define HAVE_INT32
89 # endif
90 # ifdef HAVE_U_INT32_ONLY_WITH_DNS
91 #  define HAVE_U_INT32
92 # endif
93 #endif
94 
95 #include "ntp.h"
96 #include "ntp_debug.h"
97 #include "ntp_malloc.h"
98 #include "ntp_syslog.h"
99 #include "ntp_unixtime.h"
100 #include "ntp_intres.h"
101 #include "intreswork.h"
102 
103 
104 /*
105  * Following are implementations of getaddrinfo_sometime() and
106  * getnameinfo_sometime().  Each is implemented in three routines:
107  *
108  * getaddrinfo_sometime()		getnameinfo_sometime()
109  * blocking_getaddrinfo()		blocking_getnameinfo()
110  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
111  *
112  * The first runs in the parent and marshalls (or serializes) request
113  * parameters into a request blob which is processed in the child by
114  * the second routine, blocking_*(), which serializes the results into
115  * a response blob unpacked by the third routine, *_complete(), which
116  * calls the callback routine provided with the request and frees
117  * _request_ memory allocated by the first routine.  Response memory
118  * is managed by the code which calls the *_complete routines.
119  */
120 
121 /* === typedefs === */
/*
 * Marshalled arguments of one getaddrinfo_sometime() call.  The struct
 * is immediately followed in the same allocation by nodesize bytes of
 * node name and then servsize bytes of service name.
 */
typedef struct blocking_gai_req_tag {	/* marshalled args */
	size_t			octets;		/* struct plus both trailing strings */
	u_int			dns_idx;	/* index into dnschild_contexts[] */
	time_t			scheduled;	/* when this attempt was scheduled */
	time_t			earliest;	/* worker sleeps until this time */
	struct addrinfo		hints;		/* copy of caller's hints, else zeroed */
	int			retry;		/* retry interval in seconds */
	gai_sometime_callback	callback;	/* invoked with the final result */
	void *			context;	/* opaque, handed back to callback */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
} blocking_gai_req;
134 
/* Header of the serialized getaddrinfo() result built by the worker. */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this header plus trailing data */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* copied from the request */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;
146 
/* Marshalled arguments of one getnameinfo_sometime() call. */
typedef struct blocking_gni_req_tag {
	size_t			octets;		/* sizeof(blocking_gni_req) */
	u_int			dns_idx;	/* index into dnschild_contexts[] */
	time_t			scheduled;	/* when this attempt was scheduled */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* retry interval in seconds */
	size_t			hostoctets;	/* host buffer size to pass to getnameinfo() */
	size_t			servoctets;	/* service buffer size to pass to getnameinfo() */
	int			flags;		/* getnameinfo() flags */
	gni_sometime_callback	callback;	/* invoked with the final result */
	void *			context;	/* opaque, handed back to callback */
	sockaddr_u		socku;		/* address to be resolved */
} blocking_gni_req;
160 
/* Header of the serialized getnameinfo() result built by the worker. */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this header plus trailing strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* copied from the request */
	size_t			hostoctets;	/* bytes of host string incl. NUL, 0 on failure */
	size_t			servoctets;	/* bytes of service string incl. NUL, 0 on failure */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;
173 
/* per-DNS-worker state in parent */
typedef struct dnschild_ctx_tag {
	u_int	index;			/* not referenced in this file */
	time_t	next_dns_timeslot;	/* earliest time the next query may be scheduled */
} dnschild_ctx;
179 
/* per-DNS-worker state in worker */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* worker this context belongs to */
	/* scheduled_sleep() skips the sleep for requests scheduled before this */
	time_t			ignore_scheduled_before;
#ifdef HAVE_RES_INIT
	/* reload_resolv_conf() calls res_init() once this time is reached */
	time_t	next_res_init;
#endif
} dnsworker_ctx;
188 
189 
/* === variables === */
/*
 * Growable arrays of context pointers indexed by the dns_idx carried
 * in each request.  The *_alloc counters hold the number of pointer
 * slots currently allocated; unused slots are NULL.
 */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;

#ifdef HAVE_RES_INIT
/* process-wide copy of the next res_init() time, used with WORK_THREAD */
static	time_t		next_res_init;
#endif
199 
200 
/* === forward declarations === */
/* parent-side context management */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
/* worker-side context management */
static	void		alloc_dnsworker_context(u_int);
/* static	void		free_dnsworker_context(u_int); */
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);
/* retry scheduling helpers */
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *,
						  time_t *);
static	int		should_retry_dns(int, int);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
/* without res_init() reloading is a no-op that only evaluates its argument */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
/* completion handlers handed to queue_blocking_request() */
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
227 
228 
229 /* === functions === */
230 /*
231  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
232  *			  invokes provided callback completion function.
233  */
234 int
235 getaddrinfo_sometime(
236 	const char *		node,
237 	const char *		service,
238 	const struct addrinfo *	hints,
239 	int			retry,
240 	gai_sometime_callback	callback,
241 	void *			context
242 	)
243 {
244 	blocking_gai_req *	gai_req;
245 	u_int			idx;
246 	dnschild_ctx *		child_ctx;
247 	size_t			req_size;
248 	size_t			nodesize;
249 	size_t			servsize;
250 	time_t			now;
251 
252 	NTP_REQUIRE(NULL != node);
253 	if (NULL != hints) {
254 		NTP_REQUIRE(0 == hints->ai_addrlen);
255 		NTP_REQUIRE(NULL == hints->ai_addr);
256 		NTP_REQUIRE(NULL == hints->ai_canonname);
257 		NTP_REQUIRE(NULL == hints->ai_next);
258 	}
259 
260 	idx = get_dnschild_ctx();
261 	child_ctx = dnschild_contexts[idx];
262 
263 	nodesize = strlen(node) + 1;
264 	servsize = strlen(service) + 1;
265 	req_size = sizeof(*gai_req) + nodesize + servsize;
266 
267 	gai_req = emalloc_zero(req_size);
268 
269 	gai_req->octets = req_size;
270 	gai_req->dns_idx = idx;
271 	now = time(NULL);
272 	gai_req->scheduled = now;
273 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
274 	child_ctx->next_dns_timeslot = gai_req->earliest;
275 	if (hints != NULL)
276 		gai_req->hints = *hints;
277 	gai_req->retry = retry;
278 	gai_req->callback = callback;
279 	gai_req->context = context;
280 	gai_req->nodesize = nodesize;
281 	gai_req->servsize = servsize;
282 
283 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
284 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
285 	       servsize);
286 
287 	if (queue_blocking_request(
288 		BLOCKING_GETADDRINFO,
289 		gai_req,
290 		req_size,
291 		&getaddrinfo_sometime_complete,
292 		gai_req)) {
293 
294 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
295 		errno = EFAULT;
296 		return -1;
297 	}
298 
299 	return 0;
300 }
301 
302 int
303 blocking_getaddrinfo(
304 	blocking_child *	c,
305 	blocking_pipe_header *	req
306 	)
307 {
308 	blocking_gai_req *	gai_req;
309 	dnsworker_ctx *		worker_ctx;
310 	blocking_pipe_header *	resp;
311 	blocking_gai_resp *	gai_resp;
312 	char *			node;
313 	char *			service;
314 	struct addrinfo *	ai_res;
315 	struct addrinfo *	ai;
316 	struct addrinfo *	serialized_ai;
317 	size_t			canons_octets;
318 	size_t			this_octets;
319 	size_t			resp_octets;
320 	char *			cp;
321 	time_t			time_now;
322 
323 	gai_req = (void *)((char *)req + sizeof(*req));
324 	node = (char *)gai_req + sizeof(*gai_req);
325 	service = node + gai_req->nodesize;
326 
327 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
328 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
329 			worker_ctx);
330 	reload_resolv_conf(worker_ctx);
331 
332 	/*
333 	 * Take a shot at the final size, better to overestimate
334 	 * at first and then realloc to a smaller size.
335 	 */
336 
337 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
338 		      16 * (sizeof(struct addrinfo) +
339 			    sizeof(sockaddr_u)) +
340 		      256;
341 	resp = emalloc_zero(resp_octets);
342 	gai_resp = (void *)(resp + 1);
343 
344 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
345 		  node, service, gai_req->hints.ai_family,
346 		  gai_req->hints.ai_flags));
347 #ifdef DEBUG
348 	if (debug >= 2)
349 		fflush(stdout);
350 #endif
351 	ai_res = NULL;
352 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
353 					&ai_res);
354 	gai_resp->retry = gai_req->retry;
355 #ifdef EAI_SYSTEM
356 	if (EAI_SYSTEM == gai_resp->retcode)
357 		gai_resp->gai_errno = errno;
358 #endif
359 	canons_octets = 0;
360 
361 	if (0 == gai_resp->retcode) {
362 		ai = ai_res;
363 		while (NULL != ai) {
364 			gai_resp->ai_count++;
365 			if (ai->ai_canonname)
366 				canons_octets += strlen(ai->ai_canonname) + 1;
367 			ai = ai->ai_next;
368 		}
369 		/*
370 		 * If this query succeeded only after retrying, DNS may have
371 		 * just become responsive.  Ignore previously-scheduled
372 		 * retry sleeps once for each pending request, similar to
373 		 * the way scheduled_sleep() does when its worker_sleep()
374 		 * is interrupted.
375 		 */
376 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
377 			time_now = time(NULL);
378 			worker_ctx->ignore_scheduled_before = time_now;
379 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
380 				  humantime(time_now)));
381 		}
382 	}
383 
384 	/*
385 	 * Our response consists of a header, followed by ai_count
386 	 * addrinfo structs followed by ai_count sockaddr_storage
387 	 * structs followed by the canonical names.
388 	 */
389 	gai_resp->octets = sizeof(*gai_resp)
390 			    + gai_resp->ai_count
391 				* (sizeof(gai_req->hints)
392 				   + sizeof(sockaddr_u))
393 			    + canons_octets;
394 
395 	resp_octets = sizeof(*resp) + gai_resp->octets;
396 	resp = erealloc(resp, resp_octets);
397 	gai_resp = (void *)(resp + 1);
398 
399 	/* cp serves as our current pointer while serializing */
400 	cp = (void *)(gai_resp + 1);
401 	canons_octets = 0;
402 
403 	if (0 == gai_resp->retcode) {
404 		ai = ai_res;
405 		while (NULL != ai) {
406 			memcpy(cp, ai, sizeof(*ai));
407 			serialized_ai = (void *)cp;
408 			cp += sizeof(*ai);
409 
410 			/* transform ai_canonname into offset */
411 			if (NULL != serialized_ai->ai_canonname) {
412 				serialized_ai->ai_canonname = (char *)canons_octets;
413 				canons_octets += strlen(ai->ai_canonname) + 1;
414 			}
415 
416 			/* leave fixup of ai_addr pointer for receiver */
417 
418 			ai = ai->ai_next;
419 		}
420 
421 		ai = ai_res;
422 		while (NULL != ai) {
423 			NTP_INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
424 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
425 			cp += sizeof(sockaddr_u);
426 
427 			ai = ai->ai_next;
428 		}
429 
430 		ai = ai_res;
431 		while (NULL != ai) {
432 			if (NULL != ai->ai_canonname) {
433 				this_octets = strlen(ai->ai_canonname) + 1;
434 				memcpy(cp, ai->ai_canonname, this_octets);
435 				cp += this_octets;
436 			}
437 
438 			ai = ai->ai_next;
439 		}
440 		freeaddrinfo(ai_res);
441 	}
442 
443 	/*
444 	 * make sure our walk and earlier calc match
445 	 */
446 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
447 
448 	if (queue_blocking_response(c, resp, resp_octets, req)) {
449 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
450 		return -1;
451 	}
452 
453 	return 0;
454 }
455 
456 
457 static void
458 getaddrinfo_sometime_complete(
459 	blocking_work_req	rtype,
460 	void *			context,
461 	size_t			respsize,
462 	void *			resp
463 	)
464 {
465 	blocking_gai_req *	gai_req;
466 	blocking_gai_resp *	gai_resp;
467 	dnschild_ctx *		child_ctx;
468 	struct addrinfo *	ai;
469 	struct addrinfo *	next_ai;
470 	sockaddr_u *		psau;
471 	char *			node;
472 	char *			service;
473 	char *			canon_start;
474 	time_t			time_now;
475 	int			again;
476 	int			af;
477 	const char *		fam_spec;
478 	int			i;
479 
480 	gai_req = context;
481 	gai_resp = resp;
482 
483 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
484 	DEBUG_REQUIRE(respsize == gai_resp->octets);
485 
486 	node = (char *)gai_req + sizeof(*gai_req);
487 	service = node + gai_req->nodesize;
488 
489 	child_ctx = dnschild_contexts[gai_req->dns_idx];
490 
491 	if (0 == gai_resp->retcode) {
492 		/*
493 		 * If this query succeeded only after retrying, DNS may have
494 		 * just become responsive.
495 		 */
496 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
497 			time_now = time(NULL);
498 			child_ctx->next_dns_timeslot = time_now;
499 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
500 				  gai_req->dns_idx, humantime(time_now)));
501 		}
502 	} else {
503 		again = should_retry_dns(gai_resp->retcode,
504 					 gai_resp->gai_errno);
505 		/*
506 		 * exponential backoff of DNS retries to 64s
507 		 */
508 		if (gai_req->retry > 0 && again) {
509 			/* log the first retry only */
510 			if (INITIAL_DNS_RETRY == gai_req->retry)
511 				NLOG(NLOG_SYSINFO) {
512 					af = gai_req->hints.ai_family;
513 					fam_spec = (AF_INET6 == af)
514 						       ? " (AAAA)"
515 						       : (AF_INET == af)
516 							     ? " (A)"
517 							     : "";
518 #ifdef EAI_SYSTEM
519 					if (EAI_SYSTEM == gai_resp->retcode) {
520 						errno = gai_resp->gai_errno;
521 						msyslog(LOG_INFO,
522 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
523 							node, fam_spec,
524 							gai_resp->gai_errno);
525 					} else
526 #endif
527 						msyslog(LOG_INFO,
528 							"retrying DNS %s%s: %s (%d)",
529 							node, fam_spec,
530 							gai_strerror(gai_resp->retcode),
531 							gai_resp->retcode);
532 				}
533 			manage_dns_retry_interval(&gai_req->scheduled,
534 			    &gai_req->earliest, &gai_req->retry,
535 			    &child_ctx->next_dns_timeslot);
536 			if (!queue_blocking_request(
537 					BLOCKING_GETADDRINFO,
538 					gai_req,
539 					gai_req->octets,
540 					&getaddrinfo_sometime_complete,
541 					gai_req))
542 				return;
543 			else
544 				msyslog(LOG_ERR,
545 					"unable to retry hostname %s",
546 					node);
547 		}
548 	}
549 
550 	/*
551 	 * fixup pointers in returned addrinfo array
552 	 */
553 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
554 	next_ai = NULL;
555 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
556 		ai[i].ai_next = next_ai;
557 		next_ai = &ai[i];
558 	}
559 
560 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
561 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
562 
563 	for (i = 0; i < gai_resp->ai_count; i++) {
564 		if (NULL != ai[i].ai_addr)
565 			ai[i].ai_addr = &psau->sa;
566 		psau++;
567 		if (NULL != ai[i].ai_canonname)
568 			ai[i].ai_canonname += (size_t)canon_start;
569 	}
570 
571 	NTP_ENSURE((char *)psau == canon_start);
572 
573 	if (!gai_resp->ai_count)
574 		ai = NULL;
575 
576 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
577 			     gai_req->context, node, service,
578 			     &gai_req->hints, ai);
579 
580 	free(gai_req);
581 	/* gai_resp is part of block freed by process_blocking_resp() */
582 }
583 
584 
#ifdef TEST_BLOCKING_WORKER
/*
 * gai_test_callback - test-only completion callback: traces every
 * address returned by getaddrinfo_sometime() and starts a reverse
 * lookup for each one via getnameinfo_sometime().
 */
void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
{
	sockaddr_u		addr;
	const char *		canon;
	const char *		socktype;

	if (rescode) {
		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
			  context, rescode, name, service));
		return;
	}

	/* rescode is zero from here on */
	for ( ; NULL != ai_res; ai_res = ai_res->ai_next) {
		ZERO_SOCK(&addr);
		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);

		canon = (ai_res->ai_canonname)
			    ? ai_res->ai_canonname
			    : "";
		if (SOCK_DGRAM == ai_res->ai_socktype)
			socktype = "DGRAM";
		else if (SOCK_STREAM == ai_res->ai_socktype)
			socktype = "STREAM";
		else
			socktype = "(other)";

		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
			  context,
			  AF(&addr),
			  stoa(&addr),
			  canon,
			  socktype,
			  ai_res,
			  ai_res->ai_addr,
			  ai_res->ai_next));

		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
	}
}
#endif	/* TEST_BLOCKING_WORKER */
620 
621 
622 int
623 getnameinfo_sometime(
624 	sockaddr_u *		psau,
625 	size_t			hostoctets,
626 	size_t			servoctets,
627 	int			flags,
628 	gni_sometime_callback	callback,
629 	void *			context
630 	)
631 {
632 	blocking_gni_req *	gni_req;
633 	u_int			idx;
634 	dnschild_ctx *		child_ctx;
635 	time_t			time_now;
636 
637 	NTP_REQUIRE(hostoctets);
638 	NTP_REQUIRE(hostoctets + servoctets < 1024);
639 
640 	idx = get_dnschild_ctx();
641 	child_ctx = dnschild_contexts[idx];
642 
643 	gni_req = emalloc_zero(sizeof(*gni_req));
644 
645 	gni_req->octets = sizeof(*gni_req);
646 	gni_req->dns_idx = idx;
647 	time_now = time(NULL);
648 	gni_req->scheduled = time_now;
649 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
650 	child_ctx->next_dns_timeslot = gni_req->earliest;
651 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
652 	gni_req->hostoctets = hostoctets;
653 	gni_req->servoctets = servoctets;
654 	gni_req->flags = flags;
655 	gni_req->retry = INITIAL_DNS_RETRY;
656 	gni_req->callback = callback;
657 	gni_req->context = context;
658 
659 	if (queue_blocking_request(
660 		BLOCKING_GETNAMEINFO,
661 		gni_req,
662 		sizeof(*gni_req),
663 		&getnameinfo_sometime_complete,
664 		gni_req)) {
665 
666 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
667 		errno = EFAULT;
668 		return -1;
669 	}
670 
671 	return 0;
672 }
673 
674 
675 int
676 blocking_getnameinfo(
677 	blocking_child *	c,
678 	blocking_pipe_header *	req
679 	)
680 {
681 	blocking_gni_req *	gni_req;
682 	dnsworker_ctx *		worker_ctx;
683 	blocking_pipe_header *	resp;
684 	blocking_gni_resp *	gni_resp;
685 	size_t			octets;
686 	size_t			resp_octets;
687 	char *			service;
688 	char *			cp;
689 	int			rc;
690 	time_t			time_now;
691 	char			host[1024];
692 
693 	gni_req = (void *)((char *)req + sizeof(*req));
694 
695 	octets = gni_req->hostoctets + gni_req->servoctets;
696 
697 	/*
698 	 * Some alloca() implementations are fragile regarding
699 	 * large allocations.  We only need room for the host
700 	 * and service names.
701 	 */
702 	NTP_REQUIRE(octets < sizeof(host));
703 	service = host + gni_req->hostoctets;
704 
705 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
706 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
707 			worker_ctx);
708 	reload_resolv_conf(worker_ctx);
709 
710 	/*
711 	 * Take a shot at the final size, better to overestimate
712 	 * then realloc to a smaller size.
713 	 */
714 
715 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
716 	resp = emalloc_zero(resp_octets);
717 	gni_resp = (void *)((char *)resp + sizeof(*resp));
718 
719 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
720 		  stoa(&gni_req->socku), gni_req->flags,
721 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
722 
723 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
724 					SOCKLEN(&gni_req->socku),
725 					host,
726 					gni_req->hostoctets,
727 					service,
728 					gni_req->servoctets,
729 					gni_req->flags);
730 	gni_resp->retry = gni_req->retry;
731 #ifdef EAI_SYSTEM
732 	if (EAI_SYSTEM == gni_resp->retcode)
733 		gni_resp->gni_errno = errno;
734 #endif
735 
736 	if (0 != gni_resp->retcode) {
737 		gni_resp->hostoctets = 0;
738 		gni_resp->servoctets = 0;
739 	} else {
740 		gni_resp->hostoctets = strlen(host) + 1;
741 		gni_resp->servoctets = strlen(service) + 1;
742 		/*
743 		 * If this query succeeded only after retrying, DNS may have
744 		 * just become responsive.  Ignore previously-scheduled
745 		 * retry sleeps once for each pending request, similar to
746 		 * the way scheduled_sleep() does when its worker_sleep()
747 		 * is interrupted.
748 		 */
749 		if (gni_req->retry > INITIAL_DNS_RETRY) {
750 			time_now = time(NULL);
751 			worker_ctx->ignore_scheduled_before = time_now;
752 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
753 				humantime(time_now)));
754 		}
755 	}
756 	octets = gni_resp->hostoctets + gni_resp->servoctets;
757 	/*
758 	 * Our response consists of a header, followed by the host and
759 	 * service strings, each null-terminated.
760 	 */
761 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
762 
763 	resp = erealloc(resp, resp_octets);
764 	gni_resp = (void *)(resp + 1);
765 
766 	gni_resp->octets = sizeof(*gni_resp) + octets;
767 
768 	/* cp serves as our current pointer while serializing */
769 	cp = (void *)(gni_resp + 1);
770 
771 	if (0 == gni_resp->retcode) {
772 		memcpy(cp, host, gni_resp->hostoctets);
773 		cp += gni_resp->hostoctets;
774 		memcpy(cp, service, gni_resp->servoctets);
775 		cp += gni_resp->servoctets;
776 	}
777 
778 	NTP_INSIST((size_t)(cp - (char *)resp) == resp_octets);
779 	NTP_INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
780 
781 	rc = queue_blocking_response(c, resp, resp_octets, req);
782 	if (rc)
783 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
784 	return rc;
785 }
786 
787 
788 static void
789 getnameinfo_sometime_complete(
790 	blocking_work_req	rtype,
791 	void *			context,
792 	size_t			respsize,
793 	void *			resp
794 	)
795 {
796 	blocking_gni_req *	gni_req;
797 	blocking_gni_resp *	gni_resp;
798 	dnschild_ctx *		child_ctx;
799 	char *			host;
800 	char *			service;
801 	time_t			time_now;
802 	int			again;
803 
804 	gni_req = context;
805 	gni_resp = resp;
806 
807 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
808 	DEBUG_REQUIRE(respsize == gni_resp->octets);
809 
810 	child_ctx = dnschild_contexts[gni_req->dns_idx];
811 
812 	if (0 == gni_resp->retcode) {
813 		/*
814 		 * If this query succeeded only after retrying, DNS may have
815 		 * just become responsive.
816 		 */
817 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
818 			time_now = time(NULL);
819 			child_ctx->next_dns_timeslot = time_now;
820 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
821 				  gni_req->dns_idx, humantime(time_now)));
822 		}
823 	} else {
824 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
825 		/*
826 		 * exponential backoff of DNS retries to 64s
827 		 */
828 		if (gni_req->retry > 0)
829 			manage_dns_retry_interval(&gni_req->scheduled,
830 			    &gni_req->earliest, &gni_req->retry,
831 			    &child_ctx->next_dns_timeslot);
832 
833 		if (gni_req->retry > 0 && again) {
834 			if (!queue_blocking_request(
835 				BLOCKING_GETNAMEINFO,
836 				gni_req,
837 				gni_req->octets,
838 				&getnameinfo_sometime_complete,
839 				gni_req))
840 				return;
841 
842 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
843 		}
844 	}
845 
846 	if (!gni_resp->hostoctets) {
847 		host = NULL;
848 		service = NULL;
849 	} else {
850 		host = (char *)gni_resp + sizeof(*gni_resp);
851 		service = (gni_resp->servoctets)
852 			      ? host + gni_resp->hostoctets
853 			      : NULL;
854 	}
855 
856 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
857 			     &gni_req->socku, gni_req->flags, host,
858 			     service, gni_req->context);
859 
860 	free(gni_req);
861 	/* gni_resp is part of block freed by process_blocking_resp() */
862 }
863 
864 
#ifdef TEST_BLOCKING_WORKER
/*
 * gni_test_callback - test-only completion callback which traces the
 * outcome of a getnameinfo_sometime() request.
 */
void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
{
	if (rescode) {
		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
			  context, rescode, gni_errno, flags, stoa(psau)));
	} else {
		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
			  host, service, stoa(psau), context));
	}
}
#endif	/* TEST_BLOCKING_WORKER */
876 
877 
#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() at most once per minute so
 * changes to /etc/resolv.conf (e.g. from the DHCP client) are picked
 * up.  [Bug 1226]
 *
 * The very first call (next_res_init still zero) only arms the
 * timer.  With WORK_THREAD the deadline is mirrored through the
 * file-scope next_res_init so the limit applies process-wide rather
 * than per worker.
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	const time_t	current = time(NULL);

# ifdef WORK_THREAD
	worker_ctx->next_res_init = next_res_init;
# endif
	/* deadline not reached yet, nothing to do */
	if (current < worker_ctx->next_res_init)
		return;

	if (0 != worker_ctx->next_res_init)
		res_init();
	worker_ctx->next_res_init = current + 60;
# ifdef WORK_THREAD
	next_res_init = worker_ctx->next_res_init;
# endif
}
#endif	/* HAVE_RES_INIT */
906 
907 
908 static u_int
909 reserve_dnschild_ctx(void)
910 {
911 	const size_t	ps = sizeof(dnschild_contexts[0]);
912 	const size_t	cs = sizeof(*dnschild_contexts[0]);
913 	u_int		c;
914 	u_int		new_alloc;
915 	size_t		octets;
916 	size_t		new_octets;
917 
918 	c = 0;
919 	while (TRUE) {
920 		for ( ; c < dnschild_contexts_alloc; c++) {
921 			if (NULL == dnschild_contexts[c]) {
922 				dnschild_contexts[c] = emalloc_zero(cs);
923 
924 				return c;
925 			}
926 		}
927 		new_alloc = dnschild_contexts_alloc + 20;
928 		new_octets = new_alloc * ps;
929 		octets = dnschild_contexts_alloc * ps;
930 		dnschild_contexts = erealloc_zero(dnschild_contexts,
931 						  new_octets, octets);
932 		dnschild_contexts_alloc = new_alloc;
933 	}
934 }
935 
936 
937 static u_int
938 get_dnschild_ctx(void)
939 {
940 	static u_int	shared_ctx = UINT_MAX;
941 
942 	if (worker_per_query)
943 		return reserve_dnschild_ctx();
944 
945 	if (UINT_MAX == shared_ctx)
946 		shared_ctx = reserve_dnschild_ctx();
947 
948 	return shared_ctx;
949 }
950 
951 
952 static void
953 alloc_dnsworker_context(
954 	u_int idx
955 	)
956 {
957 	const size_t worker_context_sz = sizeof(*dnsworker_contexts[0]);
958 
959 	REQUIRE(NULL == dnsworker_contexts[idx]);
960 	dnsworker_contexts[idx] = emalloc_zero(worker_context_sz);
961 }
962 
963 
964 static dnsworker_ctx *
965 get_worker_context(
966 	blocking_child *	c,
967 	u_int			idx
968 	)
969 {
970 	static size_t	ps = sizeof(dnsworker_contexts[0]);
971 	u_int	min_new_alloc;
972 	u_int	new_alloc;
973 	size_t	octets;
974 	size_t	new_octets;
975 
976 	if (dnsworker_contexts_alloc <= idx) {
977 		min_new_alloc = 1 + idx;
978 		/* round new_alloc up to nearest multiple of 4 */
979 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
980 		new_octets = new_alloc * ps;
981 		octets = dnsworker_contexts_alloc * ps;
982 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
983 						   new_octets, octets);
984 		dnsworker_contexts_alloc = new_alloc;
985 	}
986 
987 	if (NULL == dnsworker_contexts[idx])
988 		alloc_dnsworker_context(idx);
989 	ZERO(*dnsworker_contexts[idx]);
990 	dnsworker_contexts[idx]->c = c;
991 
992 	return dnsworker_contexts[idx];
993 }
994 
995 
996 static void
997 scheduled_sleep(
998 	time_t		scheduled,
999 	time_t		earliest,
1000 	dnsworker_ctx *	worker_ctx
1001 	)
1002 {
1003 	time_t now;
1004 
1005 	if (scheduled < worker_ctx->ignore_scheduled_before) {
1006 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1007 			  humantime(earliest), humantime(scheduled),
1008 			  humantime(worker_ctx->ignore_scheduled_before)));
1009 		return;
1010 	}
1011 
1012 	now = time(NULL);
1013 
1014 	if (now < earliest) {
1015 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1016 			  humantime(earliest), humantime(scheduled),
1017 			  humantime(worker_ctx->ignore_scheduled_before)));
1018 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1019 			/* our sleep was interrupted */
1020 			now = time(NULL);
1021 			worker_ctx->ignore_scheduled_before = now;
1022 #ifdef HAVE_RES_INIT
1023 			worker_ctx->next_res_init = now + 60;
1024 			next_res_init = worker_ctx->next_res_init;
1025 			res_init();
1026 #endif
1027 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1028 				  humantime(worker_ctx->ignore_scheduled_before)));
1029 		}
1030 	}
1031 }
1032 
1033 
/*
 * manage_dns_retry_interval is a helper used by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete
 * to calculate the new retry interval and schedule the next query.
 *
 * pscheduled		out: set to the current time
 * pwhen		out: time of the next attempt, the later of
 *			now + *pretry and *pnext_timeslot
 * pretry		in/out: retry interval in seconds; doubled on
 *			return and capped at 64
 * pnext_timeslot	in/out: advanced to the time of the next attempt
 *
 * The cap is checked before doubling so that a large interval can
 * not overflow the signed int (undefined behavior in C).  An
 * interval of zero or less is left unchanged.
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot
	)
{
	time_t	now;
	time_t	when;
	int	retry;

	now = time(NULL);
	retry = *pretry;

	/* never schedule ahead of the slot already promised to others */
	when = now + retry;
	if (when < *pnext_timeslot)
		when = *pnext_timeslot;
	*pnext_timeslot = when;

	/* exponential backoff: 32 and above all map to the 64s cap */
	if (retry >= 32)
		retry = 64;
	else if (retry > 0)
		retry *= 2;

	*pscheduled = now;
	*pwhen = when;
	*pretry = retry;
}
1061 
/*
 * should_retry_dns is a helper used by getaddrinfo_sometime_complete
 * and getnameinfo_sometime_complete which implements ntpd's DNS retry
 * policy.
 *
 * rescode is the getaddrinfo()/getnameinfo() return value and
 * res_errno the errno saved alongside it for the EAI_SYSTEM case.
 * Returns nonzero if the failure looks temporary and the lookup
 * should be retried, zero otherwise.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* latches once any lookup has reported EAI_AGAIN */
	static int	eai_again_seen;
	int		retry_it;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	/* anything not listed below is treated as permanent */
	retry_it = 0;

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		/* FALLTHROUGH */
	case EAI_FAIL:
		retry_it = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/* retried only until an EAI_AGAIN has been seen */
		retry_it = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		retry_it = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry_it ? "" : "not "));

	return retry_it;
}
1125 
1126 #else	/* !WORKER follows */
/* keeps this translation unit non-empty when WORKER is not defined */
int ntp_intres_nonempty_compilation_unit;
1128 #endif
1129