/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 *
 * See the COPYRIGHT file distributed with this work for additional
 * information regarding copyright ownership.
 */

#include <libgen.h>
#include <unistd.h>
#include <uv.h>

#include <isc/atomic.h>
#include <isc/barrier.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/errno.h>
#include <isc/log.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/quota.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/stdtime.h>
#include <isc/thread.h>
#include <isc/util.h>

#include "netmgr-int.h"
#include "uv-compat.h"

/*%<
 *
 * Maximum number of simultaneous handles in flight supported for a single
 * connected TCPDNS socket. This value was chosen arbitrarily, and may be
 * changed in the future.
 */

static atomic_uint_fast32_t last_tcpdnsquota_log = ATOMIC_VAR_INIT(0);

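/*
 * Rate-limit the quota log messages below: isc_stdtime_t has one-second
 * granularity, so the atomic exchange lets at most one caller per
 * distinct second observe a changed timestamp and emit a log message.
 */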
static bool
can_log_tcpdns_quota(void) {
	isc_stdtime_t now, last;

	isc_stdtime_get(&now);
	last = atomic_exchange_relaxed(&last_tcpdnsquota_log, now);
	if (now != last) {
		return (true);
	}

	return (false);
}

static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);

static void
tcpdns_close_direct(isc_nmsocket_t *sock);

static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status);

static void
tcpdns_connection_cb(uv_stream_t *server, int status);

static void
tcpdns_close_cb(uv_handle_t *uvhandle);

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);

static void
quota_accept_cb(isc_quota_t *quota, void *sock0);

static void
stop_tcpdns_parent(isc_nmsocket_t *sock);
static void
stop_tcpdns_child(isc_nmsocket_t *sock);

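/*
 * Establish the outgoing connection; this runs on the event loop of the
 * worker that owns the socket. The result is published in sock->result
 * and the thread waiting in isc_nm_tcpdnsconnect() is woken via
 * sock->cond; on the error path we additionally wait on sock->scond
 * until the caller has marked the socket active, so the socket isn't
 * torn down underneath it.
 */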
static isc_result_t
tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
	isc__networker_t *worker = NULL;
	isc_result_t result = ISC_R_UNSET;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(VALID_UVREQ(req));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	worker = &sock->mgr->workers[sock->tid];

	atomic_store(&sock->connecting, true);

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	RUNTIME_CHECK(r == 0);
	uv_handle_set_data(&sock->uv_handle.handle, sock);

	r = uv_timer_init(&worker->loop, &sock->timer);
	RUNTIME_CHECK(r == 0);

	if (isc__nm_closing(sock)) {
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	}

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r != 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

	if (req->local.length != 0) {
		r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
		/*
		 * In the case of a shared socket, UV_EINVAL will be
		 * returned and needs to be ignored.
		 */
		if (r != 0 && r != UV_EINVAL) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_BINDFAIL]);
			goto done;
		}
	}

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	uv_handle_set_data(&req->uv_req.handle, req);
	r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
			   &req->peer.type.sa, tcpdns_connect_cb);
	if (r != 0) {
		isc__nm_incstats(sock->mgr,
				 sock->statsindex[STATID_CONNECTFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]);

	uv_handle_set_data((uv_handle_t *)&sock->timer, &req->uv_req.connect);
	isc__nmsocket_timer_start(sock);

	atomic_store(&sock->connected, true);

done:
	result = isc__nm_uverr2result(r);
error:
	LOCK(&sock->lock);
	sock->result = result;
	SIGNAL(&sock->cond);
	if (!atomic_load(&sock->active)) {
		WAIT(&sock->scond, &sock->lock);
	}
	INSIST(atomic_load(&sock->active));
	UNLOCK(&sock->lock);

	return (result);
}

void
isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsconnect_t *ievent =
		(isc__netievent_tcpdnsconnect_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;
	isc__nm_uvreq_t *req = ievent->req;
	isc_result_t result = ISC_R_SUCCESS;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent == NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	result = tcpdns_connect_direct(sock, req);
	if (result != ISC_R_SUCCESS) {
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->active, false);
		isc__nm_tcpdns_close(sock);
	}

	/*
	 * The sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&sock);
}

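/*
 * libuv calls this when the connection attempt completes (or fails).
 * The connect timer is stopped, and its handle data, which pointed at
 * the uv_connect_t request during the attempt, is restored to the
 * socket.
 */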
static void
tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
	isc_result_t result;
	isc__nm_uvreq_t *req = NULL;
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
	struct sockaddr_storage ss;
	int r;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_timer_stop(sock);
	uv_handle_set_data((uv_handle_t *)&sock->timer, sock);

	if (!atomic_load(&sock->connecting)) {
		return;
	}

	req = uv_handle_get_data((uv_handle_t *)uvreq);

	REQUIRE(VALID_UVREQ(req));
	REQUIRE(VALID_NMHANDLE(req->handle));

	if (isc__nm_closing(sock)) {
		/* Network manager shutting down */
		result = ISC_R_SHUTTINGDOWN;
		goto error;
	} else if (isc__nmsocket_closing(sock)) {
		/* Connection canceled */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status == UV_ETIMEDOUT) {
		/* A timeout status code here indicates a hard error */
		result = ISC_R_CANCELED;
		goto error;
	} else if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto error;
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CONNECT]);
	r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
			       &(int){ sizeof(ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto error;
	}

	atomic_store(&sock->connecting, false);

	result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);

	return;

error:
	isc__nm_failed_connect_cb(sock, req, result, false);
}

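/*
 * Public entry point for outgoing DNS-over-TCP connections. The actual
 * connect runs on a network thread; this function blocks on sock->cond
 * until that thread has recorded a result, then marks the socket active
 * and releases the worker waiting on sock->scond.
 */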
void
isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
		     isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
		     size_t extrahandlesize) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnsconnect_t *ievent = NULL;
	isc__nm_uvreq_t *req = NULL;
	sa_family_t sa_family;

	REQUIRE(VALID_NM(mgr));
	REQUIRE(local != NULL);
	REQUIRE(peer != NULL);

	sa_family = peer->type.sa.sa_family;

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnssocket, local);

	sock->extrahandlesize = extrahandlesize;
	sock->connect_timeout = timeout;
	sock->result = ISC_R_UNSET;
	atomic_init(&sock->client, true);

	req = isc__nm_uvreq_get(mgr, sock);
	req->cb.connect = cb;
	req->cbarg = cbarg;
	req->peer = *peer;
	req->local = *local;
	req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
	if (result != ISC_R_SUCCESS) {
		if (isc__nm_in_netthread()) {
			sock->tid = isc_nm_tid();
		}
		isc__nmsocket_clearcb(sock);
		isc__nm_connectcb(sock, req, result, true);
		atomic_store(&sock->closed, true);
		isc__nmsocket_detach(&sock);
		return;
	}

	/* 2 minute timeout */
	result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	ievent = isc__nm_get_netievent_tcpdnsconnect(mgr, sock, req);

	if (isc__nm_in_netthread()) {
		atomic_store(&sock->active, true);
		sock->tid = isc_nm_tid();
		isc__nm_async_tcpdnsconnect(&mgr->workers[sock->tid],
					    (isc__netievent_t *)ievent);
		isc__nm_put_netievent_tcpdnsconnect(mgr, ievent);
	} else {
		atomic_init(&sock->active, false);
		sock->tid = isc_random_uniform(mgr->nworkers);
		isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}

	LOCK(&sock->lock);
	while (sock->result == ISC_R_UNSET) {
		WAIT(&sock->cond, &sock->lock);
	}
	atomic_store(&sock->active, true);
	BROADCAST(&sock->scond);
	UNLOCK(&sock->lock);
}

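/*
 * Create a listening socket with the options needed for distributing
 * load across workers: address reuse always, and, where available (e.g.
 * FreeBSD's SO_REUSEPORT_LB), kernel load balancing of incoming
 * connections across the per-worker sockets.
 */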
static uv_os_sock_t
isc__nm_tcpdns_lb_socket(sa_family_t sa_family) {
	isc_result_t result;
	uv_os_sock_t sock;

	result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

	(void)isc__nm_socket_incoming_cpu(sock);

	/* FIXME: set mss */

	result = isc__nm_socket_reuse(sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);

#if HAVE_SO_REUSEPORT_LB
	result = isc__nm_socket_reuse_lb(sock);
	RUNTIME_CHECK(result == ISC_R_SUCCESS);
#endif

	return (sock);
}

static void
enqueue_stoplistening(isc_nmsocket_t *sock) {
	isc__netievent_tcpdnsstop_t *ievent =
		isc__nm_get_netievent_tcpdnsstop(sock->mgr, sock);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

static void
start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
		   uv_os_sock_t fd, int tid) {
	isc__netievent_tcpdnslisten_t *ievent = NULL;
	isc_nmsocket_t *csock = &sock->children[tid];

	isc__nmsocket_init(csock, mgr, isc_nm_tcpdnssocket, iface);
	csock->parent = sock;
	csock->accept_cb = sock->accept_cb;
	csock->accept_cbarg = sock->accept_cbarg;
	csock->recv_cb = sock->recv_cb;
	csock->recv_cbarg = sock->recv_cbarg;
	csock->extrahandlesize = sock->extrahandlesize;
	csock->backlog = sock->backlog;
	csock->tid = tid;
	/*
	 * We don't attach to the quota, just assign it, to avoid
	 * increasing the quota unnecessarily.
	 */
	csock->pquota = sock->pquota;
	isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);

#if HAVE_SO_REUSEPORT_LB
	UNUSED(fd);
	csock->fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family);
#else
	csock->fd = dup(fd);
#endif
	REQUIRE(csock->fd >= 0);

	ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock);
	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
				     (isc__netievent_t *)ievent);
}
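
/*
 * Set up listening: one child socket is created per worker. With
 * SO_REUSEPORT_LB each child gets its own load-balanced socket;
 * otherwise every child dup()s the single fd created here, and only
 * the first child to run isc__nm_async_tcpdnslisten() actually binds
 * it.
 */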
isc_result_t
isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface,
		    isc_nm_recv_cb_t recv_cb, void *recv_cbarg,
		    isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
		    size_t extrahandlesize, int backlog, isc_quota_t *quota,
		    isc_nmsocket_t **sockp) {
	isc_result_t result = ISC_R_SUCCESS;
	isc_nmsocket_t *sock = NULL;
	size_t children_size = 0;
	uv_os_sock_t fd = -1;

	REQUIRE(VALID_NM(mgr));

	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
	isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface);

	atomic_init(&sock->rchildren, 0);
	sock->nchildren = mgr->nworkers;
	children_size = sock->nchildren * sizeof(sock->children[0]);
	sock->children = isc_mem_get(mgr->mctx, children_size);
	memset(sock->children, 0, children_size);

	sock->result = ISC_R_UNSET;
	sock->accept_cb = accept_cb;
	sock->accept_cbarg = accept_cbarg;
	sock->recv_cb = recv_cb;
	sock->recv_cbarg = recv_cbarg;
	sock->extrahandlesize = extrahandlesize;
	sock->backlog = backlog;
	sock->pquota = quota;

	sock->tid = 0;
	sock->fd = -1;

#if !HAVE_SO_REUSEPORT_LB
	fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family);
#endif

	isc_barrier_init(&sock->startlistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		if ((int)i == isc_nm_tid()) {
			continue;
		}
		start_tcpdns_child(mgr, iface, sock, fd, i);
	}

	if (isc__nm_in_netthread()) {
		start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid());
	}

#if !HAVE_SO_REUSEPORT_LB
	isc__nm_closesocket(fd);
#endif

	LOCK(&sock->lock);
	while (atomic_load(&sock->rchildren) != sock->nchildren) {
		WAIT(&sock->cond, &sock->lock);
	}
	result = sock->result;
	atomic_store(&sock->active, true);
	UNLOCK(&sock->lock);

	INSIST(result != ISC_R_UNSET);

	if (result == ISC_R_SUCCESS) {
		REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
		*sockp = sock;
	} else {
		atomic_store(&sock->active, false);
		enqueue_stoplistening(sock);
		isc_nmsocket_close(&sock);
	}

	return (result);
}

void
isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnslisten_t *ievent =
		(isc__netievent_tcpdnslisten_t *)ev0;
	sa_family_t sa_family;
	int r;
	int flags = 0;
	isc_nmsocket_t *sock = NULL;
	isc_result_t result = ISC_R_UNSET;

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());
	REQUIRE(VALID_NMSOCK(ievent->sock->parent));

	sock = ievent->sock;
	sa_family = sock->iface.type.sa.sa_family;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->parent != NULL);
	REQUIRE(sock->tid == isc_nm_tid());

	/* TODO: set min mss */

	r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
	RUNTIME_CHECK(r == 0);
	uv_handle_set_data(&sock->uv_handle.handle, sock);
	/* This keeps the socket alive after everything else is gone */
	isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });

	r = uv_timer_init(&worker->loop, &sock->timer);
	RUNTIME_CHECK(r == 0);
	uv_handle_set_data((uv_handle_t *)&sock->timer, sock);

	LOCK(&sock->parent->lock);

	r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
	if (r < 0) {
		isc__nm_closesocket(sock->fd);
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
		goto done;
	}
	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);

	if (sa_family == AF_INET6) {
		flags = UV_TCP_IPV6ONLY;
	}

#if HAVE_SO_REUSEPORT_LB
	r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa,
				flags);
	if (r < 0) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
		goto done;
	}
#else
	if (sock->parent->fd == -1) {
		r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
					&sock->iface.type.sa, flags);
		if (r < 0) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_BINDFAIL]);
			goto done;
		}
		sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags;
		sock->parent->fd = sock->fd;
	} else {
		/* The socket is already bound, just copy the flags */
		sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags;
	}
#endif

	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);

	/*
	 * The callback will run in the same thread uv_listen() was called
	 * from, so a race with tcpdns_connection_cb() isn't possible.
	 */
	r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
		      tcpdns_connection_cb);
	if (r != 0) {
		isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
			      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
			      "uv_listen failed: %s",
			      isc_result_totext(isc__nm_uverr2result(r)));
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
		goto done;
	}

	atomic_store(&sock->listening, true);

done:
	result = isc__nm_uverr2result(r);
	if (result != ISC_R_SUCCESS) {
		sock->pquota = NULL;
	}

	atomic_fetch_add(&sock->parent->rchildren, 1);
	if (sock->parent->result == ISC_R_UNSET) {
		sock->parent->result = result;
	}
	SIGNAL(&sock->parent->cond);
	UNLOCK(&sock->parent->lock);

	isc_barrier_wait(&sock->parent->startlistening);
}

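/*
 * libuv calls this on the listening child's thread whenever a new
 * connection is ready to be accepted. If a connection quota is set and
 * currently exhausted, isc_quota_attach_cb() registers
 * quota_accept_cb(), and the accept is retried asynchronously once
 * quota becomes available.
 */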
static void
tcpdns_connection_cb(uv_stream_t *server, int status) {
	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
	isc_result_t result;
	isc_quota_t *quota = NULL;

	if (status != 0) {
		result = isc__nm_uverr2result(status);
		goto done;
	}

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		result = ISC_R_CANCELED;
		goto done;
	}

	if (ssock->pquota != NULL) {
		result = isc_quota_attach_cb(ssock->pquota, &quota,
					     &ssock->quotacb);
		if (result == ISC_R_QUOTA) {
			isc__nm_incstats(ssock->mgr,
					 ssock->statsindex[STATID_ACCEPTFAIL]);
			return;
		}
	}

	result = accept_connection(ssock, quota);
done:
	if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) {
		if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) ||
		    can_log_tcpdns_quota())
		{
			isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
				      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
				      "TCP connection failed: %s",
				      isc_result_totext(result));
		}
	}
}

void
isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true)) {
		INSIST(0);
		ISC_UNREACHABLE();
	}

	if (!isc__nm_in_netthread()) {
		enqueue_stoplistening(sock);
	} else {
		stop_tcpdns_parent(sock);
	}
}

void
isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsstop_t *ievent =
		(isc__netievent_tcpdnsstop_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (sock->parent != NULL) {
		stop_tcpdns_child(sock);
		return;
	}

	stop_tcpdns_parent(sock);
}

void
isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(result != ISC_R_SUCCESS);

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	if (!sock->recv_read) {
		goto destroy;
	}
	sock->recv_read = false;

	if (sock->recv_cb != NULL) {
		isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
		isc__nmsocket_clearcb(sock);
		isc__nm_readcb(sock, req, result);
	}

destroy:
	isc__nmsocket_prep_destroy(sock);

	/*
	 * We need to detach from the quota after the read callback function
	 * has had a chance to be executed.
	 */
	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}
}

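/*
 * Start (or resume) reading one DNS message for the client; the read is
 * always restarted via an async event -- see the comment inside the
 * function below.
 */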
void
isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
	REQUIRE(VALID_NMHANDLE(handle));
	REQUIRE(VALID_NMSOCK(handle->sock));

	isc_nmsocket_t *sock = handle->sock;
	isc__netievent_tcpdnsread_t *ievent = NULL;

	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->statichandle == handle);

	sock->recv_cb = cb;
	sock->recv_cbarg = cbarg;
	sock->recv_read = true;
	if (sock->read_timeout == 0) {
		sock->read_timeout =
			(atomic_load(&sock->keepalive)
				 ? atomic_load(&sock->mgr->keepalive)
				 : atomic_load(&sock->mgr->idle));
	}

	ievent = isc__nm_get_netievent_tcpdnsread(sock->mgr, sock);

	/*
	 * This MUST be done asynchronously, no matter which thread we're
	 * in. The callback function for isc_nm_read() often calls
	 * isc_nm_read() again; if we tried to do that synchronously
	 * we'd clash in processbuffer() and grow the stack indefinitely.
	 */
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);

	return;
}

void
isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsread_t *ievent =
		(isc__netievent_tcpdnsread_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		atomic_store(&sock->reading, true);
		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
		return;
	}

	isc__nm_process_sock_buffer(sock);
}

/*
 * Process a single packet from the incoming buffer.
 *
 * Return ISC_R_SUCCESS and attach 'handlep' to a handle if something
 * was processed; return ISC_R_NOMORE if there isn't a full message
 * to be processed.
 *
 * The caller will need to unreference the handle.
 */
isc_result_t
isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock) {
	size_t len;
	isc__nm_uvreq_t *req = NULL;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(sock)) {
		return (ISC_R_CANCELED);
	}

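	/*
	 * DNS over TCP frames each message with a two-byte length field
	 * in network byte order (RFC 1035, section 4.2.2), so the buffer
	 * can't be processed until at least those two bytes have arrived.
	 */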
	/*
	 * If we don't even have the length yet, we can't do
	 * anything.
	 */
	if (sock->buf_len < 2) {
		return (ISC_R_NOMORE);
	}

	/*
	 * Process the first packet from the buffer, leaving
	 * the rest (if any) for later.
	 */
	len = ntohs(*(uint16_t *)sock->buf);
	if (len > sock->buf_len - 2) {
		return (ISC_R_NOMORE);
	}

	req = isc__nm_get_read_req(sock, NULL);
	REQUIRE(VALID_UVREQ(req));

	/*
	 * We need to launch isc__nm_resume_processing() after the buffer
	 * has been consumed, thus we must delay detaching the handle.
	 */
	isc_nmhandle_attach(req->handle, &handle);

	/*
	 * The callback will be called synchronously because the
	 * result is ISC_R_SUCCESS, so we don't need to have
	 * the buffer on the heap.
	 */
	req->uvbuf.base = (char *)sock->buf + 2;
	req->uvbuf.len = len;

	/*
	 * If isc__nm_tcpdns_read() was called, it will be satisfied by a
	 * single DNS message in the next call.
	 */
	sock->recv_read = false;

	/*
	 * An assertion failure here means that there's an erroneous
	 * extra nmhandle detach happening in the callback and
	 * isc__nm_resume_processing() is called while we're
	 * processing the buffer.
	 */
	REQUIRE(sock->processing == false);
	sock->processing = true;
	isc__nm_readcb(sock, req, ISC_R_SUCCESS);
	sock->processing = false;

	len += 2;
	sock->buf_len -= len;
	if (sock->buf_len > 0) {
		memmove(sock->buf, sock->buf + len, sock->buf_len);
	}

	isc_nmhandle_detach(&handle);

	return (ISC_R_SUCCESS);
}

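/*
 * libuv read callback: append the newly read data to the socket's
 * accumulation buffer and then try to process as many complete DNS
 * messages as the buffer now contains.
 */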
void
isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
		       const uv_buf_t *buf) {
	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
	uint8_t *base = NULL;
	size_t len;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->reading));
	REQUIRE(buf != NULL);

	if (isc__nmsocket_closing(sock)) {
		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, true);
		goto free;
	}

	if (nread < 0) {
		if (nread != UV_EOF) {
			isc__nm_incstats(sock->mgr,
					 sock->statsindex[STATID_RECVFAIL]);
		}

		isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nread), true);
		goto free;
	}

	base = (uint8_t *)buf->base;
	len = nread;

	/*
	 * FIXME: We can avoid the memmove here if we know we have received a
	 * full packet; i.e. we should be smarter, as there are just a few
	 * situations to handle.
	 *
	 * The tcp_alloc_buf should be smarter and point uv_read_start() to
	 * the position where the previous read ended in sock->buf; that way
	 * the data could be read directly into sock->buf.
	 */

	if (sock->buf_len + len > sock->buf_size) {
		isc__nm_alloc_dnsbuf(sock, sock->buf_len + len);
	}
	memmove(sock->buf + sock->buf_len, base, len);
	sock->buf_len += len;

	if (!atomic_load(&sock->client)) {
		sock->read_timeout = atomic_load(&sock->mgr->idle);
	}

	isc__nm_process_sock_buffer(sock);
free:
	isc__nm_free_uvbuf(sock, buf);
}

static void
quota_accept_cb(isc_quota_t *quota, void *sock0) {
	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;

	REQUIRE(VALID_NMSOCK(sock));

	/*
	 * Create a tcpdnsaccept event and pass it using the async channel.
	 */

	isc__netievent_tcpdnsaccept_t *ievent =
		isc__nm_get_netievent_tcpdnsaccept(sock->mgr, sock, quota);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);
}

/*
 * This is called after we get a quota_accept_cb() callback.
 */
void
isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsaccept_t *ievent =
		(isc__netievent_tcpdnsaccept_t *)ev0;
	isc_result_t result;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	result = accept_connection(ievent->sock, ievent->quota);
	if (result != ISC_R_SUCCESS && result != ISC_R_NOCONN) {
		if ((result != ISC_R_QUOTA && result != ISC_R_SOFTQUOTA) ||
		    can_log_tcpdns_quota())
		{
			isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
				      ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
				      "TCP connection failed: %s",
				      isc_result_totext(result));
		}
	}
}

static isc_result_t
accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
	isc_nmsocket_t *csock = NULL;
	isc__networker_t *worker = NULL;
	int r;
	isc_result_t result;
	struct sockaddr_storage peer_ss;
	struct sockaddr_storage local_ss;
	isc_sockaddr_t local;
	isc_nmhandle_t *handle = NULL;

	REQUIRE(VALID_NMSOCK(ssock));
	REQUIRE(ssock->tid == isc_nm_tid());

	if (isc__nmsocket_closing(ssock)) {
		if (quota != NULL) {
			isc_quota_detach(&quota);
		}
		return (ISC_R_CANCELED);
	}

	REQUIRE(ssock->accept_cb != NULL);

	csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
	isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpdnssocket,
			   &ssock->iface);
	csock->tid = ssock->tid;
	csock->extrahandlesize = ssock->extrahandlesize;
	isc__nmsocket_attach(ssock, &csock->server);
	csock->recv_cb = ssock->recv_cb;
	csock->recv_cbarg = ssock->recv_cbarg;
	csock->quota = quota;
	atomic_init(&csock->accepting, true);

	worker = &csock->mgr->workers[csock->tid];

	r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
	RUNTIME_CHECK(r == 0);
	uv_handle_set_data(&csock->uv_handle.handle, csock);

	r = uv_timer_init(&worker->loop, &csock->timer);
	RUNTIME_CHECK(r == 0);
	uv_handle_set_data((uv_handle_t *)&csock->timer, csock);

	r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	r = uv_tcp_getpeername(&csock->uv_handle.tcp,
			       (struct sockaddr *)&peer_ss,
			       &(int){ sizeof(peer_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&csock->peer,
					   (struct sockaddr *)&peer_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	r = uv_tcp_getsockname(&csock->uv_handle.tcp,
			       (struct sockaddr *)&local_ss,
			       &(int){ sizeof(local_ss) });
	if (r != 0) {
		result = isc__nm_uverr2result(r);
		goto failure;
	}

	result = isc_sockaddr_fromsockaddr(&local,
					   (struct sockaddr *)&local_ss);
	if (result != ISC_R_SUCCESS) {
		goto failure;
	}

	/*
	 * The handle will be either detached on acceptcb failure or in the
	 * readcb.
	 */
	handle = isc__nmhandle_get(csock, NULL, &local);

	result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
	if (result != ISC_R_SUCCESS) {
		isc_nmhandle_detach(&handle);
		goto failure;
	}

	atomic_store(&csock->accepting, false);

	isc__nm_incstats(csock->mgr, csock->statsindex[STATID_ACCEPT]);

	csock->read_timeout = atomic_load(&csock->mgr->init);

	csock->closehandle_cb = isc__nm_resume_processing;

	/*
	 * We need to keep the handle alive until we fail to read or the
	 * connection is closed by the other side; it will be detached via
	 * prep_destroy()->tcpdns_close_direct().
	 */
	isc_nmhandle_attach(handle, &csock->recv_handle);
	isc__nm_process_sock_buffer(csock);

	/*
	 * The initial timer has been set; update the read timeout for the
	 * next reads.
	 */
	csock->read_timeout = (atomic_load(&csock->keepalive)
				       ? atomic_load(&csock->mgr->keepalive)
				       : atomic_load(&csock->mgr->idle));

	isc_nmhandle_detach(&handle);

	/*
	 * sock is now attached to the handle.
	 */
	isc__nmsocket_detach(&csock);

	return (ISC_R_SUCCESS);

failure:

	atomic_store(&csock->active, false);

	isc__nm_failed_accept_cb(csock, result);

	isc__nmsocket_prep_destroy(csock);

	isc__nmsocket_detach(&csock);

	return (result);
}

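/*
 * Send a DNS message: the two-byte length prefix is stored in the
 * request and written together with the payload (see the two-element
 * uv_buf_t array in isc__nm_async_tcpdnssend()), so the caller's region
 * does not need to be copied.
 */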
void
isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
		    isc_nm_cb_t cb, void *cbarg) {
	isc__netievent_tcpdnssend_t *ievent = NULL;
	isc__nm_uvreq_t *uvreq = NULL;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	uvreq = isc__nm_uvreq_get(sock->mgr, sock);
	*(uint16_t *)uvreq->tcplen = htons(region->length);
	uvreq->uvbuf.base = (char *)region->base;
	uvreq->uvbuf.len = region->length;

	isc_nmhandle_attach(handle, &uvreq->handle);

	uvreq->cb.send = cb;
	uvreq->cbarg = cbarg;

	ievent = isc__nm_get_netievent_tcpdnssend(sock->mgr, sock, uvreq);
	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
				     (isc__netievent_t *)ievent);

	return;
}

static void
tcpdns_send_cb(uv_write_t *req, int status) {
	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
	isc_nmsocket_t *sock = NULL;

	REQUIRE(VALID_UVREQ(uvreq));
	REQUIRE(VALID_NMHANDLE(uvreq->handle));

	sock = uvreq->sock;

	if (status < 0) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
		isc__nm_failed_send_cb(sock, uvreq,
				       isc__nm_uverr2result(status));
		return;
	}

	isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
}

/*
 * Handle the 'tcpdnssend' async event - send a packet on the socket.
 */
void
isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc_result_t result;
	isc__netievent_tcpdnssend_t *ievent =
		(isc__netievent_tcpdnssend_t *)ev0;
	isc_nmsocket_t *sock = NULL;
	isc__nm_uvreq_t *uvreq = NULL;
	int r, nbufs = 2;

	UNUSED(worker);

	REQUIRE(VALID_UVREQ(ievent->req));
	REQUIRE(VALID_NMSOCK(ievent->sock));
	REQUIRE(ievent->sock->type == isc_nm_tcpdnssocket);
	REQUIRE(ievent->sock->tid == isc_nm_tid());

	sock = ievent->sock;
	uvreq = ievent->req;

	uv_buf_t bufs[2] = { { .base = uvreq->tcplen, .len = 2 },
			     { .base = uvreq->uvbuf.base,
			       .len = uvreq->uvbuf.len } };

	if (isc__nmsocket_closing(sock)) {
		result = ISC_R_CANCELED;
		goto fail;
	}

	r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);

	if (r == (int)(bufs[0].len + bufs[1].len)) {
		/* Wrote everything */
		isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, true);
		return;
	}

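	/*
	 * Partial write: r counts bytes consumed across both buffers, so
	 * r == 1 means only half of the two-byte length prefix was
	 * written, and r >= 2 means the whole prefix plus (r - 2) bytes
	 * of the message went out.
	 */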
	if (r == 1) {
		/* Partial write of the DNS message length */
		bufs[0].base = uvreq->tcplen + 1;
		bufs[0].len = 1;
	} else if (r > 0) {
		/* Partial write of the DNS message */
		nbufs = 1;
		bufs[0].base = uvreq->uvbuf.base + (r - 2);
		bufs[0].len = uvreq->uvbuf.len - (r - 2);
	} else if (r == UV_ENOSYS || r == UV_EAGAIN) {
		/* uv_try_write() not supported or would block; send
		 * asynchronously */
	} else {
		/* Error sending data */
		result = isc__nm_uverr2result(r);
		goto fail;
	}

	r = uv_write(&uvreq->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
		     tcpdns_send_cb);
	if (r < 0) {
		result = isc__nm_uverr2result(r);
		goto fail;
	}

	return;

fail:
	if (result != ISC_R_SUCCESS) {
		isc__nm_incstats(sock->mgr, sock->statsindex[STATID_SENDFAIL]);
		isc__nm_failed_send_cb(sock, uvreq, result);
	}
}

static void
tcpdns_stop_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	uv_handle_set_data(handle, NULL);

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true)) {
		INSIST(0);
		ISC_UNREACHABLE();
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

	atomic_store(&sock->listening, false);

	isc__nmsocket_detach(&sock);
}

static void
tcpdns_close_sock(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
					    true)) {
		INSIST(0);
		ISC_UNREACHABLE();
	}

	isc__nm_incstats(sock->mgr, sock->statsindex[STATID_CLOSE]);

	if (sock->server != NULL) {
		isc__nmsocket_detach(&sock->server);
	}

	atomic_store(&sock->connected, false);

	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	uv_handle_set_data(handle, NULL);

	tcpdns_close_sock(sock);
}

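/*
 * Closing is staged: tcpdns_close_direct() closes the read timer first,
 * and only once its close callback has run is the TCP handle itself
 * closed -- via tcpdns_stop_cb() for listener children, or
 * tcpdns_close_cb() otherwise (or directly here when the TCP handle is
 * already being closed).
 */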
static void
timer_close_cb(uv_handle_t *timer) {
	isc_nmsocket_t *sock = uv_handle_get_data(timer);
	uv_handle_set_data(timer, NULL);

	REQUIRE(VALID_NMSOCK(sock));

	if (sock->parent) {
		uv_close(&sock->uv_handle.handle, tcpdns_stop_cb);
	} else if (uv_is_closing(&sock->uv_handle.handle)) {
		tcpdns_close_sock(sock);
	} else {
		uv_close(&sock->uv_handle.handle, tcpdns_close_cb);
	}
}

static void
stop_tcpdns_child(isc_nmsocket_t *sock) {
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(sock->tid == isc_nm_tid());

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true)) {
		return;
	}

	tcpdns_close_direct(sock);

	atomic_fetch_sub(&sock->parent->rchildren, 1);

	isc_barrier_wait(&sock->parent->stoplistening);
}

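/*
 * Stop the listener: schedule a stop event for every other worker's
 * child first, then stop our own child; the stoplistening barrier in
 * stop_tcpdns_child() ensures all children have shut down before the
 * parent is destroyed.
 */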
static void
stop_tcpdns_parent(isc_nmsocket_t *sock) {
	isc_nmsocket_t *csock = NULL;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnslistener);

	isc_barrier_init(&sock->stoplistening, sock->nchildren);

	for (size_t i = 0; i < sock->nchildren; i++) {
		csock = &sock->children[i];
		REQUIRE(VALID_NMSOCK(csock));

		if ((int)i == isc_nm_tid()) {
			/*
			 * We need to schedule closing the other sockets first.
			 */
			continue;
		}

		atomic_store(&csock->active, false);
		enqueue_stoplistening(csock);
	}

	csock = &sock->children[isc_nm_tid()];
	atomic_store(&csock->active, false);
	stop_tcpdns_child(csock);

	atomic_store(&sock->closed, true);
	isc__nmsocket_prep_destroy(sock);
}

static void
tcpdns_close_direct(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(atomic_load(&sock->closing));

	if (sock->quota != NULL) {
		isc_quota_detach(&sock->quota);
	}

	if (sock->recv_handle != NULL) {
		isc_nmhandle_detach(&sock->recv_handle);
	}

	isc__nmsocket_timer_stop(sock);
	isc__nm_stop_reading(sock);

	uv_handle_set_data((uv_handle_t *)&sock->timer, sock);
	uv_close((uv_handle_t *)&sock->timer, timer_close_cb);
}

void
isc__nm_tcpdns_close(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);
	REQUIRE(!isc__nmsocket_active(sock));

	if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
					    true)) {
		return;
	}

	if (sock->tid == isc_nm_tid()) {
		tcpdns_close_direct(sock);
	} else {
		/*
		 * We need to create an event and pass it using the async
		 * channel.
		 */
		isc__netievent_tcpdnsclose_t *ievent =
			isc__nm_get_netievent_tcpdnsclose(sock->mgr, sock);

		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
				       (isc__netievent_t *)ievent);
	}
}

void
isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnsclose_t *ievent =
		(isc__netievent_tcpdnsclose_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	tcpdns_close_direct(sock);
}

static void
tcpdns_close_connect_cb(uv_handle_t *handle) {
	isc_nmsocket_t *sock = uv_handle_get_data(handle);

	REQUIRE(VALID_NMSOCK(sock));

	REQUIRE(isc__nm_in_netthread());
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nmsocket_prep_destroy(sock);
	isc__nmsocket_detach(&sock);
}

void
isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock) {
	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	/*
	 * If the socket is active, mark it inactive and
	 * continue. If it isn't active, stop now.
	 */
	if (!isc__nmsocket_deactivate(sock)) {
		return;
	}

	if (atomic_load(&sock->accepting)) {
		return;
	}

	if (atomic_load(&sock->connecting)) {
		isc_nmsocket_t *tsock = NULL;
		isc__nmsocket_attach(sock, &tsock);
		uv_close(&sock->uv_handle.handle, tcpdns_close_connect_cb);
		return;
	}

	if (sock->statichandle != NULL) {
		if (isc__nm_closing(sock)) {
			isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
		} else {
			isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
		}
		return;
	}

	/*
	 * Otherwise, we just send the socket to the abyss...
	 */
	if (sock->parent == NULL) {
		isc__nmsocket_prep_destroy(sock);
	}
}

void
isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle) {
	isc_nmsocket_t *sock = NULL;
	isc__netievent_tcpdnscancel_t *ievent = NULL;

	REQUIRE(VALID_NMHANDLE(handle));

	sock = handle->sock;

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->type == isc_nm_tcpdnssocket);

	ievent = isc__nm_get_netievent_tcpdnscancel(sock->mgr, sock, handle);
	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
			       (isc__netievent_t *)ievent);
}

void
isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
	isc__netievent_tcpdnscancel_t *ievent =
		(isc__netievent_tcpdnscancel_t *)ev0;
	isc_nmsocket_t *sock = ievent->sock;

	UNUSED(worker);

	REQUIRE(VALID_NMSOCK(sock));
	REQUIRE(sock->tid == isc_nm_tid());

	isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
}