1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7  *
8  * See the COPYRIGHT file distributed with this work for additional
9  * information regarding copyright ownership.
10  */
11 
/* This code uses functions which are only available on Windows XP and
 * higher, and Windows Server 2003 and higher.
 *
 * This code is by nature multithreaded and uses the I/O completion port
 * to pass along information about completed I/O.  All sends, receives,
 * accepts, and connects are completed through the completion port.
 *
 * The number of Completion Port Worker threads used is the total number
 * of CPUs + 1. This increases the likelihood that a Worker Thread is
 * available for processing a completed request.
 *
 * XXXPDM 5 August, 2002
 */
26 
27 #include <config.h>
28 
29 #define MAKE_EXTERNAL 1
30 
31 #include <sys/types.h>
32 
33 #ifndef _WINSOCKAPI_
34 #define _WINSOCKAPI_   /* Prevent inclusion of winsock.h in windows.h */
35 #endif
36 
37 #include <errno.h>
38 #include <stdbool.h>
39 #include <stddef.h>
40 #include <inttypes.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <io.h>
45 #include <fcntl.h>
46 #include <process.h>
47 
48 #include <isc/app.h>
49 #include <isc/buffer.h>
50 #include <isc/bufferlist.h>
51 #include <isc/condition.h>
52 #include <isc/list.h>
53 #include <isc/log.h>
54 #include <isc/mem.h>
55 #include <isc/msgs.h>
56 #include <isc/mutex.h>
57 #include <isc/net.h>
58 #include <isc/once.h>
59 #include <isc/os.h>
60 #include <isc/platform.h>
61 #include <isc/print.h>
62 #include <isc/region.h>
63 #include <isc/socket.h>
64 #include <isc/stats.h>
65 #include <isc/strerror.h>
66 #include <isc/string.h>
67 #include <isc/syslog.h>
68 #include <isc/task.h>
69 #include <isc/thread.h>
70 #include <isc/util.h>
71 #include <isc/win32os.h>
72 
73 #include <mswsock.h>
74 
75 #include "errno2result.h"
76 
/*
 * Set by the -T dscp option on the command line. If set to a value
 * other than -1, we check to make sure DSCP values match it, and
 * assert if not.
 */
LIBISC_EXTERNAL_DATA int isc_dscp_check_value = -1;

/*
 * Pointers to the Winsock extension functions ConnectEx(), AcceptEx()
 * and GetAcceptExSockaddrs().  No library exports these functions, so
 * they cannot be called directly; instead their addresses are looked up
 * at run time (initialise() fills these in using WSAIoctl() with
 * SIO_GET_EXTENSION_FUNCTION_POINTER).
 */
LPFN_CONNECTEX ISCConnectEx;
LPFN_ACCEPTEX ISCAcceptEx;
LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
93 
/*
 * Run expensive internal consistency checks.
 *
 * CONSISTENT(sock) expands to a call to consistent() only when
 * ISC_SOCKET_CONSISTENCY_CHECKS is defined; otherwise it expands to an
 * empty statement.
 */
#ifdef ISC_SOCKET_CONSISTENCY_CHECKS
#define CONSISTENT(sock) consistent(sock)
#else
#define CONSISTENT(sock) do {} while (0)
#endif
static void consistent(isc_socket_t *sock);

/*
 * I/O control code used to control the behavior of connection resets
 * on UDP sockets.  It is only defined here when the system headers do
 * not already provide it.  See Microsoft KnowledgeBase Article Q263823
 * for details.
 * NOTE: This requires that Windows 2000 systems install Service Pack 2
 * or later.
 */
#ifndef SIO_UDP_CONNRESET
#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
#endif

/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 * A definition supplied before this point takes precedence.
 */
#ifndef ISC_SOCKADDR_LEN_T
#define ISC_SOCKADDR_LEN_T unsigned int
#endif
122 
/*
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 * Note that an error value of 0 is also classified as soft.
 */
#define SOFT_ERROR(e)	((e) == WSAEINTR || \
			 (e) == WSAEWOULDBLOCK || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == EAGAIN || \
			 (e) == 0)

/*
 * Pending errors are not really errors and should be
 * kept separate.  True for WSA_IO_PENDING and for 0.
 */
#define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)

/*
 * Status codes describing the outcome of an I/O attempt.
 */
#define DOIO_SUCCESS	  0       /* i/o ok, event sent */
#define DOIO_SOFT	  1       /* i/o ok, soft error, no event sent */
#define DOIO_HARD	  2       /* i/o error, event sent */
#define DOIO_EOF	  3       /* EOF, no event sent */
#define DOIO_PENDING	  4       /* status when i/o is in process */
#define DOIO_NEEDMORE	  5       /* IO was processed, but we need more due to minimum */

/*
 * Expands to the (category, module, level) argument triple expected by
 * the logging helpers, using the general category, the socket module and
 * debug level x.
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * Debug levels used by this file:
 *
 * TRACE_LEVEL (90)        --  Function entry/exit and other tracing.
 * CORRECTNESS_LEVEL (70)  --  Socket "correctness" -- including returning
 *                             of events, etc.
 * IOEVENT_LEVEL (60)      --  Socket data send/receive.
 * EVENT_LEVEL (50)        --  Event tracing, including receiving/sending
 *                             completion events.
 * CREATION_LEVEL (20)     --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

/* Ready-made DLVL() argument triples, one per level above. */
#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

/* Internal event type; simply an alias for isc_event_t. */
typedef isc_event_t intev_t;
169 
/*
 * Socket State
 *
 * Values stored in isc_socket.state (see _set_state()).
 */
enum {
  SOCK_INITIALIZED,	/* Socket Initialized */
  SOCK_OPEN,		/* Socket opened but nothing yet to do */
  SOCK_DATA,		/* Socket sending or receiving data */
  SOCK_LISTEN,		/* TCP Socket listening for connects */
  SOCK_ACCEPT,		/* TCP socket is waiting to accept */
  SOCK_CONNECT,		/* TCP Socket connecting */
  SOCK_CLOSED,		/* Socket has been closed */
};

/* Magic number identifying a valid isc_socket_t, and its validity check. */
#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)

/*
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
#ifdef ISC_PLATFORM_HAVEIPV6
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * We really don't want to try and use these control messages. Win32
 * doesn't have this mechanism before XP.
 *
 * The net effect of this #undef is that USE_CMSG is never defined past
 * this point, regardless of the conditional definition above.
 */
#undef USE_CMSG
202 
/*
 * Message header for recvmsg and sendmsg calls.
 * Used value-result for recvmsg, value only for sendmsg.
 *
 * NOTE(review): the trailing declarator below also defines a file-scope
 * object named "msghdr"; nothing in the visible code appears to use it --
 * confirm before relying on or removing it.
 */
struct msghdr {
	SOCKADDR_STORAGE to_addr;	/* UDP send/recv address */
	int      to_addr_len;		/* length of the address */
	WSABUF  *msg_iov;		/* scatter/gather array */
	u_int   msg_iovlen;             /* # elements in msg_iov */
	void	*msg_control;           /* ancillary data, see below */
	u_int   msg_controllen;         /* ancillary data buffer len */
	u_int	msg_totallen;		/* total length of this message */
} msghdr;

/*
 * The size to raise the receive buffer to (32 KiB).
 */
#define RCVBUFSIZE (32*1024)

/*
 * The number of times a send operation is repeated if the result
 * is WSAEINTR.
 */
#define NRETRIES 10
227 
/*
 * Per-socket state.  The leading comments inside the structure indicate
 * which members are protected by the socket lock.
 */
struct isc_socket {
	/* Not locked. */
	unsigned int		magic;
	isc_socketmgr_t	       *manager;
	isc_mutex_t		lock;
	isc_sockettype_t	type;

	/* Pointers to scatter/gather buffers */
	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];

	/* Locked by socket lock. */
	ISC_LINK(isc_socket_t)	link;
	unsigned int		references; /* EXTERNAL references */
	SOCKET			fd;	/* file handle */
	int			pf;	/* protocol family */
	char			name[16];
	void *			tag;

	/*
	 * Each recv() call uses this buffer.  It is a per-socket receive
	 * buffer that allows us to decouple the system recv() from the
	 * recv_list done events.  This means the items on the recv_list
	 * can be removed without having to cancel pending system recv()
	 * calls.  It also allows us to read-ahead in some cases.
	 */
	struct {
		SOCKADDR_STORAGE	from_addr;	   /* UDP send/recv address */
		int		from_addr_len;	   /* length of the address */
		char		*base;		   /* the base of the buffer */
		char		*consume_position; /* where to start copying data from next */
		unsigned int	len;		   /* the actual size of this buffer */
		unsigned int	remaining;	   /* the number of bytes remaining */
	} recvbuf;

	/* Queues of done events waiting on each kind of operation. */
	ISC_LIST(isc_socketevent_t)		send_list;
	ISC_LIST(isc_socketevent_t)		recv_list;
	ISC_LIST(isc_socket_newconnev_t)	accept_list;
	ISC_LIST(isc_socket_connev_t)		connect_list;

	isc_sockaddr_t		address;  /* remote address */

	unsigned int		listener : 1,	/* listener socket */
				connected : 1,
				pending_connect : 1, /* connect pending */
				bound : 1,	/* bound to local addr */
				dupped : 1;     /* created by isc_socket_dup() */
	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
	unsigned int		pending_recv;  /* Number of outstanding recv() calls. */
	unsigned int		pending_send;  /* Number of outstanding send() calls. */
	unsigned int		pending_accept; /* Number of outstanding accept() calls. */
	unsigned int		state; /* Socket state. Debugging and consistency checking. */
	int			state_lineno;  /* line which last touched state */
};

/*
 * Set the socket's state and record the source line that did so (in
 * state_lineno), which helps when debugging state transitions.
 */
#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
283 
/*
 * Buffer structure: one node in a list of (pointer, length) buffers,
 * as used by IoCompletionInfo.bufferlist.
 */
typedef struct buflist buflist_t;

struct buflist {
	void			*buf;		/* start of the buffer */
	unsigned int		buflen;		/* length of the buffer */
	ISC_LINK(buflist_t)	link;
};

/*
 * I/O Completion ports Info structures
 */

/*
 * Private heap from which IoCompletionInfo structures (and the send
 * buffer copies) are allocated; created by iocompletionport_init().
 */
static HANDLE hHeapHandle = NULL;

/*
 * Per-request context handed to the overlapped Winsock calls.  The
 * OVERLAPPED member comes first, and the code casts a pointer to this
 * structure to LPWSAOVERLAPPED when issuing requests.
 */
typedef struct IoCompletionInfo {
	OVERLAPPED		overlapped;
	isc_socketevent_t	*dev;  /* send()/recv() done event */
	isc_socket_connev_t	*cdev; /* connect() done event */
	isc_socket_newconnev_t	*adev; /* accept() done event */
	void			*acceptbuffer;
	DWORD			received_bytes;
	int			request_type;	/* one of the SOCKET_* request types */
	struct msghdr		messagehdr;
	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
} IoCompletionInfo;
311 
/*
 * Define a maximum number of I/O Completion Port worker threads
 * to handle the load on the Completion Port. The actual number
 * used is the number of CPUs + 1, capped at this maximum.
 */
#define MAX_IOCPTHREADS 20

/* Magic number identifying a valid isc_socketmgr_t, and its validity check. */
#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

/*
 * Socket manager: owns the I/O completion port, its worker threads and
 * the list of sockets.
 */
struct isc_socketmgr {
	/* Not locked. */
	unsigned int		magic;
	isc_mem_t	       *mctx;
	isc_mutex_t		lock;
	isc_stats_t	       *stats;

	/* Locked by manager lock. */
	ISC_LIST(isc_socket_t)	socklist;
	bool			bShutdown;
	isc_condition_t		shutdown_ok;
	HANDLE			hIoCompletionPort;
	int			maxIOCPThreads;	/* number of worker threads in use */
	HANDLE			hIOCPThreads[MAX_IOCPTHREADS];
	DWORD			dwIOCPThreadIds[MAX_IOCPTHREADS];
	size_t			maxudp;

	/*
	 * Debugging.
	 * Modified by InterlockedIncrement() and InterlockedDecrement()
	 */
	LONG				totalSockets;
	LONG				iocp_total;
};

/*
 * Kinds of overlapped request; stored in IoCompletionInfo.request_type.
 */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
359 
/*
 * Forward declarations for static functions that are referenced before
 * their definitions.
 */
static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
				  isc_sockettype_t type,
				  isc_socket_t **socketp,
				  isc_socket_t *dup_socket);
static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
static void maybe_free_socket(isc_socket_t **, int);
static void free_socket(isc_socket_t **, int);
static bool senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
static bool acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
static bool connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
static void send_connectdone_abort(isc_socket_t *sock, isc_result_t result);
static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
static void queue_receive_request(isc_socket_t *sock);
378 
379 /*
380  * This is used to dump the contents of the sock structure
381  * You should make sure that the sock is locked before
382  * dumping it. Since the code uses simple printf() statements
383  * it should only be used interactively.
384  */
385 void
sock_dump(isc_socket_t * sock)386 sock_dump(isc_socket_t *sock) {
387 	isc_socketevent_t *ldev;
388 	isc_socket_newconnev_t *ndev;
389 	isc_socket_connev_t *cdev;
390 
391 #if 0
392 	isc_sockaddr_t addr;
393 	char socktext[ISC_SOCKADDR_FORMATSIZE];
394 	isc_result_t result;
395 
396 	result = isc_socket_getpeername(sock, &addr);
397 	if (result == ISC_R_SUCCESS) {
398 		isc_sockaddr_format(&addr, socktext, sizeof(socktext));
399 		printf("Remote Socket: %s\n", socktext);
400 	}
401 	result = isc_socket_getsockname(sock, &addr);
402 	if (result == ISC_R_SUCCESS) {
403 		isc_sockaddr_format(&addr, socktext, sizeof(socktext));
404 		printf("This Socket: %s\n", socktext);
405 	}
406 #endif
407 
408 	printf("\n\t\tSock Dump\n");
409 	printf("\t\tfd: %Iu\n", sock->fd);
410 	printf("\t\treferences: %u\n", sock->references);
411 	printf("\t\tpending_accept: %u\n", sock->pending_accept);
412 	printf("\t\tconnecting: %u\n", sock->pending_connect);
413 	printf("\t\tconnected: %u\n", sock->connected);
414 	printf("\t\tbound: %u\n", sock->bound);
415 	printf("\t\tpending_iocp: %u\n", sock->pending_iocp);
416 	printf("\t\tsocket type: %d\n", sock->type);
417 
418 	printf("\n\t\tSock Recv List\n");
419 	ldev = ISC_LIST_HEAD(sock->recv_list);
420 	while (ldev != NULL) {
421 		printf("\t\tdev: %p\n", ldev);
422 		ldev = ISC_LIST_NEXT(ldev, ev_link);
423 	}
424 
425 	printf("\n\t\tSock Send List\n");
426 	ldev = ISC_LIST_HEAD(sock->send_list);
427 	while (ldev != NULL) {
428 		printf("\t\tdev: %p\n", ldev);
429 		ldev = ISC_LIST_NEXT(ldev, ev_link);
430 	}
431 
432 	printf("\n\t\tSock Accept List\n");
433 	ndev = ISC_LIST_HEAD(sock->accept_list);
434 	while (ndev != NULL) {
435 		printf("\t\tdev: %p\n", ldev);
436 		ndev = ISC_LIST_NEXT(ndev, ev_link);
437 	}
438 
439 	printf("\n\t\tSock Connect List\n");
440 	cdev = ISC_LIST_HEAD(sock->connect_list);
441 	while (cdev != NULL) {
442 		printf("\t\tdev: %p\n", cdev);
443 		cdev = ISC_LIST_NEXT(cdev, ev_link);
444 	}
445 }
446 
447 static void
448 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
449 	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
450 	   isc_msgcat_t *msgcat, int msgset, int message,
451 	   const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
452 
453 /*  This function will add an entry to the I/O completion port
454  *  that will signal the I/O thread to exit (gracefully)
455  */
456 static void
signal_iocompletionport_exit(isc_socketmgr_t * manager)457 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
458 	int i;
459 	int errval;
460 	char strbuf[ISC_STRERRORSIZE];
461 
462 	REQUIRE(VALID_MANAGER(manager));
463 	for (i = 0; i < manager->maxIOCPThreads; i++) {
464 		if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
465 						0, 0, 0)) {
466 			errval = GetLastError();
467 			isc__strerror(errval, strbuf, sizeof(strbuf));
468 			FATAL_ERROR(__FILE__, __LINE__,
469 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
470 				ISC_MSG_FAILED,
471 				"Can't request service thread to exit: %s"),
472 				strbuf);
473 		}
474 	}
475 }
476 
477 /*
478  * Create the worker threads for the I/O Completion Port
479  */
480 void
iocompletionport_createthreads(int total_threads,isc_socketmgr_t * manager)481 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
482 	int errval;
483 	char strbuf[ISC_STRERRORSIZE];
484 	int i;
485 
486 	INSIST(total_threads > 0);
487 	REQUIRE(VALID_MANAGER(manager));
488 	/*
489 	 * We need at least one
490 	 */
491 	for (i = 0; i < total_threads; i++) {
492 		manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
493 						manager, 0,
494 						&manager->dwIOCPThreadIds[i]);
495 		if (manager->hIOCPThreads[i] == NULL) {
496 			errval = GetLastError();
497 			isc__strerror(errval, strbuf, sizeof(strbuf));
498 			FATAL_ERROR(__FILE__, __LINE__,
499 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
500 				ISC_MSG_FAILED,
501 				"Can't create IOCP thread: %s"),
502 				strbuf);
503 		}
504 	}
505 }
506 
507 /*
508  *  Create/initialise the I/O completion port
509  */
510 void
iocompletionport_init(isc_socketmgr_t * manager)511 iocompletionport_init(isc_socketmgr_t *manager) {
512 	int errval;
513 	char strbuf[ISC_STRERRORSIZE];
514 
515 	REQUIRE(VALID_MANAGER(manager));
516 	/*
517 	 * Create a private heap to handle the socket overlapped structure
518 	 * The minimum number of structures is 10, there is no maximum
519 	 */
520 	hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
521 	if (hHeapHandle == NULL) {
522 		errval = GetLastError();
523 		isc__strerror(errval, strbuf, sizeof(strbuf));
524 		FATAL_ERROR(__FILE__, __LINE__,
525 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
526 					   ISC_MSG_FAILED,
527 					   "HeapCreate() failed during "
528 					   "initialization: %s"),
529 			    strbuf);
530 	}
531 
532 	manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
533 
534 	/* Now Create the Completion Port */
535 	manager->hIoCompletionPort = CreateIoCompletionPort(
536 			INVALID_HANDLE_VALUE, NULL,
537 			0, manager->maxIOCPThreads);
538 	if (manager->hIoCompletionPort == NULL) {
539 		errval = GetLastError();
540 		isc__strerror(errval, strbuf, sizeof(strbuf));
541 		FATAL_ERROR(__FILE__, __LINE__,
542 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
543 				ISC_MSG_FAILED,
544 				"CreateIoCompletionPort() failed "
545 				"during initialization: %s"),
546 				strbuf);
547 	}
548 
549 	/*
550 	 * Worker threads for servicing the I/O
551 	 */
552 	iocompletionport_createthreads(manager->maxIOCPThreads, manager);
553 }
554 
555 /*
556  * Associate a socket with an IO Completion Port.  This allows us to queue events for it
557  * and have our worker pool of threads process them.
558  */
559 void
iocompletionport_update(isc_socket_t * sock)560 iocompletionport_update(isc_socket_t *sock) {
561 	HANDLE hiocp;
562 	char strbuf[ISC_STRERRORSIZE];
563 
564 	REQUIRE(VALID_SOCKET(sock));
565 
566 	hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
567 		sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
568 
569 	if (hiocp == NULL) {
570 		DWORD errval = GetLastError();
571 		isc__strerror(errval, strbuf, sizeof(strbuf));
572 		isc_log_iwrite(isc_lctx,
573 				ISC_LOGCATEGORY_GENERAL,
574 				ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
575 				isc_msgcat, ISC_MSGSET_SOCKET,
576 				ISC_MSG_TOOMANYHANDLES,
577 				"iocompletionport_update: failed to open"
578 				" io completion port: %s",
579 				strbuf);
580 
581 		/* XXXMLG temporary hack to make failures detected.
582 		 * This function should return errors to the caller, not
583 		 * exit here.
584 		 */
585 		FATAL_ERROR(__FILE__, __LINE__,
586 				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
587 				ISC_MSG_FAILED,
588 				"CreateIoCompletionPort() failed "
589 				"during initialization: %s"),
590 				strbuf);
591 	}
592 
593 	InterlockedIncrement(&sock->manager->iocp_total);
594 }
595 
596 /*
597  * Routine to cleanup and then close the socket.
598  * Only close the socket here if it is NOT associated
599  * with an event, otherwise the WSAWaitForMultipleEvents
600  * may fail due to the fact that the Wait should not
601  * be running while closing an event or a socket.
602  * The socket is locked before calling this function
603  */
604 void
socket_close(isc_socket_t * sock)605 socket_close(isc_socket_t *sock) {
606 
607 	REQUIRE(sock != NULL);
608 
609 	if (sock->fd != INVALID_SOCKET) {
610 		closesocket(sock->fd);
611 		sock->fd = INVALID_SOCKET;
612 		_set_state(sock, SOCK_CLOSED);
613 		InterlockedDecrement(&sock->manager->totalSockets);
614 	}
615 }
616 
617 static isc_once_t initialise_once = ISC_ONCE_INIT;
618 static bool initialised = false;
619 
620 static void
initialise(void)621 initialise(void) {
622 	WORD wVersionRequested;
623 	WSADATA wsaData;
624 	int err;
625 	SOCKET sock;
626 	GUID GUIDConnectEx = WSAID_CONNECTEX;
627 	GUID GUIDAcceptEx = WSAID_ACCEPTEX;
628 	GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
629 	DWORD dwBytes;
630 
631 	/* Need Winsock 2.2 or better */
632 	wVersionRequested = MAKEWORD(2, 2);
633 
634 	err = WSAStartup(wVersionRequested, &wsaData);
635 	if (err != 0) {
636 		char strbuf[ISC_STRERRORSIZE];
637 		isc__strerror(err, strbuf, sizeof(strbuf));
638 		FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
639 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
640 					   ISC_MSG_FAILED, "failed"),
641 			    strbuf);
642 	}
643 	/*
644 	 * The following APIs do not exist as functions in a library, but
645 	 * we must ask winsock for them.  They are "extensions" -- but why
646 	 * they cannot be actual functions is beyond me.  So, ask winsock
647 	 * for the pointers to the functions we need.
648 	 */
649 	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
650 	INSIST(sock != INVALID_SOCKET);
651 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
652 		 &GUIDConnectEx, sizeof(GUIDConnectEx),
653 		 &ISCConnectEx, sizeof(ISCConnectEx),
654 		 &dwBytes, NULL, NULL);
655 	INSIST(err == 0);
656 
657 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
658 		 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
659 		 &ISCAcceptEx, sizeof(ISCAcceptEx),
660 		 &dwBytes, NULL, NULL);
661 	INSIST(err == 0);
662 
663 	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
664 		 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
665 		 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
666 		 &dwBytes, NULL, NULL);
667 	INSIST(err == 0);
668 
669 	closesocket(sock);
670 
671 	initialised = true;
672 }
673 
674 /*
675  * Initialize socket services
676  */
677 void
InitSockets(void)678 InitSockets(void) {
679 	RUNTIME_CHECK(isc_once_do(&initialise_once,
680 				  initialise) == ISC_R_SUCCESS);
681 	if (!initialised)
682 		exit(1);
683 }
684 
685 int
internal_sendmsg(isc_socket_t * sock,IoCompletionInfo * lpo,struct msghdr * messagehdr,int flags,int * Error)686 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
687 		 struct msghdr *messagehdr, int flags, int *Error)
688 {
689 	int Result;
690 	DWORD BytesSent;
691 	DWORD Flags = flags;
692 	int total_sent;
693 
694 	*Error = 0;
695 	Result = WSASendTo(sock->fd, messagehdr->msg_iov,
696 			   messagehdr->msg_iovlen, &BytesSent,
697 			   Flags, (SOCKADDR *)&messagehdr->to_addr,
698 			   messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
699 			   NULL);
700 
701 	total_sent = (int)BytesSent;
702 
703 	/* Check for errors.*/
704 	if (Result == SOCKET_ERROR) {
705 		*Error = WSAGetLastError();
706 
707 		switch (*Error) {
708 		case WSA_IO_INCOMPLETE:
709 		case WSA_WAIT_IO_COMPLETION:
710 		case WSA_IO_PENDING:
711 		case NO_ERROR:		/* Strange, but okay */
712 			sock->pending_iocp++;
713 			sock->pending_send++;
714 			break;
715 
716 		default:
717 			return (-1);
718 			break;
719 		}
720 	} else {
721 		sock->pending_iocp++;
722 		sock->pending_send++;
723 	}
724 
725 	if (lpo != NULL)
726 		return (0);
727 	else
728 		return (total_sent);
729 }
730 
731 static void
queue_receive_request(isc_socket_t * sock)732 queue_receive_request(isc_socket_t *sock) {
733 	DWORD Flags = 0;
734 	DWORD NumBytes = 0;
735 	int Result;
736 	int Error;
737 	int need_retry;
738 	WSABUF iov[1];
739 	IoCompletionInfo *lpo = NULL;
740 	isc_result_t isc_result;
741 
742  retry:
743 	need_retry = false;
744 
745 	/*
746 	 * If we already have a receive pending, do nothing.
747 	 */
748 	if (sock->pending_recv > 0) {
749 		if (lpo != NULL)
750 			HeapFree(hHeapHandle, 0, lpo);
751 		return;
752 	}
753 
754 	/*
755 	 * If no one is waiting, do nothing.
756 	 */
757 	if (ISC_LIST_EMPTY(sock->recv_list)) {
758 		if (lpo != NULL)
759 			HeapFree(hHeapHandle, 0, lpo);
760 		return;
761 	}
762 
763 	INSIST(sock->recvbuf.remaining == 0);
764 	INSIST(sock->fd != INVALID_SOCKET);
765 
766 	iov[0].len = sock->recvbuf.len;
767 	iov[0].buf = sock->recvbuf.base;
768 
769 	if (lpo == NULL) {
770 		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
771 						    HEAP_ZERO_MEMORY,
772 						    sizeof(IoCompletionInfo));
773 		RUNTIME_CHECK(lpo != NULL);
774 	} else
775 		ZeroMemory(lpo, sizeof(IoCompletionInfo));
776 	lpo->request_type = SOCKET_RECV;
777 
778 	sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
779 
780 	Error = 0;
781 	Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
782 			     &NumBytes, &Flags,
783 			     (SOCKADDR *)&sock->recvbuf.from_addr,
784 			     &sock->recvbuf.from_addr_len,
785 			     (LPWSAOVERLAPPED)lpo, NULL);
786 
787 	/* Check for errors. */
788 	if (Result == SOCKET_ERROR) {
789 		Error = WSAGetLastError();
790 
791 		switch (Error) {
792 		case WSA_IO_PENDING:
793 			sock->pending_iocp++;
794 			sock->pending_recv++;
795 			break;
796 
797 		/* direct error: no completion event */
798 		case ERROR_HOST_UNREACHABLE:
799 		case WSAENETRESET:
800 		case WSAECONNRESET:
801 			if (!sock->connected) {
802 				/* soft error */
803 				need_retry = true;
804 				break;
805 			}
806 			/* FALLTHROUGH */
807 
808 		default:
809 			isc_result = isc__errno2result(Error);
810 			if (isc_result == ISC_R_UNEXPECTED)
811 				UNEXPECTED_ERROR(__FILE__, __LINE__,
812 					"WSARecvFrom: Windows error code: %d, isc result %d",
813 					Error, isc_result);
814 			send_recvdone_abort(sock, isc_result);
815 			HeapFree(hHeapHandle, 0, lpo);
816 			lpo = NULL;
817 			break;
818 		}
819 	} else {
820 		/*
821 		 * The recv() finished immediately, but we will still get
822 		 * a completion event.  Rather than duplicate code, let
823 		 * that thread handle sending the data along its way.
824 		 */
825 		sock->pending_iocp++;
826 		sock->pending_recv++;
827 	}
828 
829 	socket_log(__LINE__, sock, NULL, IOEVENT,
830 		   isc_msgcat, ISC_MSGSET_SOCKET,
831 		   ISC_MSG_DOIORECV,
832 		   "queue_io_request: fd %d result %d error %d",
833 		   sock->fd, Result, Error);
834 
835 	CONSISTENT(sock);
836 
837 	if (need_retry)
838 		goto retry;
839 }
840 
841 static void
manager_log(isc_socketmgr_t * sockmgr,isc_logcategory_t * category,isc_logmodule_t * module,int level,const char * fmt,...)842 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
843 	    isc_logmodule_t *module, int level, const char *fmt, ...)
844 {
845 	char msgbuf[2048];
846 	va_list ap;
847 
848 	if (!isc_log_wouldlog(isc_lctx, level))
849 		return;
850 
851 	va_start(ap, fmt);
852 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
853 	va_end(ap);
854 
855 	isc_log_write(isc_lctx, category, module, level,
856 		      "sockmgr %p: %s", sockmgr, msgbuf);
857 }
858 
859 static void
socket_log(int lineno,isc_socket_t * sock,isc_sockaddr_t * address,isc_logcategory_t * category,isc_logmodule_t * module,int level,isc_msgcat_t * msgcat,int msgset,int message,const char * fmt,...)860 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
861 	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
862 	   isc_msgcat_t *msgcat, int msgset, int message,
863 	   const char *fmt, ...)
864 {
865 	char msgbuf[2048];
866 	char peerbuf[256];
867 	va_list ap;
868 
869 
870 	if (!isc_log_wouldlog(isc_lctx, level))
871 		return;
872 
873 	va_start(ap, fmt);
874 	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
875 	va_end(ap);
876 
877 	if (address == NULL) {
878 		isc_log_iwrite(isc_lctx, category, module, level,
879 			       msgcat, msgset, message,
880 			       "socket %p line %d: %s", sock, lineno, msgbuf);
881 	} else {
882 		isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
883 		isc_log_iwrite(isc_lctx, category, module, level,
884 			       msgcat, msgset, message,
885 				   "socket %p line %d %s: %s", sock, lineno,
886 				   peerbuf, msgbuf);
887 	}
888 
889 }
890 
891 /*
892  * Make an fd SOCKET non-blocking.
893  */
894 static isc_result_t
make_nonblock(SOCKET fd)895 make_nonblock(SOCKET fd) {
896 	int ret;
897 	unsigned long flags = 1;
898 	char strbuf[ISC_STRERRORSIZE];
899 
900 	/* Set the socket to non-blocking */
901 	ret = ioctlsocket(fd, FIONBIO, &flags);
902 
903 	if (ret == -1) {
904 		isc__strerror(errno, strbuf, sizeof(strbuf));
905 		UNEXPECTED_ERROR(__FILE__, __LINE__,
906 				 "ioctlsocket(%d, FIOBIO, %d): %s",
907 				 fd, flags, strbuf);
908 
909 		return (ISC_R_UNEXPECTED);
910 	}
911 
912 	return (ISC_R_SUCCESS);
913 }
914 
915 /*
916  * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
917  * to not work correctly, returning a WSACONNRESET error when a WSASendTo
918  * fails with an "ICMP port unreachable" response and preventing the
919  * socket from using the WSARecvFrom in subsequent operations.
920  * The function below fixes this, but requires that Windows 2000
921  * Service Pack 2 or later be installed on the system.  NT 4.0
922  * systems are not affected by this and work correctly.
923  * See Microsoft Knowledge Base Article Q263823 for details of this.
924  */
925 isc_result_t
connection_reset_fix(SOCKET fd)926 connection_reset_fix(SOCKET fd) {
927 	DWORD dwBytesReturned = 0;
928 	BOOL  bNewBehavior = FALSE;
929 	DWORD status;
930 
931 	if (isc_win32os_versioncheck(5, 0, 0, 0) < 0)
932 		return (ISC_R_SUCCESS); /*  NT 4.0 has no problem */
933 
934 	/* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
935 	status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
936 			  sizeof(bNewBehavior), NULL, 0,
937 			  &dwBytesReturned, NULL, NULL);
938 	if (status != SOCKET_ERROR)
939 		return (ISC_R_SUCCESS);
940 	else {
941 		UNEXPECTED_ERROR(__FILE__, __LINE__,
942 				 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
943 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
944 						ISC_MSG_FAILED, "failed"));
945 		return (ISC_R_UNEXPECTED);
946 	}
947 }
948 
949 /*
950  * Construct an iov array and attach it to the msghdr passed in.  This is
951  * the SEND constructor, which will use the used region of the buffer
952  * (if using a buffer list) or will use the internal region (if a single
953  * buffer I/O is requested).
954  *
955  * Nothing can be NULL, and the done event must list at least one buffer
956  * on the buffer linked list for this function to be meaningful.
957  */
958 static void
build_msghdr_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * msg,char * cmsg,WSABUF * iov,IoCompletionInfo * lpo)959 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
960 		  struct msghdr *msg, char *cmsg, WSABUF *iov,
961 		  IoCompletionInfo  *lpo)
962 {
963 	unsigned int iovcount;
964 	isc_buffer_t *buffer;
965 	buflist_t  *cpbuffer;
966 	isc_region_t used;
967 	size_t write_count;
968 	size_t skip_count;
969 
970 	memset(msg, 0, sizeof(*msg));
971 
972 	memmove(&msg->to_addr, &dev->address.type, dev->address.length);
973 	msg->to_addr_len = dev->address.length;
974 
975 	buffer = ISC_LIST_HEAD(dev->bufferlist);
976 	write_count = 0;
977 	iovcount = 0;
978 
979 	/*
980 	 * Single buffer I/O?  Skip what we've done so far in this region.
981 	 */
982 	if (buffer == NULL) {
983 		write_count = dev->region.length - dev->n;
984 		cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
985 		RUNTIME_CHECK(cpbuffer != NULL);
986 		cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
987 		RUNTIME_CHECK(cpbuffer->buf != NULL);
988 
989 		socket_log(__LINE__, sock, NULL, TRACE,
990 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
991 		   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
992 		   cpbuffer->buf, write_count);
993 
994 		memmove(cpbuffer->buf,(dev->region.base + dev->n), write_count);
995 		cpbuffer->buflen = (unsigned int)write_count;
996 		ISC_LINK_INIT(cpbuffer, link);
997 		ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
998 		iov[0].buf = cpbuffer->buf;
999 		iov[0].len = (u_long)write_count;
1000 		iovcount = 1;
1001 
1002 		goto config;
1003 	}
1004 
1005 	/*
1006 	 * Multibuffer I/O.
1007 	 * Skip the data in the buffer list that we have already written.
1008 	 */
1009 	skip_count = dev->n;
1010 	while (buffer != NULL) {
1011 		REQUIRE(ISC_BUFFER_VALID(buffer));
1012 		if (skip_count < isc_buffer_usedlength(buffer))
1013 			break;
1014 		skip_count -= isc_buffer_usedlength(buffer);
1015 		buffer = ISC_LIST_NEXT(buffer, link);
1016 	}
1017 
1018 	while (buffer != NULL) {
1019 		INSIST(iovcount < MAXSCATTERGATHER_SEND);
1020 
1021 		isc_buffer_usedregion(buffer, &used);
1022 
1023 		if (used.length > 0) {
1024 			int uselen = (int)(used.length - skip_count);
1025 			cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1026 			RUNTIME_CHECK(cpbuffer != NULL);
1027 			cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1028 			RUNTIME_CHECK(cpbuffer->buf != NULL);
1029 
1030 			socket_log(__LINE__, sock, NULL, TRACE,
1031 			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1032 			   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1033 			   cpbuffer->buf, write_count);
1034 
1035 			memmove(cpbuffer->buf,(used.base + skip_count), uselen);
1036 			cpbuffer->buflen = uselen;
1037 			iov[iovcount].buf = cpbuffer->buf;
1038 			iov[iovcount].len = (u_long)(used.length - skip_count);
1039 			write_count += uselen;
1040 			skip_count = 0;
1041 			iovcount++;
1042 		}
1043 		buffer = ISC_LIST_NEXT(buffer, link);
1044 	}
1045 
1046 	INSIST(skip_count == 0);
1047 
1048  config:
1049 	msg->msg_iov = iov;
1050 	msg->msg_iovlen = iovcount;
1051 	msg->msg_totallen = (u_int)write_count;
1052 }
1053 
1054 static void
set_dev_address(isc_sockaddr_t * address,isc_socket_t * sock,isc_socketevent_t * dev)1055 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1056 		isc_socketevent_t *dev)
1057 {
1058 	if (sock->type == isc_sockettype_udp) {
1059 		if (address != NULL)
1060 			dev->address = *address;
1061 		else
1062 			dev->address = sock->address;
1063 	} else if (sock->type == isc_sockettype_tcp) {
1064 		INSIST(address == NULL);
1065 		dev->address = sock->address;
1066 	}
1067 }
1068 
1069 static void
destroy_socketevent(isc_event_t * event)1070 destroy_socketevent(isc_event_t *event) {
1071 	isc_socketevent_t *ev = (isc_socketevent_t *)event;
1072 
1073 	INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1074 
1075 	(ev->destroy)(event);
1076 }
1077 
1078 static isc_socketevent_t *
allocate_socketevent(isc_mem_t * mctx,isc_socket_t * sock,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)1079 allocate_socketevent(isc_mem_t *mctx, isc_socket_t *sock,
1080 		     isc_eventtype_t eventtype, isc_taskaction_t action,
1081 		     void *arg)
1082 {
1083 	isc_socketevent_t *ev;
1084 
1085 	ev = (isc_socketevent_t *)isc_event_allocate(mctx, sock, eventtype,
1086 						     action, arg,
1087 						     sizeof(*ev));
1088 	if (ev == NULL)
1089 		return (NULL);
1090 
1091 	ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1092 	ISC_LINK_INIT(ev, ev_link);
1093 	ISC_LIST_INIT(ev->bufferlist);
1094 	ev->region.base = NULL;
1095 	ev->n = 0;
1096 	ev->offset = 0;
1097 	ev->attributes = 0;
1098 	ev->destroy = ev->ev_destroy;
1099 	ev->ev_destroy = destroy_socketevent;
1100 	ev->dscp = 0;
1101 
1102 	return (ev);
1103 }
1104 
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the msghdr's address, name and iov fields, plus
 * one line per iov entry, to stdout.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i = 0;

	printf("MSGHDR %p, Socket #: %Iu\n", msg, sock->fd);
	printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen);

	while (i < (unsigned int)msg->msg_iovlen) {
		printf("\t\t%u\tbase %p, len %u\n", i,
		       msg->msg_iov[i].buf, msg->msg_iov[i].len);
		i++;
	}
}
#endif
1118 
1119 /*
1120  * map the error code
1121  */
1122 int
map_socket_error(isc_socket_t * sock,int windows_errno,int * isc_errno,char * errorstring,size_t bufsize)1123 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1124 		 char *errorstring, size_t bufsize) {
1125 
1126 	int doreturn;
1127 	switch (windows_errno) {
1128 	case WSAECONNREFUSED:
1129 		*isc_errno = ISC_R_CONNREFUSED;
1130 		if (sock->connected)
1131 			doreturn = DOIO_HARD;
1132 		else
1133 			doreturn = DOIO_SOFT;
1134 		break;
1135 	case WSAENETUNREACH:
1136 	case ERROR_NETWORK_UNREACHABLE:
1137 		*isc_errno = ISC_R_NETUNREACH;
1138 		if (sock->connected)
1139 			doreturn = DOIO_HARD;
1140 		else
1141 			doreturn = DOIO_SOFT;
1142 		break;
1143 	case ERROR_PORT_UNREACHABLE:
1144 	case ERROR_HOST_UNREACHABLE:
1145 	case WSAEHOSTUNREACH:
1146 		*isc_errno = ISC_R_HOSTUNREACH;
1147 		if (sock->connected)
1148 			doreturn = DOIO_HARD;
1149 		else
1150 			doreturn = DOIO_SOFT;
1151 		break;
1152 	case WSAENETDOWN:
1153 		*isc_errno = ISC_R_NETDOWN;
1154 		if (sock->connected)
1155 			doreturn = DOIO_HARD;
1156 		else
1157 			doreturn = DOIO_SOFT;
1158 		break;
1159 	case WSAEHOSTDOWN:
1160 		*isc_errno = ISC_R_HOSTDOWN;
1161 		if (sock->connected)
1162 			doreturn = DOIO_HARD;
1163 		else
1164 			doreturn = DOIO_SOFT;
1165 		break;
1166 	case WSAEACCES:
1167 		*isc_errno = ISC_R_NOPERM;
1168 		if (sock->connected)
1169 			doreturn = DOIO_HARD;
1170 		else
1171 			doreturn = DOIO_SOFT;
1172 		break;
1173 	case WSAECONNRESET:
1174 	case WSAENETRESET:
1175 	case WSAECONNABORTED:
1176 	case WSAEDISCON:
1177 		*isc_errno = ISC_R_CONNECTIONRESET;
1178 		if (sock->connected)
1179 			doreturn = DOIO_HARD;
1180 		else
1181 			doreturn = DOIO_SOFT;
1182 		break;
1183 	case WSAENOTCONN:
1184 		*isc_errno = ISC_R_NOTCONNECTED;
1185 		if (sock->connected)
1186 			doreturn = DOIO_HARD;
1187 		else
1188 			doreturn = DOIO_SOFT;
1189 		break;
1190 	case ERROR_OPERATION_ABORTED:
1191 	case ERROR_CONNECTION_ABORTED:
1192 	case ERROR_REQUEST_ABORTED:
1193 		*isc_errno = ISC_R_CONNECTIONRESET;
1194 		doreturn = DOIO_HARD;
1195 		break;
1196 	case WSAENOBUFS:
1197 		*isc_errno = ISC_R_NORESOURCES;
1198 		doreturn = DOIO_HARD;
1199 		break;
1200 	case WSAEAFNOSUPPORT:
1201 		*isc_errno = ISC_R_FAMILYNOSUPPORT;
1202 		doreturn = DOIO_HARD;
1203 		break;
1204 	case WSAEADDRNOTAVAIL:
1205 		*isc_errno = ISC_R_ADDRNOTAVAIL;
1206 		doreturn = DOIO_HARD;
1207 		break;
1208 	case WSAEDESTADDRREQ:
1209 		*isc_errno = ISC_R_BADADDRESSFORM;
1210 		doreturn = DOIO_HARD;
1211 		break;
1212 	case ERROR_NETNAME_DELETED:
1213 		*isc_errno = ISC_R_NETDOWN;
1214 		doreturn = DOIO_HARD;
1215 		break;
1216 	default:
1217 		*isc_errno = ISC_R_IOERROR;
1218 		doreturn = DOIO_HARD;
1219 		break;
1220 	}
1221 	if (doreturn == DOIO_HARD) {
1222 		isc__strerror(windows_errno, errorstring, bufsize);
1223 	}
1224 	return (doreturn);
1225 }
1226 
1227 static void
fill_recv(isc_socket_t * sock,isc_socketevent_t * dev)1228 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1229 	isc_region_t r;
1230 	int copylen;
1231 	isc_buffer_t *buffer;
1232 
1233 	INSIST(dev->n < dev->minimum);
1234 	INSIST(sock->recvbuf.remaining > 0);
1235 	INSIST(sock->pending_recv == 0);
1236 
1237 	if (sock->type == isc_sockettype_udp) {
1238 		dev->address.length = sock->recvbuf.from_addr_len;
1239 		memmove(&dev->address.type, &sock->recvbuf.from_addr,
1240 			sock->recvbuf.from_addr_len);
1241 		if (isc_sockaddr_getport(&dev->address) == 0) {
1242 			if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1243 				socket_log(__LINE__, sock, &dev->address,
1244 					   IOEVENT, isc_msgcat,
1245 					   ISC_MSGSET_SOCKET, ISC_MSG_ZEROPORT,
1246 					   "dropping source port zero packet");
1247 			}
1248 			sock->recvbuf.remaining = 0;
1249 			return;
1250 		}
1251 		/*
1252 		 * Simulate a firewall blocking UDP responses bigger than
1253 		 * 'maxudp' bytes.
1254 		 */
1255 		if (sock->manager->maxudp != 0 &&
1256 		    sock->recvbuf.remaining > sock->manager->maxudp)
1257 		{
1258 			sock->recvbuf.remaining = 0;
1259 			return;
1260 		}
1261 	} else if (sock->type == isc_sockettype_tcp) {
1262 		dev->address = sock->address;
1263 	}
1264 
1265 	/*
1266 	 * Run through the list of buffers we were given, and find the
1267 	 * first one with space.  Once it is found, loop through, filling
1268 	 * the buffers as much as possible.
1269 	 */
1270 	buffer = ISC_LIST_HEAD(dev->bufferlist);
1271 	if (buffer != NULL) { // Multi-buffer receive
1272 		while (buffer != NULL && sock->recvbuf.remaining > 0) {
1273 			REQUIRE(ISC_BUFFER_VALID(buffer));
1274 			if (isc_buffer_availablelength(buffer) > 0) {
1275 				isc_buffer_availableregion(buffer, &r);
1276 				copylen = min(r.length,
1277 					      sock->recvbuf.remaining);
1278 				memmove(r.base, sock->recvbuf.consume_position,
1279 					copylen);
1280 				sock->recvbuf.consume_position += copylen;
1281 				sock->recvbuf.remaining -= copylen;
1282 				isc_buffer_add(buffer, copylen);
1283 				dev->n += copylen;
1284 			}
1285 			buffer = ISC_LIST_NEXT(buffer, link);
1286 		}
1287 	} else { // Single-buffer receive
1288 		copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1289 		memmove(dev->region.base + dev->n,
1290 			sock->recvbuf.consume_position, copylen);
1291 		sock->recvbuf.consume_position += copylen;
1292 		sock->recvbuf.remaining -= copylen;
1293 		dev->n += copylen;
1294 	}
1295 
1296 	/*
1297 	 * UDP receives are all-consuming.  That is, if we have 4k worth of
1298 	 * data in our receive buffer, and the caller only gave us
1299 	 * 1k of space, we will toss the remaining 3k of data.  TCP
1300 	 * will keep the extra data around and use it for later requests.
1301 	 */
1302 	if (sock->type == isc_sockettype_udp)
1303 		sock->recvbuf.remaining = 0;
1304 }
1305 
1306 /*
1307  * Copy out as much data from the internal buffer to done events.
1308  * As each done event is filled, send it along its way.
1309  */
1310 static void
completeio_recv(isc_socket_t * sock)1311 completeio_recv(isc_socket_t *sock)
1312 {
1313 	isc_socketevent_t *dev;
1314 
1315 	/*
1316 	 * If we are in the process of filling our buffer, we cannot
1317 	 * touch it yet, so don't.
1318 	 */
1319 	if (sock->pending_recv > 0)
1320 		return;
1321 
1322 	while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1323 		dev = ISC_LIST_HEAD(sock->recv_list);
1324 
1325 		/*
1326 		 * See if we have sufficient data in our receive buffer
1327 		 * to handle this.  If we do, copy out the data.
1328 		 */
1329 		fill_recv(sock, dev);
1330 
1331 		/*
1332 		 * Did we satisfy it?
1333 		 */
1334 		if (dev->n >= dev->minimum) {
1335 			dev->result = ISC_R_SUCCESS;
1336 			send_recvdone_event(sock, &dev);
1337 		}
1338 	}
1339 }
1340 
1341 /*
1342  * Returns:
1343  *	DOIO_SUCCESS	The operation succeeded.  dev->result contains
1344  *			ISC_R_SUCCESS.
1345  *
1346  *	DOIO_HARD	A hard or unexpected I/O error was encountered.
1347  *			dev->result contains the appropriate error.
1348  *
1349  *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
1350  *			event was sent.  The operation should be retried.
1351  *
1352  *	No other return values are possible.
1353  */
1354 static int
completeio_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int cc,int send_errno)1355 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1356 		struct msghdr *messagehdr, int cc, int send_errno)
1357 {
1358 	char strbuf[ISC_STRERRORSIZE];
1359 
1360 	if (send_errno != 0) {
1361 		if (SOFT_ERROR(send_errno))
1362 			return (DOIO_SOFT);
1363 
1364 		return (map_socket_error(sock, send_errno, &dev->result,
1365 			strbuf, sizeof(strbuf)));
1366 	}
1367 
1368 	/*
1369 	 * If we write less than we expected, update counters, poke.
1370 	 */
1371 	dev->n += cc;
1372 	if (cc != messagehdr->msg_totallen)
1373 		return (DOIO_SOFT);
1374 
1375 	/*
1376 	 * Exactly what we wanted to write.  We're done with this
1377 	 * entry.  Post its completion event.
1378 	 */
1379 	dev->result = ISC_R_SUCCESS;
1380 	return (DOIO_SUCCESS);
1381 }
1382 
1383 static int
startio_send(isc_socket_t * sock,isc_socketevent_t * dev,int * nbytes,int * send_errno)1384 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1385 	     int *send_errno)
1386 {
1387 	char *cmsg = NULL;
1388 	char strbuf[ISC_STRERRORSIZE];
1389 	IoCompletionInfo *lpo;
1390 	int status;
1391 	struct msghdr *mh;
1392 
1393 	/*
1394 	 * Simulate a firewall blocking UDP responses bigger than
1395 	 * 'maxudp' bytes.
1396 	 */
1397 	if (sock->type == isc_sockettype_udp &&
1398 	    sock->manager->maxudp != 0 &&
1399 	    dev->region.length - dev->n > sock->manager->maxudp)
1400 	{
1401 		*nbytes = dev->region.length - dev->n;
1402 		return (DOIO_SUCCESS);
1403 	}
1404 
1405 	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1406 					    HEAP_ZERO_MEMORY,
1407 					    sizeof(IoCompletionInfo));
1408 	RUNTIME_CHECK(lpo != NULL);
1409 	lpo->request_type = SOCKET_SEND;
1410 	lpo->dev = dev;
1411 	mh = &lpo->messagehdr;
1412 	memset(mh, 0, sizeof(struct msghdr));
1413 	ISC_LIST_INIT(lpo->bufferlist);
1414 
1415 	build_msghdr_send(sock, dev, mh, cmsg, sock->iov, lpo);
1416 
1417 	*nbytes = internal_sendmsg(sock, lpo, mh, 0, send_errno);
1418 
1419 	if (*nbytes <= 0) {
1420 		/*
1421 		 * I/O has been initiated
1422 		 * completion will be through the completion port
1423 		 */
1424 		if (PENDING_ERROR(*send_errno)) {
1425 			status = DOIO_PENDING;
1426 			goto done;
1427 		}
1428 
1429 		if (SOFT_ERROR(*send_errno)) {
1430 			status = DOIO_SOFT;
1431 			goto done;
1432 		}
1433 
1434 		/*
1435 		 * If we got this far then something is wrong
1436 		 */
1437 		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1438 			isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1439 			socket_log(__LINE__, sock, NULL, IOEVENT,
1440 				   isc_msgcat, ISC_MSGSET_SOCKET,
1441 				   ISC_MSG_INTERNALSEND,
1442 				   "startio_send: internal_sendmsg(%d) %d "
1443 				   "bytes, err %d/%s",
1444 				   sock->fd, *nbytes, *send_errno, strbuf);
1445 		}
1446 		status = DOIO_HARD;
1447 		goto done;
1448 	}
1449 	dev->result = ISC_R_SUCCESS;
1450 	status = DOIO_SOFT;
1451  done:
1452 	_set_state(sock, SOCK_DATA);
1453 	return (status);
1454 }
1455 
1456 static void
use_min_mtu(isc_socket_t * sock)1457 use_min_mtu(isc_socket_t *sock) {
1458 #ifdef IPV6_USE_MIN_MTU
1459 	/* use minimum MTU */
1460 	if (sock->pf == AF_INET6) {
1461 		int on = 1;
1462 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
1463 				(void *)&on, sizeof(on));
1464 	}
1465 #else
1466 	UNUSED(sock);
1467 #endif
1468 }
1469 
1470 static isc_result_t
allocate_socket(isc_socketmgr_t * manager,isc_sockettype_t type,isc_socket_t ** socketp)1471 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1472 		isc_socket_t **socketp)
1473 {
1474 	isc_socket_t *sock;
1475 	isc_result_t result;
1476 
1477 	sock = isc_mem_get(manager->mctx, sizeof(*sock));
1478 
1479 	if (sock == NULL)
1480 		return (ISC_R_NOMEMORY);
1481 
1482 	sock->magic = 0;
1483 	sock->references = 0;
1484 
1485 	sock->manager = manager;
1486 	sock->type = type;
1487 	sock->fd = INVALID_SOCKET;
1488 
1489 	ISC_LINK_INIT(sock, link);
1490 
1491 	/*
1492 	 * Set up list of readers and writers to be initially empty.
1493 	 */
1494 	ISC_LIST_INIT(sock->recv_list);
1495 	ISC_LIST_INIT(sock->send_list);
1496 	ISC_LIST_INIT(sock->accept_list);
1497 	ISC_LIST_INIT(sock->connect_list);
1498 	sock->pending_accept = 0;
1499 	sock->pending_recv = 0;
1500 	sock->pending_send = 0;
1501 	sock->pending_iocp = 0;
1502 	sock->listener = 0;
1503 	sock->connected = 0;
1504 	sock->pending_connect = 0;
1505 	sock->bound = 0;
1506 	sock->dupped = 0;
1507 	memset(sock->name, 0, sizeof(sock->name));	// zero the name field
1508 	_set_state(sock, SOCK_INITIALIZED);
1509 
1510 	sock->recvbuf.len = 65536;
1511 	sock->recvbuf.consume_position = sock->recvbuf.base;
1512 	sock->recvbuf.remaining = 0;
1513 	sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1514 	if (sock->recvbuf.base == NULL) {
1515 		result = ISC_R_NOMEMORY;
1516 		goto error;
1517 	}
1518 
1519 	/*
1520 	 * Initialize the lock.
1521 	 */
1522 	result = isc_mutex_init(&sock->lock);
1523 	if (result != ISC_R_SUCCESS)
1524 		goto error;
1525 
1526 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1527 		   "allocated");
1528 
1529 	sock->magic = SOCKET_MAGIC;
1530 	*socketp = sock;
1531 
1532 	return (ISC_R_SUCCESS);
1533 
1534  error:
1535 	if (sock->recvbuf.base != NULL) {
1536 		isc_mem_put(manager->mctx, sock->recvbuf.base,
1537 			    sock->recvbuf.len);
1538 	}
1539 	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1540 	return (result);
1541 }
1542 
1543 /*
1544  * Verify that the socket state is consistent.
1545  */
1546 static void
consistent(isc_socket_t * sock)1547 consistent(isc_socket_t *sock) {
1548 
1549 	isc_socketevent_t *dev;
1550 	isc_socket_newconnev_t *nev;
1551 	unsigned int count;
1552 	char *crash_reason;
1553 	bool crash = false;
1554 
1555 	REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1556 		+ sock->pending_accept + sock->pending_connect);
1557 
1558 	dev = ISC_LIST_HEAD(sock->send_list);
1559 	count = 0;
1560 	while (dev != NULL) {
1561 		count++;
1562 		dev = ISC_LIST_NEXT(dev, ev_link);
1563 	}
1564 	if (count > sock->pending_send) {
1565 		crash = true;
1566 		crash_reason = "send_list > sock->pending_send";
1567 	}
1568 
1569 	nev = ISC_LIST_HEAD(sock->accept_list);
1570 	count = 0;
1571 	while (nev != NULL) {
1572 		count++;
1573 		nev = ISC_LIST_NEXT(nev, ev_link);
1574 	}
1575 	if (count > sock->pending_accept) {
1576 		crash = true;
1577 		crash_reason = "accept_list > sock->pending_accept";
1578 	}
1579 
1580 	if (crash) {
1581 		socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1582 			   ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1583 			   crash_reason);
1584 		sock_dump(sock);
1585 		INSIST(crash == false);
1586 	}
1587 }
1588 
1589 /*
1590  * Maybe free the socket.
1591  *
1592  * This function will verify that the socket is no longer in use in any way,
1593  * either internally or externally.  This is the only place where this
1594  * check is to be made; if some bit of code believes that IT is done with
1595  * the socket (e.g., some reference counter reaches zero), it should call
1596  * this function.
1597  *
1598  * When calling this function, the socket must be locked, and the manager
1599  * must be unlocked.
1600  *
1601  * When this function returns, *socketp will be NULL.  No tricks to try
1602  * to hold on to this pointer are allowed.
1603  */
1604 static void
maybe_free_socket(isc_socket_t ** socketp,int lineno)1605 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1606 	isc_socket_t *sock = *socketp;
1607 	*socketp = NULL;
1608 
1609 	INSIST(VALID_SOCKET(sock));
1610 	CONSISTENT(sock);
1611 
1612 	if (sock->pending_iocp > 0
1613 	    || sock->pending_recv > 0
1614 	    || sock->pending_send > 0
1615 	    || sock->pending_accept > 0
1616 	    || sock->references > 0
1617 	    || sock->pending_connect == 1
1618 	    || !ISC_LIST_EMPTY(sock->recv_list)
1619 	    || !ISC_LIST_EMPTY(sock->send_list)
1620 	    || !ISC_LIST_EMPTY(sock->accept_list)
1621 	    || !ISC_LIST_EMPTY(sock->connect_list)
1622 	    || sock->fd != INVALID_SOCKET) {
1623 		UNLOCK(&sock->lock);
1624 		return;
1625 	}
1626 	UNLOCK(&sock->lock);
1627 
1628 	free_socket(&sock, lineno);
1629 }
1630 
1631 void
free_socket(isc_socket_t ** sockp,int lineno)1632 free_socket(isc_socket_t **sockp, int lineno) {
1633 	isc_socketmgr_t *manager;
1634 	isc_socket_t *sock = *sockp;
1635 	*sockp = NULL;
1636 
1637 	/*
1638 	 * Seems we can free the socket after all.
1639 	 */
1640 	manager = sock->manager;
1641 	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1642 		   ISC_MSGSET_SOCKET, ISC_MSG_DESTROYING,
1643 		   "freeing socket line %d fd %d lock %p semaphore %p",
1644 		   lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1645 
1646 	sock->magic = 0;
1647 	DESTROYLOCK(&sock->lock);
1648 
1649 	if (sock->recvbuf.base != NULL)
1650 		isc_mem_put(manager->mctx, sock->recvbuf.base,
1651 			    sock->recvbuf.len);
1652 
1653 	LOCK(&manager->lock);
1654 	if (ISC_LINK_LINKED(sock, link))
1655 		ISC_LIST_UNLINK(manager->socklist, sock, link);
1656 	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1657 
1658 	if (ISC_LIST_EMPTY(manager->socklist))
1659 		SIGNAL(&manager->shutdown_ok);
1660 	UNLOCK(&manager->lock);
1661 }
1662 
1663 /*
1664  * Create a new 'type' socket managed by 'manager'.  Events
1665  * will be posted to 'task' and when dispatched 'action' will be
1666  * called with 'arg' as the arg value.  The new socket is returned
1667  * in 'socketp'.
1668  */
1669 static isc_result_t
socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp,isc_socket_t * dup_socket)1670 socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1671 	      isc_socket_t **socketp, isc_socket_t *dup_socket)
1672 {
1673 	isc_socket_t *sock = NULL;
1674 	isc_result_t result;
1675 #if defined(USE_CMSG)
1676 	int on = 1;
1677 #endif
1678 #if defined(SO_RCVBUF)
1679 	ISC_SOCKADDR_LEN_T optlen;
1680 	int size;
1681 #endif
1682 	int socket_errno;
1683 	char strbuf[ISC_STRERRORSIZE];
1684 
1685 	REQUIRE(VALID_MANAGER(manager));
1686 	REQUIRE(socketp != NULL && *socketp == NULL);
1687 	REQUIRE(type != isc_sockettype_fdwatch);
1688 
1689 #ifndef SOCK_RAW
1690 	if (type == isc_sockettype_raw)
1691 		return (ISC_R_NOTIMPLEMENTED);
1692 #endif
1693 
1694 	result = allocate_socket(manager, type, &sock);
1695 	if (result != ISC_R_SUCCESS)
1696 		return (result);
1697 
1698 	sock->pf = pf;
1699 	switch (type) {
1700 	case isc_sockettype_udp:
1701 		sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1702 		if (sock->fd != INVALID_SOCKET) {
1703 			result = connection_reset_fix(sock->fd);
1704 			if (result != ISC_R_SUCCESS) {
1705 				socket_log(__LINE__, sock,
1706 					NULL, EVENT, NULL, 0, 0,
1707 					"closed %d %d %d "
1708 					"con_reset_fix_failed",
1709 					sock->pending_recv,
1710 					sock->pending_send,
1711 					sock->references);
1712 				closesocket(sock->fd);
1713 				_set_state(sock, SOCK_CLOSED);
1714 				sock->fd = INVALID_SOCKET;
1715 				free_socket(&sock, __LINE__);
1716 				return (result);
1717 			}
1718 		}
1719 		break;
1720 	case isc_sockettype_tcp:
1721 		sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1722 		break;
1723 #ifdef SOCK_RAW
1724 	case isc_sockettype_raw:
1725 		sock->fd = socket(pf, SOCK_RAW, 0);
1726 #ifdef PF_ROUTE
1727 		if (pf == PF_ROUTE)
1728 			sock->bound = 1;
1729 #endif
1730 		break;
1731 #endif
1732 	}
1733 
1734 	if (sock->fd == INVALID_SOCKET) {
1735 		socket_errno = WSAGetLastError();
1736 		free_socket(&sock, __LINE__);
1737 
1738 		switch (socket_errno) {
1739 		case WSAEMFILE:
1740 		case WSAENOBUFS:
1741 			return (ISC_R_NORESOURCES);
1742 
1743 		case WSAEPROTONOSUPPORT:
1744 		case WSAEPFNOSUPPORT:
1745 		case WSAEAFNOSUPPORT:
1746 			return (ISC_R_FAMILYNOSUPPORT);
1747 
1748 		default:
1749 			isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1750 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1751 					 "socket() %s: %s",
1752 					 isc_msgcat_get(isc_msgcat,
1753 							ISC_MSGSET_GENERAL,
1754 							ISC_MSG_FAILED,
1755 							"failed"),
1756 					 strbuf);
1757 			return (ISC_R_UNEXPECTED);
1758 		}
1759 	}
1760 
1761 	result = make_nonblock(sock->fd);
1762 	if (result != ISC_R_SUCCESS) {
1763 		socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1764 			"closed %d %d %d make_nonblock_failed",
1765 			sock->pending_recv, sock->pending_send,
1766 			sock->references);
1767 		closesocket(sock->fd);
1768 		sock->fd = INVALID_SOCKET;
1769 		free_socket(&sock, __LINE__);
1770 		return (result);
1771 	}
1772 
1773 	/*
1774 	 * Use minimum mtu if possible.
1775 	 */
1776 	use_min_mtu(sock);
1777 
1778 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1779 	if (type == isc_sockettype_udp) {
1780 
1781 #if defined(USE_CMSG)
1782 #if defined(ISC_PLATFORM_HAVEIPV6)
1783 #ifdef IPV6_RECVPKTINFO
1784 		/* 2292bis */
1785 		if ((pf == AF_INET6)
1786 		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1787 				   (char *)&on, sizeof(on)) < 0)) {
1788 			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1789 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1790 					 "setsockopt(%d, IPV6_RECVPKTINFO) "
1791 					 "%s: %s", sock->fd,
1792 					 isc_msgcat_get(isc_msgcat,
1793 							ISC_MSGSET_GENERAL,
1794 							ISC_MSG_FAILED,
1795 							"failed"),
1796 					 strbuf);
1797 		}
1798 #else
1799 		/* 2292 */
1800 		if ((pf == AF_INET6)
1801 		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1802 				   (char *)&on, sizeof(on)) < 0)) {
1803 			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1804 			UNEXPECTED_ERROR(__FILE__, __LINE__,
1805 					 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1806 					 sock->fd,
1807 					 isc_msgcat_get(isc_msgcat,
1808 							ISC_MSGSET_GENERAL,
1809 							ISC_MSG_FAILED,
1810 							"failed"),
1811 					 strbuf);
1812 		}
1813 #endif /* IPV6_RECVPKTINFO */
1814 #endif /* ISC_PLATFORM_HAVEIPV6 */
1815 #endif /* defined(USE_CMSG) */
1816 
1817 #if defined(SO_RCVBUF)
1818 	       optlen = sizeof(size);
1819 	       if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1820 			      (char *)&size, &optlen) >= 0 &&
1821 		    size < RCVBUFSIZE) {
1822 		       size = RCVBUFSIZE;
1823 		       (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1824 					(char *)&size, sizeof(size));
1825 	       }
1826 #endif
1827 
1828 	}
1829 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1830 
1831 	_set_state(sock, SOCK_OPEN);
1832 	sock->references = 1;
1833 	*socketp = sock;
1834 
1835 	iocompletionport_update(sock);
1836 
1837 	if (dup_socket) {
1838 #ifndef ISC_ALLOW_MAPPED
1839 		isc__socket_ipv6only(sock, true);
1840 #endif
1841 
1842 		if (dup_socket->bound) {
1843 			isc_sockaddr_t local;
1844 
1845 			result = isc__socket_getsockname(dup_socket, &local);
1846 			if (result != ISC_R_SUCCESS) {
1847 				isc_socket_close(sock);
1848 				return (result);
1849 			}
1850 			result = isc__socket_bind(sock, &local,
1851 						  ISC_SOCKET_REUSEADDRESS);
1852 			if (result != ISC_R_SUCCESS) {
1853 				isc_socket_close(sock);
1854 				return (result);
1855 			}
1856 		}
1857 		sock->dupped = 1;
1858 	}
1859 
1860 	/*
1861 	 * Note we don't have to lock the socket like we normally would because
1862 	 * there are no external references to it yet.
1863 	 */
1864 	LOCK(&manager->lock);
1865 	ISC_LIST_APPEND(manager->socklist, sock, link);
1866 	InterlockedIncrement(&manager->totalSockets);
1867 	UNLOCK(&manager->lock);
1868 
1869 	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1870 		   ISC_MSGSET_SOCKET, ISC_MSG_CREATED,
1871 		   "created %u type %u", sock->fd, type);
1872 
1873 	return (ISC_R_SUCCESS);
1874 }
1875 
1876 isc_result_t
isc__socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp)1877 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1878 		   isc_socket_t **socketp)
1879 {
1880 	return (socket_create(manager, pf, type, socketp, NULL));
1881 }
1882 
1883 isc_result_t
isc__socket_dup(isc_socket_t * sock,isc_socket_t ** socketp)1884 isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) {
1885 	REQUIRE(VALID_SOCKET(sock));
1886 	REQUIRE(socketp != NULL && *socketp == NULL);
1887 
1888 	return (socket_create(sock->manager, sock->pf, sock->type,
1889 			      socketp, sock));
1890 }
1891 
1892 isc_result_t
isc_socket_open(isc_socket_t * sock)1893 isc_socket_open(isc_socket_t *sock) {
1894 	REQUIRE(VALID_SOCKET(sock));
1895 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1896 
1897 	return (ISC_R_NOTIMPLEMENTED);
1898 }
1899 
1900 /*
1901  * Attach to a socket.  Caller must explicitly detach when it is done.
1902  */
1903 void
isc__socket_attach(isc_socket_t * sock,isc_socket_t ** socketp)1904 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1905 	REQUIRE(VALID_SOCKET(sock));
1906 	REQUIRE(socketp != NULL && *socketp == NULL);
1907 
1908 	LOCK(&sock->lock);
1909 	CONSISTENT(sock);
1910 	sock->references++;
1911 	UNLOCK(&sock->lock);
1912 
1913 	*socketp = sock;
1914 }
1915 
1916 /*
1917  * Dereference a socket.  If this is the last reference to it, clean things
1918  * up by destroying the socket.
1919  */
1920 void
isc__socket_detach(isc_socket_t ** socketp)1921 isc__socket_detach(isc_socket_t **socketp) {
1922 	isc_socket_t *sock;
1923 
1924 	REQUIRE(socketp != NULL);
1925 	sock = *socketp;
1926 	REQUIRE(VALID_SOCKET(sock));
1927 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1928 
1929 	LOCK(&sock->lock);
1930 	CONSISTENT(sock);
1931 	REQUIRE(sock->references > 0);
1932 	sock->references--;
1933 
1934 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1935 		"detach_socket %d %d %d",
1936 		sock->pending_recv, sock->pending_send,
1937 		sock->references);
1938 
1939 	if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1940 		closesocket(sock->fd);
1941 		sock->fd = INVALID_SOCKET;
1942 		_set_state(sock, SOCK_CLOSED);
1943 	}
1944 
1945 	maybe_free_socket(&sock, __LINE__);
1946 
1947 	*socketp = NULL;
1948 }
1949 
1950 isc_result_t
isc_socket_close(isc_socket_t * sock)1951 isc_socket_close(isc_socket_t *sock) {
1952 	REQUIRE(VALID_SOCKET(sock));
1953 	REQUIRE(sock->type != isc_sockettype_fdwatch);
1954 
1955 	return (ISC_R_NOTIMPLEMENTED);
1956 }
1957 
1958 /*
1959  * Dequeue an item off the given socket's read queue, set the result code
1960  * in the done event to the one provided, and send it to the task it was
1961  * destined for.
1962  *
1963  * If the event to be sent is on a list, remove it before sending.  If
1964  * asked to, send and detach from the task as well.
1965  *
1966  * Caller must have the socket locked if the event is attached to the socket.
1967  */
1968 static void
send_recvdone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1969 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1970 	isc_task_t *task;
1971 
1972 	task = (*dev)->ev_sender;
1973 	(*dev)->ev_sender = sock;
1974 
1975 	if (ISC_LINK_LINKED(*dev, ev_link))
1976 		ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1977 
1978 	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) {
1979 		isc_task_sendanddetach(&task, (isc_event_t **)dev);
1980 	} else {
1981 		isc_task_send(task, (isc_event_t **)dev);
1982 	}
1983 
1984 	CONSISTENT(sock);
1985 }
1986 
1987 /*
1988  * See comments for send_recvdone_event() above.
1989  */
1990 static void
send_senddone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1991 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1992 	isc_task_t *task;
1993 
1994 	INSIST(dev != NULL && *dev != NULL);
1995 
1996 	task = (*dev)->ev_sender;
1997 	(*dev)->ev_sender = sock;
1998 
1999 	if (ISC_LINK_LINKED(*dev, ev_link))
2000 		ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
2001 
2002 	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) {
2003 		isc_task_sendanddetach(&task, (isc_event_t **)dev);
2004 	} else {
2005 		isc_task_send(task, (isc_event_t **)dev);
2006 	}
2007 
2008 	CONSISTENT(sock);
2009 }
2010 
2011 /*
2012  * See comments for send_recvdone_event() above.
2013  */
2014 static void
send_acceptdone_event(isc_socket_t * sock,isc_socket_newconnev_t ** adev)2015 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
2016 	isc_task_t *task;
2017 
2018 	INSIST(adev != NULL && *adev != NULL);
2019 
2020 	task = (*adev)->ev_sender;
2021 	(*adev)->ev_sender = sock;
2022 
2023 	if (ISC_LINK_LINKED(*adev, ev_link))
2024 		ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
2025 
2026 	isc_task_sendanddetach(&task, (isc_event_t **)adev);
2027 
2028 	CONSISTENT(sock);
2029 }
2030 
2031 /*
2032  * See comments for send_recvdone_event() above.
2033  */
2034 static void
send_connectdone_event(isc_socket_t * sock,isc_socket_connev_t ** cdev)2035 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
2036 	isc_task_t *task;
2037 
2038 	INSIST(cdev != NULL && *cdev != NULL);
2039 
2040 	task = (*cdev)->ev_sender;
2041 	(*cdev)->ev_sender = sock;
2042 
2043 	if (ISC_LINK_LINKED(*cdev, ev_link))
2044 		ISC_LIST_DEQUEUE(sock->connect_list, *cdev, ev_link);
2045 
2046 	isc_task_sendanddetach(&task, (isc_event_t **)cdev);
2047 
2048 	CONSISTENT(sock);
2049 }
2050 
2051 /*
2052  * On entry to this function, the event delivered is the internal
2053  * readable event, and the first item on the accept_list should be
2054  * the done event we want to send.  If the list is empty, this is a no-op,
2055  * so just close the new connection, unlock, and return.
2056  *
2057  * Note the socket is locked before entering here
2058  */
2059 static void
internal_accept(isc_socket_t * sock,IoCompletionInfo * lpo,int accept_errno)2060 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2061 	isc_socket_newconnev_t *adev;
2062 	isc_result_t result = ISC_R_SUCCESS;
2063 	isc_socket_t *nsock;
2064 	struct sockaddr *localaddr;
2065 	int localaddr_len = sizeof(*localaddr);
2066 	struct sockaddr *remoteaddr;
2067 	int remoteaddr_len = sizeof(*remoteaddr);
2068 
2069 	INSIST(VALID_SOCKET(sock));
2070 	LOCK(&sock->lock);
2071 	CONSISTENT(sock);
2072 
2073 	socket_log(__LINE__, sock, NULL, TRACE,
2074 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2075 		   "internal_accept called");
2076 
2077 	INSIST(sock->listener);
2078 
2079 	INSIST(sock->pending_iocp > 0);
2080 	sock->pending_iocp--;
2081 	INSIST(sock->pending_accept > 0);
2082 	sock->pending_accept--;
2083 
2084 	adev = lpo->adev;
2085 
2086 	/*
2087 	 * If the event is no longer in the list we can just return.
2088 	 */
2089 	if (!acceptdone_is_active(sock, adev))
2090 		goto done;
2091 
2092 	nsock = adev->newsocket;
2093 
2094 	/*
2095 	 * Pull off the done event.
2096 	 */
2097 	ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2098 
2099 	/*
2100 	 * Extract the addresses from the socket, copy them into the structure,
2101 	 * and return the new socket.
2102 	 */
2103 	ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2104 		sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2105 		(LPSOCKADDR *)&localaddr, &localaddr_len,
2106 		(LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2107 	memmove(&adev->address.type, remoteaddr, remoteaddr_len);
2108 	adev->address.length = remoteaddr_len;
2109 	nsock->address = adev->address;
2110 	nsock->pf = adev->address.type.sa.sa_family;
2111 
2112 	socket_log(__LINE__, nsock, &nsock->address, TRACE,
2113 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2114 		   "internal_accept parent %p", sock);
2115 
2116 	result = make_nonblock(adev->newsocket->fd);
2117 	INSIST(result == ISC_R_SUCCESS);
2118 
2119 	/*
2120 	 * Use minimum mtu if possible.
2121 	 */
2122 	use_min_mtu(adev->newsocket);
2123 
2124 	INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2125 			  (char *)&sock->fd, sizeof(sock->fd)) == 0);
2126 
2127 	/*
2128 	 * Hook it up into the manager.
2129 	 */
2130 	nsock->bound = 1;
2131 	nsock->connected = 1;
2132 	_set_state(nsock, SOCK_OPEN);
2133 
2134 	LOCK(&nsock->manager->lock);
2135 	ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2136 	InterlockedIncrement(&nsock->manager->totalSockets);
2137 	UNLOCK(&nsock->manager->lock);
2138 
2139 	socket_log(__LINE__, sock, &nsock->address, CREATION,
2140 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2141 		   "accepted_connection new_socket %p fd %d",
2142 		   nsock, nsock->fd);
2143 
2144 	adev->result = result;
2145 	send_acceptdone_event(sock, &adev);
2146 
2147 done:
2148 	CONSISTENT(sock);
2149 	UNLOCK(&sock->lock);
2150 
2151 	HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2152 	lpo->acceptbuffer = NULL;
2153 }
2154 
2155 /*
2156  * Called when a socket with a pending connect() finishes.
2157  * Note that the socket is locked before entering.
2158  */
2159 static void
internal_connect(isc_socket_t * sock,IoCompletionInfo * lpo,int connect_errno)2160 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2161 	isc_socket_connev_t *cdev;
2162 	isc_result_t result;
2163 	char strbuf[ISC_STRERRORSIZE];
2164 
2165 	INSIST(VALID_SOCKET(sock));
2166 
2167 	LOCK(&sock->lock);
2168 
2169 	INSIST(sock->pending_iocp > 0);
2170 	sock->pending_iocp--;
2171 	INSIST(sock->pending_connect == 1);
2172 	sock->pending_connect = 0;
2173 
2174 	/*
2175 	 * If the event is no longer in the list we can just close and return.
2176 	 */
2177 	cdev = lpo->cdev;
2178 	if (!connectdone_is_active(sock, cdev)) {
2179 		sock->pending_connect = 0;
2180 		if (sock->fd != INVALID_SOCKET) {
2181 			closesocket(sock->fd);
2182 			sock->fd = INVALID_SOCKET;
2183 			_set_state(sock, SOCK_CLOSED);
2184 		}
2185 		CONSISTENT(sock);
2186 		UNLOCK(&sock->lock);
2187 		return;
2188 	}
2189 
2190 	/*
2191 	 * Check possible Windows network event error status here.
2192 	 */
2193 	if (connect_errno != 0) {
2194 		/*
2195 		 * If the error is SOFT, just try again on this
2196 		 * fd and pretend nothing strange happened.
2197 		 */
2198 		if (SOFT_ERROR(connect_errno) ||
2199 		    connect_errno == WSAEINPROGRESS) {
2200 			sock->pending_connect = 1;
2201 			CONSISTENT(sock);
2202 			UNLOCK(&sock->lock);
2203 			return;
2204 		}
2205 
2206 		/*
2207 		 * Translate other errors into ISC_R_* flavors.
2208 		 */
2209 		switch (connect_errno) {
2210 #define ERROR_MATCH(a, b) case a: result = b; break;
2211 			ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2212 			ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2213 			ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2214 			ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2215 			ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2216 			ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2217 			ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2218 			ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2219 			ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2220 			ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2221 			ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2222 			ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2223 #undef ERROR_MATCH
2224 		default:
2225 			result = ISC_R_UNEXPECTED;
2226 			isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2227 			UNEXPECTED_ERROR(__FILE__, __LINE__,
2228 					 "internal_connect: connect() %s",
2229 					 strbuf);
2230 		}
2231 	} else {
2232 		INSIST(setsockopt(sock->fd, SOL_SOCKET,
2233 				  SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2234 		result = ISC_R_SUCCESS;
2235 		sock->connected = 1;
2236 		socket_log(__LINE__, sock, &sock->address, IOEVENT,
2237 			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2238 			   "internal_connect: success");
2239 	}
2240 
2241 	do {
2242 		cdev->result = result;
2243 		send_connectdone_event(sock, &cdev);
2244 		cdev = ISC_LIST_HEAD(sock->connect_list);
2245 	} while (cdev != NULL);
2246 
2247 	UNLOCK(&sock->lock);
2248 }
2249 
2250 /*
2251  * Loop through the socket, returning ISC_R_EOF for each done event pending.
2252  */
2253 static void
send_recvdone_abort(isc_socket_t * sock,isc_result_t result)2254 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2255 	isc_socketevent_t *dev;
2256 
2257 	while (!ISC_LIST_EMPTY(sock->recv_list)) {
2258 		dev = ISC_LIST_HEAD(sock->recv_list);
2259 		dev->result = result;
2260 		send_recvdone_event(sock, &dev);
2261 	}
2262 }
2263 
2264 /*
2265  * Loop through the socket, returning result for each done event pending.
2266  */
2267 static void
send_connectdone_abort(isc_socket_t * sock,isc_result_t result)2268 send_connectdone_abort(isc_socket_t *sock, isc_result_t result) {
2269 	isc_socket_connev_t *dev;
2270 
2271 	while (!ISC_LIST_EMPTY(sock->connect_list)) {
2272 		dev = ISC_LIST_HEAD(sock->connect_list);
2273 		dev->result = result;
2274 		send_connectdone_event(sock, &dev);
2275 	}
2276 }
2277 
2278 /*
2279  * Take the data we received in our private buffer, and if any recv() calls on
2280  * our list are satisfied, send the corresponding done event.
2281  *
2282  * If we need more data (there are still items on the recv_list after we consume all
2283  * our data) then arrange for another system recv() call to fill our buffers.
2284  */
2285 static void
internal_recv(isc_socket_t * sock,int nbytes)2286 internal_recv(isc_socket_t *sock, int nbytes)
2287 {
2288 	INSIST(VALID_SOCKET(sock));
2289 
2290 	LOCK(&sock->lock);
2291 	CONSISTENT(sock);
2292 
2293 	socket_log(__LINE__, sock, NULL, IOEVENT,
2294 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2295 		   "internal_recv: %d bytes received", nbytes);
2296 
2297 	/*
2298 	 * If we got here, the I/O operation succeeded.  However, we might
2299 	 * still have removed this event from our notification list (or never
2300 	 * placed it on it due to immediate completion.)
2301 	 * Handle the reference counting here, and handle the cancellation
2302 	 * event just after.
2303 	 */
2304 	INSIST(sock->pending_iocp > 0);
2305 	sock->pending_iocp--;
2306 	INSIST(sock->pending_recv > 0);
2307 	sock->pending_recv--;
2308 
2309 	/*
2310 	 * The only way we could have gotten here is that our I/O has
2311 	 * successfully completed. Update our pointers, and move on.
2312 	 *  The only odd case here is that we might not have received
2313 	 * enough data on a TCP stream to satisfy the minimum requirements.
2314 	 * If this is the case, we will re-issue the recv() call for what
2315 	 * we need.
2316 	 *
2317 	 * We do check for a recv() of 0 bytes on a TCP stream.  This
2318 	 * means the remote end has closed.
2319 	 */
2320 	if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2321 		send_recvdone_abort(sock, ISC_R_EOF);
2322 		maybe_free_socket(&sock, __LINE__);
2323 		return;
2324 	}
2325 	sock->recvbuf.remaining = nbytes;
2326 	sock->recvbuf.consume_position = sock->recvbuf.base;
2327 	completeio_recv(sock);
2328 
2329 	/*
2330 	 * If there are more receivers waiting for data, queue another receive
2331 	 * here.
2332 	 */
2333 	queue_receive_request(sock);
2334 
2335 	/*
2336 	 * Unlock and/or destroy if we are the last thing this socket has left to do.
2337 	 */
2338 	maybe_free_socket(&sock, __LINE__);
2339 }
2340 
2341 static void
internal_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int nbytes,int send_errno,IoCompletionInfo * lpo)2342 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2343 	      struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2344 {
2345 	buflist_t *buffer;
2346 
2347 	/*
2348 	 * Find out what socket this is and lock it.
2349 	 */
2350 	INSIST(VALID_SOCKET(sock));
2351 
2352 	LOCK(&sock->lock);
2353 	CONSISTENT(sock);
2354 
2355 	socket_log(__LINE__, sock, NULL, IOEVENT,
2356 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2357 		   "internal_send: task got socket event %p", dev);
2358 
2359 	buffer = ISC_LIST_HEAD(lpo->bufferlist);
2360 	while (buffer != NULL) {
2361 		ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2362 
2363 		socket_log(__LINE__, sock, NULL, TRACE,
2364 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2365 		   "free_buffer %p %p", buffer, buffer->buf);
2366 
2367 		HeapFree(hHeapHandle, 0, buffer->buf);
2368 		HeapFree(hHeapHandle, 0, buffer);
2369 		buffer = ISC_LIST_HEAD(lpo->bufferlist);
2370 	}
2371 
2372 	INSIST(sock->pending_iocp > 0);
2373 	sock->pending_iocp--;
2374 	INSIST(sock->pending_send > 0);
2375 	sock->pending_send--;
2376 
2377 	/* If the event is no longer in the list we can just return */
2378 	if (!senddone_is_active(sock, dev))
2379 		goto done;
2380 
2381 	/*
2382 	 * Set the error code and send things on its way.
2383 	 */
2384 	switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2385 	case DOIO_SOFT:
2386 		break;
2387 	case DOIO_HARD:
2388 	case DOIO_SUCCESS:
2389 		send_senddone_event(sock, &dev);
2390 		break;
2391 	}
2392 
2393  done:
2394 	maybe_free_socket(&sock, __LINE__);
2395 }
2396 
2397 /*
2398  * These return if the done event passed in is on the list.
2399  * Using these ensures we will not double-send an event.
2400  */
2401 static bool
senddone_is_active(isc_socket_t * sock,isc_socketevent_t * dev)2402 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2403 {
2404 	isc_socketevent_t *ldev;
2405 
2406 	ldev = ISC_LIST_HEAD(sock->send_list);
2407 	while (ldev != NULL && ldev != dev)
2408 		ldev = ISC_LIST_NEXT(ldev, ev_link);
2409 
2410 	return (ldev == NULL ? false : true);
2411 }
2412 
2413 static bool
acceptdone_is_active(isc_socket_t * sock,isc_socket_newconnev_t * dev)2414 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2415 {
2416 	isc_socket_newconnev_t *ldev;
2417 
2418 	ldev = ISC_LIST_HEAD(sock->accept_list);
2419 	while (ldev != NULL && ldev != dev)
2420 		ldev = ISC_LIST_NEXT(ldev, ev_link);
2421 
2422 	return (ldev == NULL ? false : true);
2423 }
2424 
2425 static bool
connectdone_is_active(isc_socket_t * sock,isc_socket_connev_t * dev)2426 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2427 {
2428 	isc_socket_connev_t *cdev;
2429 
2430 	cdev = ISC_LIST_HEAD(sock->connect_list);
2431 	while (cdev != NULL && cdev != dev)
2432 		cdev = ISC_LIST_NEXT(cdev, ev_link);
2433 
2434 	return (cdev == NULL ? false : true);
2435 }
2436 
2437 //
2438 // The Windows network stack seems to have two very distinct paths depending
2439 // on what is installed.  Specifically, if something is looking at network
2440 // connections (like an anti-virus or anti-malware application, such as
2441 // McAfee products) Windows may return additional error conditions which
2442 // were not previously returned.
2443 //
2444 // One specific one is when a TCP SYN scan is used.  In this situation,
2445 // Windows responds with the SYN-ACK, but the scanner never responds with
2446 // the 3rd packet, the ACK.  Windows considers this a partially open connection.
2447 // Most Unix networking stacks, and Windows without McAfee installed, will
2448 // not return this to the caller.  However, with this product installed,
2449 // Windows returns this as a failed status on the Accept() call.  Here, we
2450 // will just re-issue the ISCAcceptEx() call as if nothing had happened.
2451 //
2452 // This code should only be called when the listening socket has received
2453 // such an error.  Additionally, the "parent" socket must be locked.
2454 // Additionally, the lpo argument is re-used here, and must not be freed
2455 // by the caller.
2456 //
2457 static isc_result_t
restart_accept(isc_socket_t * parent,IoCompletionInfo * lpo)2458 restart_accept(isc_socket_t *parent, IoCompletionInfo *lpo)
2459 {
2460 	isc_socket_t *nsock = lpo->adev->newsocket;
2461 	SOCKET new_fd;
2462 
2463 	/*
2464 	 * AcceptEx() requires we pass in a socket.  Note that we carefully
2465 	 * do not close the previous socket in case of an error message returned by
2466 	 * our new socket() call.  If we return an error here, our caller will
2467 	 * clean up.
2468 	 */
2469 	new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP);
2470 	if (nsock->fd == INVALID_SOCKET) {
2471 		return (ISC_R_FAILURE); // parent will ask windows for error message
2472 	}
2473 	closesocket(nsock->fd);
2474 	nsock->fd = new_fd;
2475 
2476 	memset(&lpo->overlapped, 0, sizeof(lpo->overlapped));
2477 
2478 	ISCAcceptEx(parent->fd,
2479 		    nsock->fd,				/* Accepted Socket */
2480 		    lpo->acceptbuffer,			/* Buffer for initial Recv */
2481 		    0,					/* Length of Buffer */
2482 		    sizeof(SOCKADDR_STORAGE) + 16,	/* Local address length + 16 */
2483 		    sizeof(SOCKADDR_STORAGE) + 16,	/* Remote address length + 16 */
2484 		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
2485 		    (LPOVERLAPPED)lpo			/* Overlapped structure */
2486 		    );
2487 
2488 	InterlockedDecrement(&nsock->manager->iocp_total);
2489 	iocompletionport_update(nsock);
2490 
2491 	return (ISC_R_SUCCESS);
2492 }
2493 
/*
 * This is the I/O Completion Port Worker Function. It loops forever
 * waiting for I/O to complete and then forwards them for further
 * processing. There are a number of these in separate threads.
 *
 * ThreadContext is the socket manager whose completion port is serviced.
 * Each dequeued packet carries the socket as its completion key and an
 * IoCompletionInfo ("lpo") as its overlapped pointer; lpo->request_type
 * selects the handling.  A packet with a NULL lpo is the request to exit.
 *
 * Failed completions (other than ERROR_MORE_DATA) are handled inline
 * below; successful ones are passed to the internal_*() handlers.  In
 * both cases lpo is freed here afterwards, except when a failed accept
 * is restarted and lpo is handed back to the system.
 */
static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext) {
	isc_socketmgr_t *manager = ThreadContext;
	DWORD nbytes;
	IoCompletionInfo *lpo = NULL;
	isc_socket_t *sock = NULL;
	int request;
	struct msghdr *messagehdr = NULL;
	int errval;
	char strbuf[ISC_STRERRORSIZE];
	int errstatus;

	REQUIRE(VALID_MANAGER(manager));

	/*
	 * Set the thread priority high enough so I/O will
	 * preempt normal recv packet processing, but not
	 * higher than the timer sync thread.
	 */
	if (!SetThreadPriority(GetCurrentThread(),
			       THREAD_PRIORITY_ABOVE_NORMAL)) {
		errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__,
				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
				ISC_MSG_FAILED,
				"Can't set thread priority: %s"),
				strbuf);
	}

	/*
	 * Loop forever waiting on I/O Completions and then processing them
	 */
	while (TRUE) {
		BOOL bSuccess;

		/*
		 * Re-entry point used after a failed accept has been
		 * restarted: it skips the lpo cleanup at the bottom of the
		 * loop because that lpo is in use again.
		 */
		wait_again:
		bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
						     &nbytes,
						     (PULONG_PTR)&sock,
						     (LPWSAOVERLAPPED *)&lpo,
						     INFINITE);
		if (lpo == NULL) /* Received request to exit */
			break;

		REQUIRE(VALID_SOCKET(sock));

		request = lpo->request_type;

		/* Capture the error code right away, before any other call. */
		if (!bSuccess)
			errstatus = GetLastError();
		else
			errstatus = 0;
		/*
		 * ERROR_MORE_DATA is not treated as a failure; it falls
		 * through to the normal dispatch below.
		 */
		if (!bSuccess && errstatus != ERROR_MORE_DATA) {
			isc_result_t isc_result;

			/*
			 * Did the I/O operation complete?
			 */
			isc_result = isc__errno2result(errstatus);

			/*
			 * There is no explicit UNLOCK for this LOCK on the
			 * paths that reach maybe_free_socket() below;
			 * presumably that call releases the lock.
			 */
			LOCK(&sock->lock);
			CONSISTENT(sock);
			switch (request) {
			case SOCKET_RECV:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_recv > 0);
				sock->pending_recv--;
				/*
				 * On a socket that is not connected these
				 * errors are not reported to the waiting
				 * receivers; another receive is queued
				 * instead.
				 */
				if (!sock->connected &&
				    ((errstatus == ERROR_HOST_UNREACHABLE) ||
				     (errstatus == WSAENETRESET) ||
				     (errstatus == WSAECONNRESET))) {
					/* ignore soft errors */
					queue_receive_request(sock);
					break;
				}
				/* Fail every waiting receiver with the error. */
				send_recvdone_abort(sock, isc_result);
				if (isc_result == ISC_R_UNEXPECTED) {
					UNEXPECTED_ERROR(__FILE__, __LINE__,
						"SOCKET_RECV: Windows error code: %d, returning ISC error %d",
						errstatus, isc_result);
				}
				break;

			case SOCKET_SEND:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_send > 0);
				sock->pending_send--;
				/* Only report if the event is still queued. */
				if (senddone_is_active(sock, lpo->dev)) {
					lpo->dev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_send");
					send_senddone_event(sock, &lpo->dev);
				}
				break;

			case SOCKET_ACCEPT:
				/*
				 * The counters are only checked here, not
				 * decremented: if the accept is restarted
				 * the request stays outstanding.
				 */
				INSIST(sock->pending_iocp > 0);
				INSIST(sock->pending_accept > 0);

				socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
					"Accept: errstatus=%d isc_result=%d", errstatus, isc_result);

				if (acceptdone_is_active(sock, lpo->adev)) {
					if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
						/*
						 * lpo has been re-submitted;
						 * it must not be freed.
						 */
						UNLOCK(&sock->lock);
						goto wait_again;
					} else {
						/*
						 * Report the error from the
						 * failed restart instead.
						 */
						errstatus = GetLastError();
						isc_result = isc__errno2result(errstatus);
						socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
							"restart_accept() failed: errstatus=%d isc_result=%d",
							errstatus, isc_result);
					}
				}

				/* The accept is definitely finished now. */
				sock->pending_iocp--;
				sock->pending_accept--;
				if (acceptdone_is_active(sock, lpo->adev)) {
					/*
					 * Dispose of the socket that was
					 * prepared for the connection, then
					 * report the failure.
					 */
					closesocket(lpo->adev->newsocket->fd);
					lpo->adev->newsocket->fd = INVALID_SOCKET;
					lpo->adev->newsocket->references--;
					free_socket(&lpo->adev->newsocket, __LINE__);
					lpo->adev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_accept");
					send_acceptdone_event(sock, &lpo->adev);
				}
				/*
				 * NOTE(review): unlike internal_accept(), this
				 * path does not free lpo->acceptbuffer before
				 * lpo itself is freed below -- confirm whether
				 * it is released elsewhere.
				 */
				break;

			case SOCKET_CONNECT:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_connect == 1);
				sock->pending_connect = 0;
				/*
				 * If this connect is still queued, fail every
				 * queued connect event with the error.
				 */
				if (connectdone_is_active(sock, lpo->cdev)) {
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_connect");
					send_connectdone_abort(sock, isc_result);
				}
				break;
			}
			maybe_free_socket(&sock, __LINE__);

			/* lpo cannot be NULL here (checked after the wait). */
			if (lpo != NULL)
				HeapFree(hHeapHandle, 0, lpo);
			continue;
		}

		messagehdr = &lpo->messagehdr;

		/*
		 * Successful completion: each handler takes the socket lock
		 * itself.
		 */
		switch (request) {
		case SOCKET_RECV:
			internal_recv(sock, nbytes);
			break;
		case SOCKET_SEND:
			internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
			break;
		case SOCKET_ACCEPT:
			internal_accept(sock, lpo, errstatus);
			break;
		case SOCKET_CONNECT:
			internal_connect(sock, lpo, errstatus);
			break;
		}

		if (lpo != NULL)
			HeapFree(hHeapHandle, 0, lpo);
	}

	/*
	 * Exit Completion Port Thread
	 */
	manager_log(manager, TRACE,
		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
				   ISC_MSG_EXITING, "SocketIoThread exiting"));
	return ((isc_threadresult_t)0);
}
2679 
2680 /*
2681  * Create a new socket manager.
2682  */
2683 isc_result_t
isc__socketmgr_create(isc_mem_t * mctx,isc_socketmgr_t ** managerp)2684 isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2685 	return (isc_socketmgr_create2(mctx, managerp, 0));
2686 }
2687 
2688 isc_result_t
isc__socketmgr_create2(isc_mem_t * mctx,isc_socketmgr_t ** managerp,unsigned int maxsocks)2689 isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2690 		       unsigned int maxsocks)
2691 {
2692 	isc_socketmgr_t *manager;
2693 	isc_result_t result;
2694 
2695 	REQUIRE(managerp != NULL && *managerp == NULL);
2696 
2697 	if (maxsocks != 0)
2698 		return (ISC_R_NOTIMPLEMENTED);
2699 
2700 	manager = isc_mem_get(mctx, sizeof(*manager));
2701 	if (manager == NULL)
2702 		return (ISC_R_NOMEMORY);
2703 
2704 	InitSockets();
2705 
2706 	manager->magic = SOCKET_MANAGER_MAGIC;
2707 	manager->mctx = NULL;
2708 	manager->stats = NULL;
2709 	ISC_LIST_INIT(manager->socklist);
2710 	result = isc_mutex_init(&manager->lock);
2711 	if (result != ISC_R_SUCCESS) {
2712 		isc_mem_put(mctx, manager, sizeof(*manager));
2713 		return (result);
2714 	}
2715 	if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2716 		DESTROYLOCK(&manager->lock);
2717 		isc_mem_put(mctx, manager, sizeof(*manager));
2718 		UNEXPECTED_ERROR(__FILE__, __LINE__,
2719 				 "isc_condition_init() %s",
2720 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2721 						ISC_MSG_FAILED, "failed"));
2722 		return (ISC_R_UNEXPECTED);
2723 	}
2724 
2725 	isc_mem_attach(mctx, &manager->mctx);
2726 
2727 	iocompletionport_init(manager);	/* Create the Completion Ports */
2728 
2729 	manager->bShutdown = false;
2730 	manager->totalSockets = 0;
2731 	manager->iocp_total = 0;
2732 	manager->maxudp = 0;
2733 
2734 	*managerp = manager;
2735 
2736 	return (ISC_R_SUCCESS);
2737 }
2738 
2739 isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t * manager,unsigned int * nsockp)2740 isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2741 	REQUIRE(VALID_MANAGER(manager));
2742 	REQUIRE(nsockp != NULL);
2743 
2744 	return (ISC_R_NOTIMPLEMENTED);
2745 }
2746 
2747 void
isc_socketmgr_setstats(isc_socketmgr_t * manager,isc_stats_t * stats)2748 isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2749 	REQUIRE(VALID_MANAGER(manager));
2750 	REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2751 	REQUIRE(manager->stats == NULL);
2752 	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2753 
2754 	isc_stats_attach(stats, &manager->stats);
2755 }
2756 
2757 void
isc__socketmgr_destroy(isc_socketmgr_t ** managerp)2758 isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
2759 	isc_socketmgr_t *manager;
2760 	int i;
2761 	isc_mem_t *mctx;
2762 
2763 	/*
2764 	 * Destroy a socket manager.
2765 	 */
2766 
2767 	REQUIRE(managerp != NULL);
2768 	manager = *managerp;
2769 	REQUIRE(VALID_MANAGER(manager));
2770 
2771 	LOCK(&manager->lock);
2772 
2773 	/*
2774 	 * Wait for all sockets to be destroyed.
2775 	 */
2776 	while (!ISC_LIST_EMPTY(manager->socklist)) {
2777 		manager_log(manager, CREATION,
2778 			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2779 					   ISC_MSG_SOCKETSREMAIN,
2780 					   "sockets exist"));
2781 		WAIT(&manager->shutdown_ok, &manager->lock);
2782 	}
2783 
2784 	UNLOCK(&manager->lock);
2785 
2786 	/*
2787 	 * Here, we need to had some wait code for the completion port
2788 	 * thread.
2789 	 */
2790 	signal_iocompletionport_exit(manager);
2791 	manager->bShutdown = true;
2792 
2793 	/*
2794 	 * Wait for threads to exit.
2795 	 */
2796 	for (i = 0; i < manager->maxIOCPThreads; i++) {
2797 		if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2798 			NULL) != ISC_R_SUCCESS)
2799 			UNEXPECTED_ERROR(__FILE__, __LINE__,
2800 				 "isc_thread_join() for Completion Port %s",
2801 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2802 						ISC_MSG_FAILED, "failed"));
2803 	}
2804 	/*
2805 	 * Clean up.
2806 	 */
2807 
2808 	CloseHandle(manager->hIoCompletionPort);
2809 
2810 	(void)isc_condition_destroy(&manager->shutdown_ok);
2811 
2812 	DESTROYLOCK(&manager->lock);
2813 	if (manager->stats != NULL)
2814 		isc_stats_detach(&manager->stats);
2815 	manager->magic = 0;
2816 	mctx= manager->mctx;
2817 	isc_mem_put(mctx, manager, sizeof(*manager));
2818 
2819 	isc_mem_detach(&mctx);
2820 
2821 	*managerp = NULL;
2822 }
2823 
2824 static void
queue_receive_event(isc_socket_t * sock,isc_task_t * task,isc_socketevent_t * dev)2825 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2826 {
2827 	isc_task_t *ntask = NULL;
2828 
2829 	isc_task_attach(task, &ntask);
2830 	dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2831 
2832 	/*
2833 	 * Enqueue the request.
2834 	 */
2835 	INSIST(!ISC_LINK_LINKED(dev, ev_link));
2836 	ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2837 
2838 	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2839 		   "queue_receive_event: event %p -> task %p",
2840 		   dev, ntask);
2841 }
2842 
2843 /*
2844  * Check the pending receive queue, and if we have data pending, give it to this
2845  * caller.  If we have none, queue an I/O request.  If this caller is not the first
2846  * on the list, then we will just queue this event and return.
2847  *
2848  * Caller must have the socket locked.
2849  */
2850 static isc_result_t
socket_recv(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,unsigned int flags)2851 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2852 	    unsigned int flags)
2853 {
2854 	isc_result_t result = ISC_R_SUCCESS;
2855 
2856 	dev->ev_sender = task;
2857 
2858 	if (sock->fd == INVALID_SOCKET)
2859 		return (ISC_R_EOF);
2860 
2861 	/*
2862 	 * Queue our event on the list of things to do.  Call our function to
2863 	 * attempt to fill buffers as much as possible, and return done events.
2864 	 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2865 	 * here and tell our caller that we could not satisfy it immediately.
2866 	 */
2867 	queue_receive_event(sock, task, dev);
2868 	if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2869 		result = ISC_R_INPROGRESS;
2870 
2871 	completeio_recv(sock);
2872 
2873 	/*
2874 	 * If there are more receivers waiting for data, queue another receive
2875 	 * here.  If the
2876 	 */
2877 	queue_receive_request(sock);
2878 
2879 	return (result);
2880 }
2881 
2882 isc_result_t
isc__socket_recvv(isc_socket_t * sock,isc_bufferlist_t * buflist,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)2883 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2884 		 unsigned int minimum, isc_task_t *task,
2885 		 isc_taskaction_t action, void *arg)
2886 {
2887 	isc_socketevent_t *dev;
2888 	isc_socketmgr_t *manager;
2889 	unsigned int iocount;
2890 	isc_buffer_t *buffer;
2891 	isc_result_t ret;
2892 
2893 	REQUIRE(VALID_SOCKET(sock));
2894 	LOCK(&sock->lock);
2895 	CONSISTENT(sock);
2896 
2897 	/*
2898 	 * Make sure that the socket is not closed.  XXXMLG change error here?
2899 	 */
2900 	if (sock->fd == INVALID_SOCKET) {
2901 		UNLOCK(&sock->lock);
2902 		return (ISC_R_CONNREFUSED);
2903 	}
2904 
2905 	REQUIRE(buflist != NULL);
2906 	REQUIRE(!ISC_LIST_EMPTY(*buflist));
2907 	REQUIRE(task != NULL);
2908 	REQUIRE(action != NULL);
2909 
2910 	manager = sock->manager;
2911 	REQUIRE(VALID_MANAGER(manager));
2912 
2913 	iocount = isc_bufferlist_availablecount(buflist);
2914 	REQUIRE(iocount > 0);
2915 
2916 	INSIST(sock->bound);
2917 
2918 	dev = allocate_socketevent(manager->mctx, sock,
2919 				   ISC_SOCKEVENT_RECVDONE, action, arg);
2920 	if (dev == NULL) {
2921 		UNLOCK(&sock->lock);
2922 		return (ISC_R_NOMEMORY);
2923 	}
2924 
2925 	/*
2926 	 * UDP sockets are always partial read
2927 	 */
2928 	if (sock->type == isc_sockettype_udp)
2929 		dev->minimum = 1;
2930 	else {
2931 		if (minimum == 0)
2932 			dev->minimum = iocount;
2933 		else
2934 			dev->minimum = minimum;
2935 	}
2936 
2937 	/*
2938 	 * Move each buffer from the passed in list to our internal one.
2939 	 */
2940 	buffer = ISC_LIST_HEAD(*buflist);
2941 	while (buffer != NULL) {
2942 		ISC_LIST_DEQUEUE(*buflist, buffer, link);
2943 		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2944 		buffer = ISC_LIST_HEAD(*buflist);
2945 	}
2946 
2947 	ret = socket_recv(sock, dev, task, 0);
2948 
2949 	UNLOCK(&sock->lock);
2950 	return (ret);
2951 }
2952 
2953 isc_result_t
isc__socket_recv(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)2954 isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
2955 		 unsigned int minimum, isc_task_t *task,
2956 		 isc_taskaction_t action, void *arg)
2957 {
2958 	isc_socketevent_t *dev;
2959 	isc_socketmgr_t *manager;
2960 	isc_result_t ret;
2961 
2962 	REQUIRE(VALID_SOCKET(sock));
2963 	LOCK(&sock->lock);
2964 	CONSISTENT(sock);
2965 
2966 	/*
2967 	 * make sure that the socket's not closed
2968 	 */
2969 	if (sock->fd == INVALID_SOCKET) {
2970 		UNLOCK(&sock->lock);
2971 		return (ISC_R_CONNREFUSED);
2972 	}
2973 	REQUIRE(action != NULL);
2974 
2975 	manager = sock->manager;
2976 	REQUIRE(VALID_MANAGER(manager));
2977 
2978 	INSIST(sock->bound);
2979 
2980 	dev = allocate_socketevent(manager->mctx, sock,
2981 				   ISC_SOCKEVENT_RECVDONE, action, arg);
2982 	if (dev == NULL) {
2983 		UNLOCK(&sock->lock);
2984 		return (ISC_R_NOMEMORY);
2985 	}
2986 
2987 	ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2988 	UNLOCK(&sock->lock);
2989 	return (ret);
2990 }
2991 
2992 isc_result_t
isc__socket_recv2(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_socketevent_t * event,unsigned int flags)2993 isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2994 		  unsigned int minimum, isc_task_t *task,
2995 		  isc_socketevent_t *event, unsigned int flags)
2996 {
2997 	isc_result_t ret;
2998 
2999 	REQUIRE(VALID_SOCKET(sock));
3000 	LOCK(&sock->lock);
3001 	CONSISTENT(sock);
3002 
3003 	event->result = ISC_R_UNEXPECTED;
3004 	event->ev_sender = sock;
3005 	/*
3006 	 * make sure that the socket's not closed
3007 	 */
3008 	if (sock->fd == INVALID_SOCKET) {
3009 		UNLOCK(&sock->lock);
3010 		return (ISC_R_CONNREFUSED);
3011 	}
3012 
3013 	ISC_LIST_INIT(event->bufferlist);
3014 	event->region = *region;
3015 	event->n = 0;
3016 	event->offset = 0;
3017 	event->attributes = 0;
3018 
3019 	/*
3020 	 * UDP sockets are always partial read.
3021 	 */
3022 	if (sock->type == isc_sockettype_udp)
3023 		event->minimum = 1;
3024 	else {
3025 		if (minimum == 0)
3026 			event->minimum = region->length;
3027 		else
3028 			event->minimum = minimum;
3029 	}
3030 
3031 	ret = socket_recv(sock, event, task, flags);
3032 	UNLOCK(&sock->lock);
3033 	return (ret);
3034 }
3035 
3036 /*
3037  * Caller must have the socket locked.
3038  */
3039 static isc_result_t
socket_send(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3040 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
3041 	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3042 	    unsigned int flags)
3043 {
3044 	int io_state;
3045 	int send_errno = 0;
3046 	int cc = 0;
3047 	isc_task_t *ntask = NULL;
3048 	isc_result_t result = ISC_R_SUCCESS;
3049 
3050 	dev->ev_sender = task;
3051 
3052 	set_dev_address(address, sock, dev);
3053 	if (pktinfo != NULL) {
3054 		socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
3055 			   ISC_MSG_PKTINFOPROVIDED,
3056 			   "pktinfo structure provided, ifindex %u (set to 0)",
3057 			   pktinfo->ipi6_ifindex);
3058 
3059 		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
3060 		dev->pktinfo = *pktinfo;
3061 		/*
3062 		 * Set the pktinfo index to 0 here, to let the kernel decide
3063 		 * what interface it should send on.
3064 		 */
3065 		dev->pktinfo.ipi6_ifindex = 0;
3066 	}
3067 
3068 	io_state = startio_send(sock, dev, &cc, &send_errno);
3069 	switch (io_state) {
3070 	case DOIO_PENDING:	/* I/O started. Enqueue completion event. */
3071 	case DOIO_SOFT:
3072 		/*
3073 		 * We couldn't send all or part of the request right now, so
3074 		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
3075 		 */
3076 		if ((flags & ISC_SOCKFLAG_NORETRY) == 0 ||
3077 		    io_state == DOIO_PENDING) {
3078 			isc_task_attach(task, &ntask);
3079 			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
3080 
3081 			/*
3082 			 * Enqueue the request.
3083 			 */
3084 			INSIST(!ISC_LINK_LINKED(dev, ev_link));
3085 			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
3086 
3087 			socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
3088 				   "socket_send: event %p -> task %p",
3089 				   dev, ntask);
3090 
3091 			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
3092 				result = ISC_R_INPROGRESS;
3093 			break;
3094 		}
3095 
3096 	case DOIO_SUCCESS:
3097 		break;
3098 	}
3099 
3100 	return (result);
3101 }
3102 
3103 isc_result_t
isc__socket_send(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,void * arg)3104 isc__socket_send(isc_socket_t *sock, isc_region_t *region,
3105 		 isc_task_t *task, isc_taskaction_t action, void *arg)
3106 {
3107 	/*
3108 	 * REQUIRE() checking is performed in isc_socket_sendto().
3109 	 */
3110 	return (isc_socket_sendto(sock, region, task, action, arg, NULL,
3111 				  NULL));
3112 }
3113 
3114 isc_result_t
isc__socket_sendto(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3115 isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
3116 		   isc_task_t *task, isc_taskaction_t action, void *arg,
3117 		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3118 {
3119 	isc_socketevent_t *dev;
3120 	isc_socketmgr_t *manager;
3121 	isc_result_t ret;
3122 
3123 	REQUIRE(VALID_SOCKET(sock));
3124 	REQUIRE(sock->type != isc_sockettype_fdwatch);
3125 
3126 	LOCK(&sock->lock);
3127 	CONSISTENT(sock);
3128 
3129 	/*
3130 	 * make sure that the socket's not closed
3131 	 */
3132 	if (sock->fd == INVALID_SOCKET) {
3133 		UNLOCK(&sock->lock);
3134 		return (ISC_R_CONNREFUSED);
3135 	}
3136 	REQUIRE(region != NULL);
3137 	REQUIRE(task != NULL);
3138 	REQUIRE(action != NULL);
3139 
3140 	manager = sock->manager;
3141 	REQUIRE(VALID_MANAGER(manager));
3142 
3143 	INSIST(sock->bound);
3144 
3145 	dev = allocate_socketevent(manager->mctx, sock,
3146 				   ISC_SOCKEVENT_SENDDONE, action, arg);
3147 	if (dev == NULL) {
3148 		UNLOCK(&sock->lock);
3149 		return (ISC_R_NOMEMORY);
3150 	}
3151 	dev->region = *region;
3152 
3153 	ret = socket_send(sock, dev, task, address, pktinfo, 0);
3154 	UNLOCK(&sock->lock);
3155 	return (ret);
3156 }
3157 
3158 isc_result_t
isc__socket_sendv(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg)3159 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3160 		  isc_task_t *task, isc_taskaction_t action, void *arg)
3161 {
3162 	return (isc_socket_sendtov2(sock, buflist, task, action, arg, NULL,
3163 				    NULL, 0));
3164 }
3165 
3166 isc_result_t
isc__socket_sendtov(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3167 isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3168 		    isc_task_t *task, isc_taskaction_t action, void *arg,
3169 		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3170 {
3171 	return (isc_socket_sendtov2(sock, buflist, task, action, arg, address,
3172 				    pktinfo, 0));
3173 }
3174 
3175 isc_result_t
isc__socket_sendtov2(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3176 isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
3177 		     isc_task_t *task, isc_taskaction_t action, void *arg,
3178 		     isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3179 		     unsigned int flags)
3180 {
3181 	isc_socketevent_t *dev;
3182 	isc_socketmgr_t *manager;
3183 	unsigned int iocount;
3184 	isc_buffer_t *buffer;
3185 	isc_result_t ret;
3186 
3187 	REQUIRE(VALID_SOCKET(sock));
3188 
3189 	LOCK(&sock->lock);
3190 	CONSISTENT(sock);
3191 
3192 	/*
3193 	 * make sure that the socket's not closed
3194 	 */
3195 	if (sock->fd == INVALID_SOCKET) {
3196 		UNLOCK(&sock->lock);
3197 		return (ISC_R_CONNREFUSED);
3198 	}
3199 	REQUIRE(buflist != NULL);
3200 	REQUIRE(!ISC_LIST_EMPTY(*buflist));
3201 	REQUIRE(task != NULL);
3202 	REQUIRE(action != NULL);
3203 
3204 	manager = sock->manager;
3205 	REQUIRE(VALID_MANAGER(manager));
3206 
3207 	iocount = isc_bufferlist_usedcount(buflist);
3208 	REQUIRE(iocount > 0);
3209 
3210 	dev = allocate_socketevent(manager->mctx, sock,
3211 				   ISC_SOCKEVENT_SENDDONE, action, arg);
3212 	if (dev == NULL) {
3213 		UNLOCK(&sock->lock);
3214 		return (ISC_R_NOMEMORY);
3215 	}
3216 
3217 	/*
3218 	 * Move each buffer from the passed in list to our internal one.
3219 	 */
3220 	buffer = ISC_LIST_HEAD(*buflist);
3221 	while (buffer != NULL) {
3222 		ISC_LIST_DEQUEUE(*buflist, buffer, link);
3223 		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3224 		buffer = ISC_LIST_HEAD(*buflist);
3225 	}
3226 
3227 	ret = socket_send(sock, dev, task, address, pktinfo, flags);
3228 	UNLOCK(&sock->lock);
3229 	return (ret);
3230 }
3231 
3232 isc_result_t
isc__socket_sendto2(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,isc_socketevent_t * event,unsigned int flags)3233 isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3234 		    isc_task_t *task,
3235 		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3236 		    isc_socketevent_t *event, unsigned int flags)
3237 {
3238 	isc_result_t ret;
3239 
3240 	REQUIRE(VALID_SOCKET(sock));
3241 	LOCK(&sock->lock);
3242 	CONSISTENT(sock);
3243 
3244 	REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3245 	if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3246 		REQUIRE(sock->type == isc_sockettype_udp);
3247 	event->ev_sender = sock;
3248 	event->result = ISC_R_UNEXPECTED;
3249 	/*
3250 	 * make sure that the socket's not closed
3251 	 */
3252 	if (sock->fd == INVALID_SOCKET) {
3253 		UNLOCK(&sock->lock);
3254 		return (ISC_R_CONNREFUSED);
3255 	}
3256 	ISC_LIST_INIT(event->bufferlist);
3257 	event->region = *region;
3258 	event->n = 0;
3259 	event->offset = 0;
3260 	event->attributes = 0;
3261 
3262 	ret = socket_send(sock, event, task, address, pktinfo, flags);
3263 	UNLOCK(&sock->lock);
3264 	return (ret);
3265 }
3266 
3267 isc_result_t
isc__socket_bind(isc_socket_t * sock,isc_sockaddr_t * sockaddr,unsigned int options)3268 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3269 		 unsigned int options) {
3270 	int bind_errno;
3271 	char strbuf[ISC_STRERRORSIZE];
3272 	int on = 1;
3273 
3274 	REQUIRE(VALID_SOCKET(sock));
3275 	LOCK(&sock->lock);
3276 	CONSISTENT(sock);
3277 
3278 	/*
3279 	 * make sure that the socket's not closed
3280 	 */
3281 	if (sock->fd == INVALID_SOCKET) {
3282 		UNLOCK(&sock->lock);
3283 		return (ISC_R_CONNREFUSED);
3284 	}
3285 
3286 	INSIST(!sock->bound);
3287 	INSIST(!sock->dupped);
3288 
3289 	if (sock->pf != sockaddr->type.sa.sa_family) {
3290 		UNLOCK(&sock->lock);
3291 		return (ISC_R_FAMILYMISMATCH);
3292 	}
3293 	/*
3294 	 * Only set SO_REUSEADDR when we want a specific port.
3295 	 */
3296 	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3297 	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3298 	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3299 		       sizeof(on)) < 0) {
3300 		UNEXPECTED_ERROR(__FILE__, __LINE__,
3301 				 "setsockopt(%d) %s", sock->fd,
3302 				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3303 						ISC_MSG_FAILED, "failed"));
3304 		/* Press on... */
3305 	}
3306 	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3307 		bind_errno = WSAGetLastError();
3308 		UNLOCK(&sock->lock);
3309 		switch (bind_errno) {
3310 		case WSAEACCES:
3311 			return (ISC_R_NOPERM);
3312 		case WSAEADDRNOTAVAIL:
3313 			return (ISC_R_ADDRNOTAVAIL);
3314 		case WSAEADDRINUSE:
3315 			return (ISC_R_ADDRINUSE);
3316 		case WSAEINVAL:
3317 			return (ISC_R_BOUND);
3318 		default:
3319 			isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3320 			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3321 					 strbuf);
3322 			return (ISC_R_UNEXPECTED);
3323 		}
3324 	}
3325 
3326 	socket_log(__LINE__, sock, sockaddr, TRACE,
3327 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3328 	sock->bound = 1;
3329 
3330 	UNLOCK(&sock->lock);
3331 	return (ISC_R_SUCCESS);
3332 }
3333 
3334 isc_result_t
isc__socket_filter(isc_socket_t * sock,const char * filter)3335 isc__socket_filter(isc_socket_t *sock, const char *filter) {
3336 	UNUSED(sock);
3337 	UNUSED(filter);
3338 
3339 	REQUIRE(VALID_SOCKET(sock));
3340 	return (ISC_R_NOTIMPLEMENTED);
3341 }
3342 
3343 /*
3344  * Set up to listen on a given socket.  We do this by creating an internal
3345  * event that will be dispatched when the socket has read activity.  The
3346  * watcher will send the internal event to the task when there is a new
3347  * connection.
3348  *
3349  * Unlike in read, we don't preallocate a done event here.  Every time there
3350  * is a new connection we'll have to allocate a new one anyway, so we might
3351  * as well keep things simple rather than having to track them.
3352  */
3353 isc_result_t
isc__socket_listen(isc_socket_t * sock,unsigned int backlog)3354 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3355 	char strbuf[ISC_STRERRORSIZE];
3356 #if defined(ISC_PLATFORM_HAVETFO) && defined(TCP_FASTOPEN)
3357 	char on = 1;
3358 #endif
3359 
3360 	REQUIRE(VALID_SOCKET(sock));
3361 
3362 	LOCK(&sock->lock);
3363 	CONSISTENT(sock);
3364 
3365 	/*
3366 	 * make sure that the socket's not closed
3367 	 */
3368 	if (sock->fd == INVALID_SOCKET) {
3369 		UNLOCK(&sock->lock);
3370 		return (ISC_R_CONNREFUSED);
3371 	}
3372 
3373 	REQUIRE(!sock->listener);
3374 	REQUIRE(sock->bound);
3375 	REQUIRE(sock->type == isc_sockettype_tcp);
3376 
3377 	if (backlog == 0)
3378 		backlog = SOMAXCONN;
3379 
3380 	if (listen(sock->fd, (int)backlog) < 0) {
3381 		UNLOCK(&sock->lock);
3382 		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3383 
3384 		UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3385 
3386 		return (ISC_R_UNEXPECTED);
3387 	}
3388 
3389 #if defined(ISC_PLATFORM_HAVETFO) && defined(TCP_FASTOPEN)
3390 	if (setsockopt(sock->fd, IPPROTO_TCP, TCP_FASTOPEN,
3391 		       &on, sizeof(on)) < 0) {
3392 		isc__strerror(errno, strbuf, sizeof(strbuf));
3393 		UNEXPECTED_ERROR(__FILE__, __LINE__,
3394 				 "setsockopt(%d, TCP_FASTOPEN) failed with %s",
3395 				 sock->fd, strbuf);
3396 		/* TCP_FASTOPEN is experimental so ignore failures */
3397 	}
3398 #endif
3399 
3400 	socket_log(__LINE__, sock, NULL, TRACE,
3401 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3402 	sock->listener = 1;
3403 	_set_state(sock, SOCK_LISTEN);
3404 
3405 	UNLOCK(&sock->lock);
3406 	return (ISC_R_SUCCESS);
3407 }
3408 
3409 /*
3410  * This should try to do aggressive accept() XXXMLG
3411  */
3412 isc_result_t
isc__socket_accept(isc_socket_t * sock,isc_task_t * task,isc_taskaction_t action,void * arg)3413 isc__socket_accept(isc_socket_t *sock,
3414 		   isc_task_t *task, isc_taskaction_t action, void *arg)
3415 {
3416 	isc_socket_newconnev_t *adev;
3417 	isc_socketmgr_t *manager;
3418 	isc_task_t *ntask = NULL;
3419 	isc_socket_t *nsock;
3420 	isc_result_t result;
3421 	IoCompletionInfo *lpo;
3422 
3423 	REQUIRE(VALID_SOCKET(sock));
3424 
3425 	manager = sock->manager;
3426 	REQUIRE(VALID_MANAGER(manager));
3427 
3428 	LOCK(&sock->lock);
3429 	CONSISTENT(sock);
3430 
3431 	/*
3432 	 * make sure that the socket's not closed
3433 	 */
3434 	if (sock->fd == INVALID_SOCKET) {
3435 		UNLOCK(&sock->lock);
3436 		return (ISC_R_CONNREFUSED);
3437 	}
3438 
3439 	REQUIRE(sock->listener);
3440 
3441 	/*
3442 	 * Sender field is overloaded here with the task we will be sending
3443 	 * this event to.  Just before the actual event is delivered the
3444 	 * actual ev_sender will be touched up to be the socket.
3445 	 */
3446 	adev = (isc_socket_newconnev_t *)
3447 		isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3448 				   action, arg, sizeof(*adev));
3449 	if (adev == NULL) {
3450 		UNLOCK(&sock->lock);
3451 		return (ISC_R_NOMEMORY);
3452 	}
3453 	ISC_LINK_INIT(adev, ev_link);
3454 
3455 	result = allocate_socket(manager, sock->type, &nsock);
3456 	if (result != ISC_R_SUCCESS) {
3457 		isc_event_free((isc_event_t **)&adev);
3458 		UNLOCK(&sock->lock);
3459 		return (result);
3460 	}
3461 
3462 	/*
3463 	 * AcceptEx() requires we pass in a socket.
3464 	 */
3465 	nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3466 	if (nsock->fd == INVALID_SOCKET) {
3467 		free_socket(&nsock, __LINE__);
3468 		isc_event_free((isc_event_t **)&adev);
3469 		UNLOCK(&sock->lock);
3470 		return (ISC_R_FAILURE); // XXXMLG need real error message
3471 	}
3472 
3473 	/*
3474 	 * Attach to socket and to task.
3475 	 */
3476 	isc_task_attach(task, &ntask);
3477 	if (isc_task_exiting(ntask)) {
3478 		free_socket(&nsock, __LINE__);
3479 		isc_task_detach(&ntask);
3480 		isc_event_free(ISC_EVENT_PTR(&adev));
3481 		UNLOCK(&sock->lock);
3482 		return (ISC_R_SHUTTINGDOWN);
3483 	}
3484 	nsock->references++;
3485 
3486 	adev->ev_sender = ntask;
3487 	adev->newsocket = nsock;
3488 	_set_state(nsock, SOCK_ACCEPT);
3489 
3490 	/*
3491 	 * Queue io completion for an accept().
3492 	 */
3493 	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3494 					    HEAP_ZERO_MEMORY,
3495 					    sizeof(IoCompletionInfo));
3496 	RUNTIME_CHECK(lpo != NULL);
3497 	lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3498 		(sizeof(SOCKADDR_STORAGE) + 16) * 2);
3499 	RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3500 
3501 	lpo->adev = adev;
3502 	lpo->request_type = SOCKET_ACCEPT;
3503 
3504 	ISCAcceptEx(sock->fd,
3505 		    nsock->fd,				/* Accepted Socket */
3506 		    lpo->acceptbuffer,			/* Buffer for initial Recv */
3507 		    0,					/* Length of Buffer */
3508 		    sizeof(SOCKADDR_STORAGE) + 16,		/* Local address length + 16 */
3509 		    sizeof(SOCKADDR_STORAGE) + 16,		/* Remote address length + 16 */
3510 		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
3511 		    (LPOVERLAPPED)lpo			/* Overlapped structure */
3512 		    );
3513 	iocompletionport_update(nsock);
3514 
3515 	socket_log(__LINE__, sock, NULL, TRACE,
3516 		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3517 		   "accepting for nsock %p fd %d", nsock, nsock->fd);
3518 
3519 	/*
3520 	 * Enqueue the event
3521 	 */
3522 	ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3523 	sock->pending_accept++;
3524 	sock->pending_iocp++;
3525 
3526 	UNLOCK(&sock->lock);
3527 	return (ISC_R_SUCCESS);
3528 }
3529 
3530 isc_result_t
isc__socket_connect(isc_socket_t * sock,isc_sockaddr_t * addr,isc_task_t * task,isc_taskaction_t action,void * arg)3531 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3532 		    isc_task_t *task, isc_taskaction_t action, void *arg)
3533 {
3534 	char strbuf[ISC_STRERRORSIZE];
3535 	isc_socket_connev_t *cdev;
3536 	isc_task_t *ntask = NULL;
3537 	isc_socketmgr_t *manager;
3538 	IoCompletionInfo *lpo;
3539 	int bind_errno;
3540 
3541 	REQUIRE(VALID_SOCKET(sock));
3542 	REQUIRE(addr != NULL);
3543 	REQUIRE(task != NULL);
3544 	REQUIRE(action != NULL);
3545 
3546 	manager = sock->manager;
3547 	REQUIRE(VALID_MANAGER(manager));
3548 	REQUIRE(addr != NULL);
3549 
3550 	if (isc_sockaddr_ismulticast(addr))
3551 		return (ISC_R_MULTICAST);
3552 
3553 	LOCK(&sock->lock);
3554 	CONSISTENT(sock);
3555 
3556 	/*
3557 	 * make sure that the socket's not closed
3558 	 */
3559 	if (sock->fd == INVALID_SOCKET) {
3560 		UNLOCK(&sock->lock);
3561 		return (ISC_R_CONNREFUSED);
3562 	}
3563 
3564 	/*
3565 	 * Windows sockets won't connect unless the socket is bound.
3566 	 */
3567 	if (!sock->bound) {
3568 		isc_sockaddr_t any;
3569 
3570 		isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3571 		if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3572 			bind_errno = WSAGetLastError();
3573 			UNLOCK(&sock->lock);
3574 			switch (bind_errno) {
3575 			case WSAEACCES:
3576 				return (ISC_R_NOPERM);
3577 			case WSAEADDRNOTAVAIL:
3578 				return (ISC_R_ADDRNOTAVAIL);
3579 			case WSAEADDRINUSE:
3580 				return (ISC_R_ADDRINUSE);
3581 			case WSAEINVAL:
3582 				return (ISC_R_BOUND);
3583 			default:
3584 				isc__strerror(bind_errno, strbuf,
3585 					      sizeof(strbuf));
3586 				UNEXPECTED_ERROR(__FILE__, __LINE__,
3587 						 "bind: %s", strbuf);
3588 				return (ISC_R_UNEXPECTED);
3589 			}
3590 		}
3591 		sock->bound = 1;
3592 	}
3593 
3594 	cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3595 							ISC_SOCKEVENT_CONNECT,
3596 							action,	arg,
3597 							sizeof(*cdev));
3598 	if (cdev == NULL) {
3599 		UNLOCK(&sock->lock);
3600 		return (ISC_R_NOMEMORY);
3601 	}
3602 	ISC_LINK_INIT(cdev, ev_link);
3603 
3604 	if (sock->connected) {
3605 		INSIST(isc_sockaddr_equal(&sock->address, addr));
3606 		cdev->result = ISC_R_SUCCESS;
3607 		isc_task_send(task, ISC_EVENT_PTR(&cdev));
3608 
3609 		UNLOCK(&sock->lock);
3610 		return (ISC_R_SUCCESS);
3611 	}
3612 
3613 	if ((sock->type == isc_sockettype_tcp) && !sock->pending_connect) {
3614 		/*
3615 		 * Queue io completion for an accept().
3616 		 */
3617 		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3618 						    HEAP_ZERO_MEMORY,
3619 						    sizeof(IoCompletionInfo));
3620 		lpo->cdev = cdev;
3621 		lpo->request_type = SOCKET_CONNECT;
3622 
3623 		sock->address = *addr;
3624 		ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3625 			NULL, 0, NULL, (LPOVERLAPPED)lpo);
3626 
3627 		/*
3628 		 * Attach to task.
3629 		 */
3630 		isc_task_attach(task, &ntask);
3631 		cdev->ev_sender = ntask;
3632 
3633 		sock->pending_connect = 1;
3634 		_set_state(sock, SOCK_CONNECT);
3635 
3636 		/*
3637 		 * Enqueue the request.
3638 		 */
3639 		INSIST(!ISC_LINK_LINKED(cdev, ev_link));
3640 		ISC_LIST_ENQUEUE(sock->connect_list, cdev, ev_link);
3641 		sock->pending_iocp++;
3642 	} else if (sock->type == isc_sockettype_tcp) {
3643 		INSIST(sock->pending_connect);
3644 		INSIST(isc_sockaddr_equal(&sock->address, addr));
3645 		isc_task_attach(task, &ntask);
3646 		cdev->ev_sender = ntask;
3647 		INSIST(!ISC_LINK_LINKED(cdev, ev_link));
3648 		ISC_LIST_ENQUEUE(sock->connect_list, cdev, ev_link);
3649 	} else {
3650 		REQUIRE(!sock->pending_connect);
3651 		WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3652 		cdev->result = ISC_R_SUCCESS;
3653 		isc_task_send(task, (isc_event_t **)&cdev);
3654 	}
3655 	CONSISTENT(sock);
3656 	UNLOCK(&sock->lock);
3657 
3658 	return (ISC_R_SUCCESS);
3659 }
3660 
3661 isc_result_t
isc__socket_getpeername(isc_socket_t * sock,isc_sockaddr_t * addressp)3662 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3663 	isc_result_t result;
3664 
3665 	REQUIRE(VALID_SOCKET(sock));
3666 	REQUIRE(addressp != NULL);
3667 
3668 	LOCK(&sock->lock);
3669 	CONSISTENT(sock);
3670 
3671 	/*
3672 	 * make sure that the socket's not closed
3673 	 */
3674 	if (sock->fd == INVALID_SOCKET) {
3675 		UNLOCK(&sock->lock);
3676 		return (ISC_R_CONNREFUSED);
3677 	}
3678 
3679 	if (sock->connected) {
3680 		*addressp = sock->address;
3681 		result = ISC_R_SUCCESS;
3682 	} else {
3683 		result = ISC_R_NOTCONNECTED;
3684 	}
3685 
3686 	UNLOCK(&sock->lock);
3687 
3688 	return (result);
3689 }
3690 
3691 isc_result_t
isc__socket_getsockname(isc_socket_t * sock,isc_sockaddr_t * addressp)3692 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3693 	ISC_SOCKADDR_LEN_T len;
3694 	isc_result_t result;
3695 	char strbuf[ISC_STRERRORSIZE];
3696 
3697 	REQUIRE(VALID_SOCKET(sock));
3698 	REQUIRE(addressp != NULL);
3699 
3700 	LOCK(&sock->lock);
3701 	CONSISTENT(sock);
3702 
3703 	/*
3704 	 * make sure that the socket's not closed
3705 	 */
3706 	if (sock->fd == INVALID_SOCKET) {
3707 		UNLOCK(&sock->lock);
3708 		return (ISC_R_CONNREFUSED);
3709 	}
3710 
3711 	if (!sock->bound) {
3712 		result = ISC_R_NOTBOUND;
3713 		goto out;
3714 	}
3715 
3716 	result = ISC_R_SUCCESS;
3717 
3718 	len = sizeof(addressp->type);
3719 	if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3720 		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3721 		UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3722 				 strbuf);
3723 		result = ISC_R_UNEXPECTED;
3724 		goto out;
3725 	}
3726 	addressp->length = (unsigned int)len;
3727 
3728  out:
3729 	UNLOCK(&sock->lock);
3730 
3731 	return (result);
3732 }
3733 
3734 /*
3735  * Run through the list of events on this socket, and cancel the ones
3736  * queued for task "task" of type "how".  "how" is a bitmask.
3737  */
3738 void
isc__socket_cancel(isc_socket_t * sock,isc_task_t * task,unsigned int how)3739 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3740 
3741 	REQUIRE(VALID_SOCKET(sock));
3742 
3743 	/*
3744 	 * Quick exit if there is nothing to do.  Don't even bother locking
3745 	 * in this case.
3746 	 */
3747 	if (how == 0)
3748 		return;
3749 
3750 	LOCK(&sock->lock);
3751 	CONSISTENT(sock);
3752 
3753 	/*
3754 	 * make sure that the socket's not closed
3755 	 */
3756 	if (sock->fd == INVALID_SOCKET) {
3757 		UNLOCK(&sock->lock);
3758 		return;
3759 	}
3760 
3761 	/*
3762 	 * All of these do the same thing, more or less.
3763 	 * Each will:
3764 	 *	o If the internal event is marked as "posted" try to
3765 	 *	  remove it from the task's queue.  If this fails, mark it
3766 	 *	  as canceled instead, and let the task clean it up later.
3767 	 *	o For each I/O request for that task of that type, post
3768 	 *	  its done event with status of "ISC_R_CANCELED".
3769 	 *	o Reset any state needed.
3770 	 */
3771 
3772 	if ((how & ISC_SOCKCANCEL_RECV) != 0) {
3773 		isc_socketevent_t      *dev;
3774 		isc_socketevent_t      *next;
3775 		isc_task_t	       *current_task;
3776 
3777 		dev = ISC_LIST_HEAD(sock->recv_list);
3778 		while (dev != NULL) {
3779 			current_task = dev->ev_sender;
3780 			next = ISC_LIST_NEXT(dev, ev_link);
3781 			if ((task == NULL) || (task == current_task)) {
3782 				dev->result = ISC_R_CANCELED;
3783 				send_recvdone_event(sock, &dev);
3784 			}
3785 			dev = next;
3786 		}
3787 	}
3788 	how &= ~ISC_SOCKCANCEL_RECV;
3789 
3790 	if ((how & ISC_SOCKCANCEL_SEND) != 0) {
3791 		isc_socketevent_t      *dev;
3792 		isc_socketevent_t      *next;
3793 		isc_task_t	       *current_task;
3794 
3795 		dev = ISC_LIST_HEAD(sock->send_list);
3796 
3797 		while (dev != NULL) {
3798 			current_task = dev->ev_sender;
3799 			next = ISC_LIST_NEXT(dev, ev_link);
3800 			if ((task == NULL) || (task == current_task)) {
3801 				dev->result = ISC_R_CANCELED;
3802 				send_senddone_event(sock, &dev);
3803 			}
3804 			dev = next;
3805 		}
3806 	}
3807 	how &= ~ISC_SOCKCANCEL_SEND;
3808 
3809 	if (((how & ISC_SOCKCANCEL_ACCEPT) != 0)
3810 	    && !ISC_LIST_EMPTY(sock->accept_list)) {
3811 		isc_socket_newconnev_t *dev;
3812 		isc_socket_newconnev_t *next;
3813 		isc_task_t	       *current_task;
3814 
3815 		dev = ISC_LIST_HEAD(sock->accept_list);
3816 		while (dev != NULL) {
3817 			current_task = dev->ev_sender;
3818 			next = ISC_LIST_NEXT(dev, ev_link);
3819 
3820 			if ((task == NULL) || (task == current_task)) {
3821 
3822 				dev->newsocket->references--;
3823 				closesocket(dev->newsocket->fd);
3824 				dev->newsocket->fd = INVALID_SOCKET;
3825 				free_socket(&dev->newsocket, __LINE__);
3826 
3827 				dev->result = ISC_R_CANCELED;
3828 				send_acceptdone_event(sock, &dev);
3829 			}
3830 
3831 			dev = next;
3832 		}
3833 	}
3834 	how &= ~ISC_SOCKCANCEL_ACCEPT;
3835 
3836 	if (((how & ISC_SOCKCANCEL_CONNECT) != 0)
3837 	    && !ISC_LIST_EMPTY(sock->connect_list)) {
3838 		isc_socket_connev_t    *dev;
3839 		isc_socket_connev_t    *next;
3840 		isc_task_t	       *current_task;
3841 
3842 		INSIST(sock->pending_connect);
3843 
3844 		dev = ISC_LIST_HEAD(sock->connect_list);
3845 
3846 		while (dev != NULL) {
3847 			current_task = dev->ev_sender;
3848 			next = ISC_LIST_NEXT(dev, ev_link);
3849 			if ((task == NULL) || (task == current_task)) {
3850 				dev->result = ISC_R_CANCELED;
3851 				send_connectdone_event(sock, &dev);
3852 			}
3853 			dev = next;
3854 		}
3855 		closesocket(sock->fd);
3856 		sock->fd = INVALID_SOCKET;
3857 		_set_state(sock, SOCK_CLOSED);
3858 	}
3859 	how &= ~ISC_SOCKCANCEL_CONNECT;
3860 	UNUSED(how);
3861 
3862 	maybe_free_socket(&sock, __LINE__);
3863 }
3864 
3865 isc_sockettype_t
isc__socket_gettype(isc_socket_t * sock)3866 isc__socket_gettype(isc_socket_t *sock) {
3867 	isc_sockettype_t type;
3868 
3869 	REQUIRE(VALID_SOCKET(sock));
3870 
3871 	LOCK(&sock->lock);
3872 
3873 	/*
3874 	 * make sure that the socket's not closed
3875 	 */
3876 	if (sock->fd == INVALID_SOCKET) {
3877 		UNLOCK(&sock->lock);
3878 		return (ISC_R_CONNREFUSED);
3879 	}
3880 
3881 	type = sock->type;
3882 	UNLOCK(&sock->lock);
3883 	return (type);
3884 }
3885 
3886 bool
isc__socket_isbound(isc_socket_t * sock)3887 isc__socket_isbound(isc_socket_t *sock) {
3888 	bool val;
3889 
3890 	REQUIRE(VALID_SOCKET(sock));
3891 
3892 	LOCK(&sock->lock);
3893 	CONSISTENT(sock);
3894 
3895 	/*
3896 	 * make sure that the socket's not closed
3897 	 */
3898 	if (sock->fd == INVALID_SOCKET) {
3899 		UNLOCK(&sock->lock);
3900 		return (false);
3901 	}
3902 
3903 	val = ((sock->bound) ? true : false);
3904 	UNLOCK(&sock->lock);
3905 
3906 	return (val);
3907 }
3908 
3909 void
isc__socket_ipv6only(isc_socket_t * sock,bool yes)3910 isc__socket_ipv6only(isc_socket_t *sock, bool yes) {
3911 #if defined(IPV6_V6ONLY)
3912 	int onoff = yes ? 1 : 0;
3913 #else
3914 	UNUSED(yes);
3915 #endif
3916 
3917 	REQUIRE(VALID_SOCKET(sock));
3918 
3919 #ifdef IPV6_V6ONLY
3920 	if (sock->pf == AF_INET6) {
3921 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3922 				 (char *)&onoff, sizeof(onoff));
3923 	}
3924 #endif
3925 }
3926 
3927 void
isc__socket_dscp(isc_socket_t * sock,isc_dscp_t dscp)3928 isc__socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) {
3929 #if !defined(IP_TOS) && !defined(IPV6_TCLASS)
3930 	UNUSED(dscp);
3931 #else
3932 	if (dscp < 0)
3933 		return;
3934 
3935 	dscp <<= 2;
3936 	dscp &= 0xff;
3937 #endif
3938 
3939 	REQUIRE(VALID_SOCKET(sock));
3940 
3941 #ifdef IP_TOS
3942 	if (sock->pf == AF_INET) {
3943 		(void)setsockopt(sock->fd, IPPROTO_IP, IP_TOS,
3944 				 (char *)&dscp, sizeof(dscp));
3945 	}
3946 #endif
3947 #ifdef IPV6_TCLASS
3948 	if (sock->pf == AF_INET6) {
3949 		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS,
3950 				 (char *)&dscp, sizeof(dscp));
3951 	}
3952 #endif
3953 }
3954 
3955 void
isc__socket_cleanunix(isc_sockaddr_t * addr,bool active)3956 isc__socket_cleanunix(isc_sockaddr_t *addr, bool active) {
3957 	UNUSED(addr);
3958 	UNUSED(active);
3959 }
3960 
3961 isc_result_t
isc__socket_permunix(isc_sockaddr_t * addr,uint32_t perm,uint32_t owner,uint32_t group)3962 isc__socket_permunix(isc_sockaddr_t *addr, uint32_t perm,
3963 		     uint32_t owner, uint32_t group)
3964 {
3965 	UNUSED(addr);
3966 	UNUSED(perm);
3967 	UNUSED(owner);
3968 	UNUSED(group);
3969 	return (ISC_R_NOTIMPLEMENTED);
3970 }
3971 
3972 void
isc__socket_setname(isc_socket_t * socket,const char * name,void * tag)3973 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3974 
3975 	/*
3976 	 * Name 'socket'.
3977 	 */
3978 
3979 	REQUIRE(VALID_SOCKET(socket));
3980 
3981 	LOCK(&socket->lock);
3982 	strlcpy(socket->name, name, sizeof(socket->name));
3983 	socket->tag = tag;
3984 	UNLOCK(&socket->lock);
3985 }
3986 
3987 const char *
isc__socket_getname(isc_socket_t * socket)3988 isc__socket_getname(isc_socket_t *socket) {
3989 	return (socket->name);
3990 }
3991 
3992 void *
isc__socket_gettag(isc_socket_t * socket)3993 isc__socket_gettag(isc_socket_t *socket) {
3994 	return (socket->tag);
3995 }
3996 
3997 int
isc__socket_getfd(isc_socket_t * socket)3998 isc__socket_getfd(isc_socket_t *socket) {
3999 	return ((short) socket->fd);
4000 }
4001 
4002 void
isc__socketmgr_setreserved(isc_socketmgr_t * manager,uint32_t reserved)4003 isc__socketmgr_setreserved(isc_socketmgr_t *manager, uint32_t reserved) {
4004 	UNUSED(manager);
4005 	UNUSED(reserved);
4006 }
4007 
4008 void
isc___socketmgr_maxudp(isc_socketmgr_t * manager,unsigned int maxudp)4009 isc___socketmgr_maxudp(isc_socketmgr_t *manager, unsigned int maxudp) {
4010 
4011 	REQUIRE(VALID_MANAGER(manager));
4012 
4013 	manager->maxudp = maxudp;
4014 }
4015 
4016 isc_socketevent_t *
isc_socket_socketevent(isc_mem_t * mctx,void * sender,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)4017 isc_socket_socketevent(isc_mem_t *mctx, void *sender,
4018 		       isc_eventtype_t eventtype, isc_taskaction_t action,
4019 		       void *arg)
4020 {
4021 	return (allocate_socketevent(mctx, sender, eventtype, action, arg));
4022 }
4023 
4024 #ifdef HAVE_LIBXML2
4025 
4026 static const char *
_socktype(isc_sockettype_t type)4027 _socktype(isc_sockettype_t type) {
4028 	if (type == isc_sockettype_udp)
4029 		return ("udp");
4030 	else if (type == isc_sockettype_tcp)
4031 		return ("tcp");
4032 	else if (type == isc_sockettype_unix)
4033 		return ("unix");
4034 	else if (type == isc_sockettype_fdwatch)
4035 		return ("fdwatch");
4036 	else
4037 		return ("not-initialized");
4038 }
4039 
4040 #define TRY0(a) do { xmlrc = (a); if (xmlrc < 0) goto error; } while(0)
4041 int
isc_socketmgr_renderxml(isc_socketmgr_t * mgr,xmlTextWriterPtr writer)4042 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
4043 {
4044 	isc_socket_t *sock = NULL;
4045 	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
4046 	isc_sockaddr_t addr;
4047 	ISC_SOCKADDR_LEN_T len;
4048 	int xmlrc;
4049 
4050 	LOCK(&mgr->lock);
4051 
4052 #ifndef ISC_PLATFORM_USETHREADS
4053 	TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "references"));
4054 	TRY0(xmlTextWriterWriteFormatString(writer, "%d", mgr->refs));
4055 	TRY0(xmlTextWriterEndElement(writer));
4056 #endif
4057 
4058 	TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets"));
4059 	sock = ISC_LIST_HEAD(mgr->socklist);
4060 	while (sock != NULL) {
4061 		LOCK(&sock->lock);
4062 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket"));
4063 
4064 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id"));
4065 		TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock));
4066 		TRY0(xmlTextWriterEndElement(writer));
4067 
4068 		if (sock->name[0] != 0) {
4069 			TRY0(xmlTextWriterStartElement(writer,
4070 						       ISC_XMLCHAR "name"));
4071 			TRY0(xmlTextWriterWriteFormatString(writer, "%s",
4072 							    sock->name));
4073 			TRY0(xmlTextWriterEndElement(writer)); /* name */
4074 		}
4075 
4076 		TRY0(xmlTextWriterStartElement(writer,
4077 					       ISC_XMLCHAR "references"));
4078 		TRY0(xmlTextWriterWriteFormatString(writer, "%d",
4079 						    sock->references));
4080 		TRY0(xmlTextWriterEndElement(writer));
4081 
4082 		TRY0(xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
4083 					  ISC_XMLCHAR _socktype(sock->type)));
4084 
4085 		if (sock->connected) {
4086 			isc_sockaddr_format(&sock->address, peerbuf,
4087 					    sizeof(peerbuf));
4088 			TRY0(xmlTextWriterWriteElement(writer,
4089 						  ISC_XMLCHAR "peer-address",
4090 						  ISC_XMLCHAR peerbuf));
4091 		}
4092 
4093 		len = sizeof(addr);
4094 		if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
4095 			isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
4096 			TRY0(xmlTextWriterWriteElement(writer,
4097 						  ISC_XMLCHAR "local-address",
4098 						  ISC_XMLCHAR peerbuf));
4099 		}
4100 
4101 		TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states"));
4102 		if (sock->pending_recv)
4103 			TRY0(xmlTextWriterWriteElement(writer,
4104 						ISC_XMLCHAR "state",
4105 						ISC_XMLCHAR "pending-receive"));
4106 		if (sock->pending_send)
4107 			TRY0(xmlTextWriterWriteElement(writer,
4108 						  ISC_XMLCHAR "state",
4109 						  ISC_XMLCHAR "pending-send"));
4110 		if (sock->pending_accept)
4111 			TRY0(xmlTextWriterWriteElement(writer,
4112 						 ISC_XMLCHAR "state",
4113 						 ISC_XMLCHAR "pending_accept"));
4114 		if (sock->listener)
4115 			TRY0(xmlTextWriterWriteElement(writer,
4116 						       ISC_XMLCHAR "state",
4117 						       ISC_XMLCHAR "listener"));
4118 		if (sock->connected)
4119 			TRY0(xmlTextWriterWriteElement(writer,
4120 						     ISC_XMLCHAR "state",
4121 						     ISC_XMLCHAR "connected"));
4122 		if (sock->pending_connect)
4123 			TRY0(xmlTextWriterWriteElement(writer,
4124 						  ISC_XMLCHAR "state",
4125 						  ISC_XMLCHAR "connecting"));
4126 		if (sock->bound)
4127 			TRY0(xmlTextWriterWriteElement(writer,
4128 						  ISC_XMLCHAR "state",
4129 						  ISC_XMLCHAR "bound"));
4130 
4131 		TRY0(xmlTextWriterEndElement(writer)); /* states */
4132 
4133 		TRY0(xmlTextWriterEndElement(writer)); /* socket */
4134 
4135 		UNLOCK(&sock->lock);
4136 		sock = ISC_LIST_NEXT(sock, link);
4137 	}
4138 	TRY0(xmlTextWriterEndElement(writer)); /* sockets */
4139 
4140 error:
4141 	if (sock != NULL)
4142 		UNLOCK(&sock->lock);
4143 
4144 	UNLOCK(&mgr->lock);
4145 
4146 	return (xmlrc);
4147 }
4148 #endif /* HAVE_LIBXML2 */
4149 
4150 #ifdef HAVE_JSON
4151 #define CHECKMEM(m) do { \
4152 	if (m == NULL) { \
4153 		result = ISC_R_NOMEMORY;\
4154 		goto error;\
4155 	} \
4156 } while(0)
4157 
4158 isc_result_t
isc_socketmgr_renderjson(isc_socketmgr_t * mgr,json_object * stats)4159 isc_socketmgr_renderjson(isc_socketmgr_t *mgr, json_object *stats) {
4160 	isc_result_t result = ISC_R_SUCCESS;
4161 	isc_socket_t *sock = NULL;
4162 	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
4163 	isc_sockaddr_t addr;
4164 	ISC_SOCKADDR_LEN_T len;
4165 	json_object *obj, *array = json_object_new_array();
4166 
4167 	CHECKMEM(array);
4168 
4169 	LOCK(&mgr->lock);
4170 
4171 #ifdef USE_SHARED_MANAGER
4172 	obj = json_object_new_int(mgr->refs);
4173 	CHECKMEM(obj);
4174 	json_object_object_add(stats, "references", obj);
4175 #endif	/* USE_SHARED_MANAGER */
4176 
4177 	sock = ISC_LIST_HEAD(mgr->socklist);
4178 	while (sock != NULL) {
4179 		json_object *states, *entry = json_object_new_object();
4180 		char buf[255];
4181 
4182 		CHECKMEM(entry);
4183 		json_object_array_add(array, entry);
4184 
4185 		LOCK(&sock->lock);
4186 
4187 		snprintf(buf, sizeof(buf), "%p", sock);
4188 		obj = json_object_new_string(buf);
4189 		CHECKMEM(obj);
4190 		json_object_object_add(entry, "id", obj);
4191 
4192 		if (sock->name[0] != 0) {
4193 			obj = json_object_new_string(sock->name);
4194 			CHECKMEM(obj);
4195 			json_object_object_add(entry, "name", obj);
4196 		}
4197 
4198 		obj = json_object_new_int(sock->references);
4199 		CHECKMEM(obj);
4200 		json_object_object_add(entry, "references", obj);
4201 
4202 		obj = json_object_new_string(_socktype(sock->type));
4203 		CHECKMEM(obj);
4204 		json_object_object_add(entry, "type", obj);
4205 
4206 		if (sock->connected) {
4207 			isc_sockaddr_format(&sock->address, peerbuf,
4208 					    sizeof(peerbuf));
4209 			obj = json_object_new_string(peerbuf);
4210 			CHECKMEM(obj);
4211 			json_object_object_add(entry, "peer-address", obj);
4212 		}
4213 
4214 		len = sizeof(addr);
4215 		if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
4216 			isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
4217 			obj = json_object_new_string(peerbuf);
4218 			CHECKMEM(obj);
4219 			json_object_object_add(entry, "local-address", obj);
4220 		}
4221 
4222 		states = json_object_new_array();
4223 		CHECKMEM(states);
4224 		json_object_object_add(entry, "states", states);
4225 
4226 		if (sock->pending_recv) {
4227 			obj = json_object_new_string("pending-receive");
4228 			CHECKMEM(obj);
4229 			json_object_array_add(states, obj);
4230 		}
4231 
4232 		if (sock->pending_send) {
4233 			obj = json_object_new_string("pending-send");
4234 			CHECKMEM(obj);
4235 			json_object_array_add(states, obj);
4236 		}
4237 
4238 		if (sock->pending_accept) {
4239 			obj = json_object_new_string("pending-accept");
4240 			CHECKMEM(obj);
4241 			json_object_array_add(states, obj);
4242 		}
4243 
4244 		if (sock->listener) {
4245 			obj = json_object_new_string("listener");
4246 			CHECKMEM(obj);
4247 			json_object_array_add(states, obj);
4248 		}
4249 
4250 		if (sock->connected) {
4251 			obj = json_object_new_string("connected");
4252 			CHECKMEM(obj);
4253 			json_object_array_add(states, obj);
4254 		}
4255 
4256 		if (sock->pending_connect) {
4257 			obj = json_object_new_string("connecting");
4258 			CHECKMEM(obj);
4259 			json_object_array_add(states, obj);
4260 		}
4261 
4262 		if (sock->bound) {
4263 			obj = json_object_new_string("bound");
4264 			CHECKMEM(obj);
4265 			json_object_array_add(states, obj);
4266 		}
4267 
4268 		UNLOCK(&sock->lock);
4269 		sock = ISC_LIST_NEXT(sock, link);
4270 	}
4271 
4272 	json_object_object_add(stats, "sockets", array);
4273 	array = NULL;
4274 	result = ISC_R_SUCCESS;
4275 
4276  error:
4277 	if (array != NULL)
4278 		json_object_put(array);
4279 
4280 	if (sock != NULL)
4281 		UNLOCK(&sock->lock);
4282 
4283 	UNLOCK(&mgr->lock);
4284 
4285 	return (result);
4286 }
4287 #endif /* HAVE_JSON */
4288 
4289 /*
4290  * Replace ../socket_api.c
4291  */
4292 
4293 isc_result_t
isc__socket_register(void)4294 isc__socket_register(void) {
4295 	return (ISC_R_SUCCESS);
4296 }
4297 
4298 isc_result_t
isc_socketmgr_createinctx(isc_mem_t * mctx,isc_appctx_t * actx,isc_socketmgr_t ** managerp)4299 isc_socketmgr_createinctx(isc_mem_t *mctx, isc_appctx_t *actx,
4300 			  isc_socketmgr_t **managerp)
4301 {
4302 	isc_result_t result;
4303 
4304 	result = isc_socketmgr_create(mctx, managerp);
4305 
4306 	if (result == ISC_R_SUCCESS)
4307 		isc_appctx_setsocketmgr(actx, *managerp);
4308 
4309 	return (result);
4310 }
4311