1 /*
2 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
7 *
8 * See the COPYRIGHT file distributed with this work for additional
9 * information regarding copyright ownership.
10 */
11
12 /* This code uses functions which are only available on Server 2003 and
13 * higher, and Windows XP and higher.
14 *
15 * This code is by nature multithreaded and takes advantage of various
16 * features to pass on information through the completion port for
17 * when I/O is completed. All sends, receives, accepts, and connects are
18 * completed through the completion port.
19 *
 * The number of Completion Port Worker threads used is the total number
 * of CPUs + 1 (capped at MAX_IOCPTHREADS). This increases the likelihood
 * that a Worker Thread is available for processing a completed request.
23 *
24 * XXXPDM 5 August, 2002
25 */
26
27 #include <config.h>
28
29 #define MAKE_EXTERNAL 1
30
31 #include <sys/types.h>
32
33 #ifndef _WINSOCKAPI_
34 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
35 #endif
36
37 #include <errno.h>
38 #include <stdbool.h>
39 #include <stddef.h>
40 #include <inttypes.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <io.h>
45 #include <fcntl.h>
46 #include <process.h>
47
48 #include <isc/app.h>
49 #include <isc/buffer.h>
50 #include <isc/bufferlist.h>
51 #include <isc/condition.h>
52 #include <isc/list.h>
53 #include <isc/log.h>
54 #include <isc/mem.h>
55 #include <isc/msgs.h>
56 #include <isc/mutex.h>
57 #include <isc/net.h>
58 #include <isc/once.h>
59 #include <isc/os.h>
60 #include <isc/platform.h>
61 #include <isc/print.h>
62 #include <isc/region.h>
63 #include <isc/socket.h>
64 #include <isc/stats.h>
65 #include <isc/strerror.h>
66 #include <isc/string.h>
67 #include <isc/syslog.h>
68 #include <isc/task.h>
69 #include <isc/thread.h>
70 #include <isc/util.h>
71 #include <isc/win32os.h>
72
73 #include <mswsock.h>
74
75 #include "errno2result.h"
76
77 /*
78 * Set by the -T dscp option on the command line. If set to a value
79 * other than -1, we check to make sure DSCP values match it, and
80 * assert if not.
81 */
82 LIBISC_EXTERNAL_DATA int isc_dscp_check_value = -1;
83
/*
 * How in the world can Microsoft exist with APIs like this?
 * We can't actually call this directly, because it turns out
 * no library exports this function.  Instead, we need to
 * issue a runtime call to get the address.
 *
 * The three pointers below are filled in by initialise() using
 * WSAIoctl(SIO_GET_EXTENSION_FUNCTION_POINTER) with the matching
 * WSAID_* GUID.
 */
LPFN_CONNECTEX ISCConnectEx;				/* ConnectEx */
LPFN_ACCEPTEX ISCAcceptEx;				/* AcceptEx */
LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;	/* GetAcceptExSockaddrs */
93
/*
 * Run expensive internal consistency checks.
 *
 * CONSISTENT(sock) calls consistent() only when the build defines
 * ISC_SOCKET_CONSISTENCY_CHECKS; otherwise it expands to an empty
 * statement so call sites cost nothing.
 */
#ifdef ISC_SOCKET_CONSISTENCY_CHECKS
#define CONSISTENT(sock) consistent(sock)
#else
#define CONSISTENT(sock) do {} while (0)
#endif
static void consistent(isc_socket_t *sock);
103
/*
 * Define this macro to control the behavior of connection
 * resets on UDP sockets.  See Microsoft KnowledgeBase Article Q263823
 * for details.
 * NOTE: This requires that Windows 2000 systems install Service Pack 2
 * or later.
 *
 * The fallback definition is only used when the SDK headers do not
 * already provide SIO_UDP_CONNRESET.
 */
#ifndef SIO_UDP_CONNRESET
#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
#endif

/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 * A definition supplied by the build takes precedence over this default.
 */
#ifndef ISC_SOCKADDR_LEN_T
#define ISC_SOCKADDR_LEN_T unsigned int
#endif
122
/*
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * Both the Winsock (WSAE*) and the C runtime (E*) spellings are accepted,
 * and an error value of 0 is also treated as soft.  The argument is
 * evaluated several times, so it must not have side effects.
 */
#define SOFT_ERROR(e)	((e) == WSAEINTR || \
			 (e) == WSAEWOULDBLOCK || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == EAGAIN || \
			 (e) == 0)

/*
 * Pending errors are not really errors and should be
 * kept separate.  True for WSA_IO_PENDING and for 0; the argument is
 * evaluated twice.
 */
#define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
139
/*
 * Status codes describing the outcome of an I/O attempt.
 * NOTE(review): the code that produces and consumes these values is
 * outside this excerpt.
 */
#define DOIO_SUCCESS		0	/* i/o ok, event sent */
#define DOIO_SOFT		1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD		2	/* i/o error, event sent */
#define DOIO_EOF		3	/* EOF, no event sent */
#define DOIO_PENDING		4	/* status when i/o is in process */
#define DOIO_NEEDMORE		5	/* IO was processed, but we need more due to minimum */
146
/*
 * DLVL(x) expands to the three leading logging arguments (category,
 * module, debug level x) expected by socket_log() and manager_log().
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * DLVL(90)  --  Function entry/exit and other tracing.
 * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60)  --  Socket data send/receive
 * DLVL(50)  --  Event tracing, including receiving/sending completion events.
 * DLVL(20)  --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

/* Ready-made category/module/level triples for each of the levels above. */
#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)
167
/* Internal event type: a plain isc_event_t under another name. */
typedef isc_event_t intev_t;

/*
 * Socket State
 *
 * Stored in isc_socket.state through the _set_state() macro.
 */
enum {
  SOCK_INITIALIZED,	/* Socket Initialized */
  SOCK_OPEN,		/* Socket opened but nothing yet to do */
  SOCK_DATA,		/* Socket sending or receiving data */
  SOCK_LISTEN,		/* TCP Socket listening for connects */
  SOCK_ACCEPT,		/* TCP socket is waiting to accept */
  SOCK_CONNECT,		/* TCP Socket connecting */
  SOCK_CLOSED,		/* Socket has been closed */
};
182
/*
 * Magic number stored in isc_socket.magic; VALID_SOCKET() checks a
 * pointer against it before the structure is trusted.
 */
#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)
185
/*
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
#ifdef ISC_PLATFORM_HAVEIPV6
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * We really don't want to try and use these control messages.  Win32
 * doesn't have this mechanism before XP.
 *
 * Net effect: whatever was decided above, USE_CMSG is always undefined
 * from this point on.
 */
#undef USE_CMSG
202
/*
 * Message header for recvmsg and sendmsg calls.
 * Used value-result for recvmsg, value only for sendmsg.
 *
 * NOTE(review): the trailing "msghdr" declarator also defines a
 * file-scope object of this type named msghdr.  That looks unintended;
 * confirm nothing refers to the object before removing it.
 */
struct msghdr {
	SOCKADDR_STORAGE	to_addr;	/* UDP send/recv address */
	int			to_addr_len;	/* length of the address */
	WSABUF			*msg_iov;	/* scatter/gather array */
	u_int			msg_iovlen;	/* # elements in msg_iov */
	void			*msg_control;	/* ancillary data, see below */
	u_int			msg_controllen;	/* ancillary data buffer len */
	u_int			msg_totallen;	/* total length of this message */
} msghdr;
216
/*
 * The size to raise the receive buffer to (32 KiB).
 */
#define RCVBUFSIZE (32*1024)

/*
 * The number of times a send operation is repeated if the result
 * is WSAEINTR.
 */
#define NRETRIES 10
227
/*
 * Per-socket state.  Fields are grouped by the locking rule that applies
 * to them, as noted in the section comments below.
 */
struct isc_socket {
	/* Not locked. */
	unsigned int		magic;		/* SOCKET_MAGIC while valid */
	isc_socketmgr_t	       *manager;	/* owning socket manager */
	isc_mutex_t		lock;		/* the "socket lock" */
	isc_sockettype_t	type;

	/* Pointers to scatter/gather buffers */
	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];

	/* Locked by socket lock. */
	ISC_LINK(isc_socket_t)	link;
	unsigned int		references; /* EXTERNAL references */
	SOCKET			fd;	/* file handle */
	int			pf;	/* protocol family */
	char			name[16];
	void *			tag;

	/*
	 * Each recv() call uses this buffer.  It is a per-socket receive
	 * buffer that allows us to decouple the system recv() from the
	 * recv_list done events.  This means the items on the recv_list
	 * can be removed without having to cancel pending system recv()
	 * calls.  It also allows us to read-ahead in some cases.
	 */
	struct {
		SOCKADDR_STORAGE	from_addr;	   /* UDP send/recv address */
		int		from_addr_len;	   /* length of the address */
		char		*base;		   /* the base of the buffer */
		char		*consume_position; /* where to start copying data from next */
		unsigned int	len;		   /* the actual size of this buffer */
		unsigned int	remaining;	   /* the number of bytes remaining */
	} recvbuf;

	/* Events queued on this socket, one list per operation type. */
	ISC_LIST(isc_socketevent_t)		send_list;
	ISC_LIST(isc_socketevent_t)		recv_list;
	ISC_LIST(isc_socket_newconnev_t)	accept_list;
	ISC_LIST(isc_socket_connev_t)		connect_list;

	isc_sockaddr_t		address;  /* remote address */

	unsigned int		listener : 1,	/* listener socket */
				connected : 1,
				pending_connect : 1, /* connect pending */
				bound : 1,	/* bound to local addr */
				dupped : 1;     /* created by isc_socket_dup() */
	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
	unsigned int		pending_recv;  /* Number of outstanding recv() calls. */
	unsigned int		pending_send;  /* Number of outstanding send() calls. */
	unsigned int		pending_accept; /* Number of outstanding accept() calls. */
	unsigned int		state; /* Socket state. Debugging and consistency checking. */
	int			state_lineno;  /* line which last touched state */
};
281
/*
 * Record a new socket state together with the source line that set it,
 * so the last state change can be located when debugging.
 */
#define _set_state(sock, _state)  do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
283
/*
 * Buffer structure
 *
 * One node of a linked list of heap-allocated data buffers; such lists
 * hang off IoCompletionInfo.bufferlist.
 */
typedef struct buflist buflist_t;

struct buflist {
	void			*buf;		/* the data */
	unsigned int		buflen;		/* number of bytes at buf */
	ISC_LINK(buflist_t)	link;
};
294
/*
 * I/O Completion ports Info structures
 */

/*
 * Private heap used for IoCompletionInfo structures and send buffer
 * copies; created in iocompletionport_init().
 */
static HANDLE hHeapHandle = NULL;

/*
 * Per-request context.  The OVERLAPPED member comes first so that a
 * pointer to this structure can be passed where an LPWSAOVERLAPPED is
 * expected (see the casts in internal_sendmsg() and
 * queue_receive_request()).
 */
typedef struct IoCompletionInfo {
	OVERLAPPED		overlapped;
	isc_socketevent_t	*dev;  /* send()/recv() done event */
	isc_socket_connev_t	*cdev; /* connect() done event */
	isc_socket_newconnev_t	*adev; /* accept() done event */
	void			*acceptbuffer;
	DWORD			received_bytes;
	int			request_type;	/* one of the SOCKET_* request values */
	struct msghdr		messagehdr;
	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
} IoCompletionInfo;
311
/*
 * Define a maximum number of I/O Completion Port worker threads
 * to handle the load on the Completion Port.  The actual number
 * used is the number of CPUs + 1, capped at this maximum (see
 * iocompletionport_init()).
 */
#define MAX_IOCPTHREADS 20

/*
 * Magic number stored in isc_socketmgr.magic; VALID_MANAGER() checks a
 * pointer against it.
 */
#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
321
/*
 * Socket manager: owns the I/O completion port, its worker threads and
 * the list of sockets created through it.
 */
struct isc_socketmgr {
	/* Not locked. */
	unsigned int			magic;	/* SOCKET_MANAGER_MAGIC while valid */
	isc_mem_t		       *mctx;
	isc_mutex_t			lock;	/* the "manager lock" */
	isc_stats_t		       *stats;

	/* Locked by manager lock. */
	ISC_LIST(isc_socket_t)		socklist;
	bool			bShutdown;
	isc_condition_t			shutdown_ok;
	HANDLE				hIoCompletionPort;
	int				maxIOCPThreads;	/* number of worker threads in use */
	HANDLE				hIOCPThreads[MAX_IOCPTHREADS];
	DWORD				dwIOCPThreadIds[MAX_IOCPTHREADS];
	size_t				maxudp;

	/*
	 * Debugging.
	 * Modified by InterlockedIncrement() and InterlockedDecrement()
	 */
	LONG				totalSockets;
	LONG				iocp_total;
};
346
/*
 * Kind of request an IoCompletionInfo describes; stored in its
 * request_type member.
 */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
359
/*
 * Forward declarations of file-local helpers that are referenced before
 * they are defined.
 */
static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
				  isc_sockettype_t type,
				  isc_socket_t **socketp,
				  isc_socket_t *dup_socket);
static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
static void maybe_free_socket(isc_socket_t **, int);
static void free_socket(isc_socket_t **, int);
static bool senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
static bool acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
static bool connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
static void send_connectdone_abort(isc_socket_t *sock, isc_result_t result);
static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
static void queue_receive_request(isc_socket_t *sock);
378
379 /*
380 * This is used to dump the contents of the sock structure
381 * You should make sure that the sock is locked before
382 * dumping it. Since the code uses simple printf() statements
383 * it should only be used interactively.
384 */
385 void
sock_dump(isc_socket_t * sock)386 sock_dump(isc_socket_t *sock) {
387 isc_socketevent_t *ldev;
388 isc_socket_newconnev_t *ndev;
389 isc_socket_connev_t *cdev;
390
391 #if 0
392 isc_sockaddr_t addr;
393 char socktext[ISC_SOCKADDR_FORMATSIZE];
394 isc_result_t result;
395
396 result = isc_socket_getpeername(sock, &addr);
397 if (result == ISC_R_SUCCESS) {
398 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
399 printf("Remote Socket: %s\n", socktext);
400 }
401 result = isc_socket_getsockname(sock, &addr);
402 if (result == ISC_R_SUCCESS) {
403 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
404 printf("This Socket: %s\n", socktext);
405 }
406 #endif
407
408 printf("\n\t\tSock Dump\n");
409 printf("\t\tfd: %Iu\n", sock->fd);
410 printf("\t\treferences: %u\n", sock->references);
411 printf("\t\tpending_accept: %u\n", sock->pending_accept);
412 printf("\t\tconnecting: %u\n", sock->pending_connect);
413 printf("\t\tconnected: %u\n", sock->connected);
414 printf("\t\tbound: %u\n", sock->bound);
415 printf("\t\tpending_iocp: %u\n", sock->pending_iocp);
416 printf("\t\tsocket type: %d\n", sock->type);
417
418 printf("\n\t\tSock Recv List\n");
419 ldev = ISC_LIST_HEAD(sock->recv_list);
420 while (ldev != NULL) {
421 printf("\t\tdev: %p\n", ldev);
422 ldev = ISC_LIST_NEXT(ldev, ev_link);
423 }
424
425 printf("\n\t\tSock Send List\n");
426 ldev = ISC_LIST_HEAD(sock->send_list);
427 while (ldev != NULL) {
428 printf("\t\tdev: %p\n", ldev);
429 ldev = ISC_LIST_NEXT(ldev, ev_link);
430 }
431
432 printf("\n\t\tSock Accept List\n");
433 ndev = ISC_LIST_HEAD(sock->accept_list);
434 while (ndev != NULL) {
435 printf("\t\tdev: %p\n", ldev);
436 ndev = ISC_LIST_NEXT(ndev, ev_link);
437 }
438
439 printf("\n\t\tSock Connect List\n");
440 cdev = ISC_LIST_HEAD(sock->connect_list);
441 while (cdev != NULL) {
442 printf("\t\tdev: %p\n", cdev);
443 cdev = ISC_LIST_NEXT(cdev, ev_link);
444 }
445 }
446
/*
 * Forward declaration of socket_log() carrying the printf-style format
 * annotation: 'fmt' is argument 9 and the variadic arguments start at
 * argument 10.
 */
static void
socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   isc_msgcat_t *msgcat, int msgset, int message,
	   const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
452
453 /* This function will add an entry to the I/O completion port
454 * that will signal the I/O thread to exit (gracefully)
455 */
456 static void
signal_iocompletionport_exit(isc_socketmgr_t * manager)457 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
458 int i;
459 int errval;
460 char strbuf[ISC_STRERRORSIZE];
461
462 REQUIRE(VALID_MANAGER(manager));
463 for (i = 0; i < manager->maxIOCPThreads; i++) {
464 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
465 0, 0, 0)) {
466 errval = GetLastError();
467 isc__strerror(errval, strbuf, sizeof(strbuf));
468 FATAL_ERROR(__FILE__, __LINE__,
469 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
470 ISC_MSG_FAILED,
471 "Can't request service thread to exit: %s"),
472 strbuf);
473 }
474 }
475 }
476
477 /*
478 * Create the worker threads for the I/O Completion Port
479 */
480 void
iocompletionport_createthreads(int total_threads,isc_socketmgr_t * manager)481 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
482 int errval;
483 char strbuf[ISC_STRERRORSIZE];
484 int i;
485
486 INSIST(total_threads > 0);
487 REQUIRE(VALID_MANAGER(manager));
488 /*
489 * We need at least one
490 */
491 for (i = 0; i < total_threads; i++) {
492 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
493 manager, 0,
494 &manager->dwIOCPThreadIds[i]);
495 if (manager->hIOCPThreads[i] == NULL) {
496 errval = GetLastError();
497 isc__strerror(errval, strbuf, sizeof(strbuf));
498 FATAL_ERROR(__FILE__, __LINE__,
499 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
500 ISC_MSG_FAILED,
501 "Can't create IOCP thread: %s"),
502 strbuf);
503 }
504 }
505 }
506
507 /*
508 * Create/initialise the I/O completion port
509 */
510 void
iocompletionport_init(isc_socketmgr_t * manager)511 iocompletionport_init(isc_socketmgr_t *manager) {
512 int errval;
513 char strbuf[ISC_STRERRORSIZE];
514
515 REQUIRE(VALID_MANAGER(manager));
516 /*
517 * Create a private heap to handle the socket overlapped structure
518 * The minimum number of structures is 10, there is no maximum
519 */
520 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
521 if (hHeapHandle == NULL) {
522 errval = GetLastError();
523 isc__strerror(errval, strbuf, sizeof(strbuf));
524 FATAL_ERROR(__FILE__, __LINE__,
525 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
526 ISC_MSG_FAILED,
527 "HeapCreate() failed during "
528 "initialization: %s"),
529 strbuf);
530 }
531
532 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
533
534 /* Now Create the Completion Port */
535 manager->hIoCompletionPort = CreateIoCompletionPort(
536 INVALID_HANDLE_VALUE, NULL,
537 0, manager->maxIOCPThreads);
538 if (manager->hIoCompletionPort == NULL) {
539 errval = GetLastError();
540 isc__strerror(errval, strbuf, sizeof(strbuf));
541 FATAL_ERROR(__FILE__, __LINE__,
542 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
543 ISC_MSG_FAILED,
544 "CreateIoCompletionPort() failed "
545 "during initialization: %s"),
546 strbuf);
547 }
548
549 /*
550 * Worker threads for servicing the I/O
551 */
552 iocompletionport_createthreads(manager->maxIOCPThreads, manager);
553 }
554
555 /*
556 * Associate a socket with an IO Completion Port. This allows us to queue events for it
557 * and have our worker pool of threads process them.
558 */
559 void
iocompletionport_update(isc_socket_t * sock)560 iocompletionport_update(isc_socket_t *sock) {
561 HANDLE hiocp;
562 char strbuf[ISC_STRERRORSIZE];
563
564 REQUIRE(VALID_SOCKET(sock));
565
566 hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
567 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
568
569 if (hiocp == NULL) {
570 DWORD errval = GetLastError();
571 isc__strerror(errval, strbuf, sizeof(strbuf));
572 isc_log_iwrite(isc_lctx,
573 ISC_LOGCATEGORY_GENERAL,
574 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
575 isc_msgcat, ISC_MSGSET_SOCKET,
576 ISC_MSG_TOOMANYHANDLES,
577 "iocompletionport_update: failed to open"
578 " io completion port: %s",
579 strbuf);
580
581 /* XXXMLG temporary hack to make failures detected.
582 * This function should return errors to the caller, not
583 * exit here.
584 */
585 FATAL_ERROR(__FILE__, __LINE__,
586 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
587 ISC_MSG_FAILED,
588 "CreateIoCompletionPort() failed "
589 "during initialization: %s"),
590 strbuf);
591 }
592
593 InterlockedIncrement(&sock->manager->iocp_total);
594 }
595
596 /*
597 * Routine to cleanup and then close the socket.
598 * Only close the socket here if it is NOT associated
599 * with an event, otherwise the WSAWaitForMultipleEvents
600 * may fail due to the fact that the Wait should not
601 * be running while closing an event or a socket.
602 * The socket is locked before calling this function
603 */
604 void
socket_close(isc_socket_t * sock)605 socket_close(isc_socket_t *sock) {
606
607 REQUIRE(sock != NULL);
608
609 if (sock->fd != INVALID_SOCKET) {
610 closesocket(sock->fd);
611 sock->fd = INVALID_SOCKET;
612 _set_state(sock, SOCK_CLOSED);
613 InterlockedDecrement(&sock->manager->totalSockets);
614 }
615 }
616
617 static isc_once_t initialise_once = ISC_ONCE_INIT;
618 static bool initialised = false;
619
620 static void
initialise(void)621 initialise(void) {
622 WORD wVersionRequested;
623 WSADATA wsaData;
624 int err;
625 SOCKET sock;
626 GUID GUIDConnectEx = WSAID_CONNECTEX;
627 GUID GUIDAcceptEx = WSAID_ACCEPTEX;
628 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
629 DWORD dwBytes;
630
631 /* Need Winsock 2.2 or better */
632 wVersionRequested = MAKEWORD(2, 2);
633
634 err = WSAStartup(wVersionRequested, &wsaData);
635 if (err != 0) {
636 char strbuf[ISC_STRERRORSIZE];
637 isc__strerror(err, strbuf, sizeof(strbuf));
638 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
639 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
640 ISC_MSG_FAILED, "failed"),
641 strbuf);
642 }
643 /*
644 * The following APIs do not exist as functions in a library, but
645 * we must ask winsock for them. They are "extensions" -- but why
646 * they cannot be actual functions is beyond me. So, ask winsock
647 * for the pointers to the functions we need.
648 */
649 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
650 INSIST(sock != INVALID_SOCKET);
651 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
652 &GUIDConnectEx, sizeof(GUIDConnectEx),
653 &ISCConnectEx, sizeof(ISCConnectEx),
654 &dwBytes, NULL, NULL);
655 INSIST(err == 0);
656
657 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
658 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
659 &ISCAcceptEx, sizeof(ISCAcceptEx),
660 &dwBytes, NULL, NULL);
661 INSIST(err == 0);
662
663 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
664 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
665 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
666 &dwBytes, NULL, NULL);
667 INSIST(err == 0);
668
669 closesocket(sock);
670
671 initialised = true;
672 }
673
674 /*
675 * Initialize socket services
676 */
677 void
InitSockets(void)678 InitSockets(void) {
679 RUNTIME_CHECK(isc_once_do(&initialise_once,
680 initialise) == ISC_R_SUCCESS);
681 if (!initialised)
682 exit(1);
683 }
684
685 int
internal_sendmsg(isc_socket_t * sock,IoCompletionInfo * lpo,struct msghdr * messagehdr,int flags,int * Error)686 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
687 struct msghdr *messagehdr, int flags, int *Error)
688 {
689 int Result;
690 DWORD BytesSent;
691 DWORD Flags = flags;
692 int total_sent;
693
694 *Error = 0;
695 Result = WSASendTo(sock->fd, messagehdr->msg_iov,
696 messagehdr->msg_iovlen, &BytesSent,
697 Flags, (SOCKADDR *)&messagehdr->to_addr,
698 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
699 NULL);
700
701 total_sent = (int)BytesSent;
702
703 /* Check for errors.*/
704 if (Result == SOCKET_ERROR) {
705 *Error = WSAGetLastError();
706
707 switch (*Error) {
708 case WSA_IO_INCOMPLETE:
709 case WSA_WAIT_IO_COMPLETION:
710 case WSA_IO_PENDING:
711 case NO_ERROR: /* Strange, but okay */
712 sock->pending_iocp++;
713 sock->pending_send++;
714 break;
715
716 default:
717 return (-1);
718 break;
719 }
720 } else {
721 sock->pending_iocp++;
722 sock->pending_send++;
723 }
724
725 if (lpo != NULL)
726 return (0);
727 else
728 return (total_sent);
729 }
730
731 static void
queue_receive_request(isc_socket_t * sock)732 queue_receive_request(isc_socket_t *sock) {
733 DWORD Flags = 0;
734 DWORD NumBytes = 0;
735 int Result;
736 int Error;
737 int need_retry;
738 WSABUF iov[1];
739 IoCompletionInfo *lpo = NULL;
740 isc_result_t isc_result;
741
742 retry:
743 need_retry = false;
744
745 /*
746 * If we already have a receive pending, do nothing.
747 */
748 if (sock->pending_recv > 0) {
749 if (lpo != NULL)
750 HeapFree(hHeapHandle, 0, lpo);
751 return;
752 }
753
754 /*
755 * If no one is waiting, do nothing.
756 */
757 if (ISC_LIST_EMPTY(sock->recv_list)) {
758 if (lpo != NULL)
759 HeapFree(hHeapHandle, 0, lpo);
760 return;
761 }
762
763 INSIST(sock->recvbuf.remaining == 0);
764 INSIST(sock->fd != INVALID_SOCKET);
765
766 iov[0].len = sock->recvbuf.len;
767 iov[0].buf = sock->recvbuf.base;
768
769 if (lpo == NULL) {
770 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
771 HEAP_ZERO_MEMORY,
772 sizeof(IoCompletionInfo));
773 RUNTIME_CHECK(lpo != NULL);
774 } else
775 ZeroMemory(lpo, sizeof(IoCompletionInfo));
776 lpo->request_type = SOCKET_RECV;
777
778 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
779
780 Error = 0;
781 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
782 &NumBytes, &Flags,
783 (SOCKADDR *)&sock->recvbuf.from_addr,
784 &sock->recvbuf.from_addr_len,
785 (LPWSAOVERLAPPED)lpo, NULL);
786
787 /* Check for errors. */
788 if (Result == SOCKET_ERROR) {
789 Error = WSAGetLastError();
790
791 switch (Error) {
792 case WSA_IO_PENDING:
793 sock->pending_iocp++;
794 sock->pending_recv++;
795 break;
796
797 /* direct error: no completion event */
798 case ERROR_HOST_UNREACHABLE:
799 case WSAENETRESET:
800 case WSAECONNRESET:
801 if (!sock->connected) {
802 /* soft error */
803 need_retry = true;
804 break;
805 }
806 /* FALLTHROUGH */
807
808 default:
809 isc_result = isc__errno2result(Error);
810 if (isc_result == ISC_R_UNEXPECTED)
811 UNEXPECTED_ERROR(__FILE__, __LINE__,
812 "WSARecvFrom: Windows error code: %d, isc result %d",
813 Error, isc_result);
814 send_recvdone_abort(sock, isc_result);
815 HeapFree(hHeapHandle, 0, lpo);
816 lpo = NULL;
817 break;
818 }
819 } else {
820 /*
821 * The recv() finished immediately, but we will still get
822 * a completion event. Rather than duplicate code, let
823 * that thread handle sending the data along its way.
824 */
825 sock->pending_iocp++;
826 sock->pending_recv++;
827 }
828
829 socket_log(__LINE__, sock, NULL, IOEVENT,
830 isc_msgcat, ISC_MSGSET_SOCKET,
831 ISC_MSG_DOIORECV,
832 "queue_io_request: fd %d result %d error %d",
833 sock->fd, Result, Error);
834
835 CONSISTENT(sock);
836
837 if (need_retry)
838 goto retry;
839 }
840
841 static void
manager_log(isc_socketmgr_t * sockmgr,isc_logcategory_t * category,isc_logmodule_t * module,int level,const char * fmt,...)842 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
843 isc_logmodule_t *module, int level, const char *fmt, ...)
844 {
845 char msgbuf[2048];
846 va_list ap;
847
848 if (!isc_log_wouldlog(isc_lctx, level))
849 return;
850
851 va_start(ap, fmt);
852 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
853 va_end(ap);
854
855 isc_log_write(isc_lctx, category, module, level,
856 "sockmgr %p: %s", sockmgr, msgbuf);
857 }
858
859 static void
socket_log(int lineno,isc_socket_t * sock,isc_sockaddr_t * address,isc_logcategory_t * category,isc_logmodule_t * module,int level,isc_msgcat_t * msgcat,int msgset,int message,const char * fmt,...)860 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
861 isc_logcategory_t *category, isc_logmodule_t *module, int level,
862 isc_msgcat_t *msgcat, int msgset, int message,
863 const char *fmt, ...)
864 {
865 char msgbuf[2048];
866 char peerbuf[256];
867 va_list ap;
868
869
870 if (!isc_log_wouldlog(isc_lctx, level))
871 return;
872
873 va_start(ap, fmt);
874 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
875 va_end(ap);
876
877 if (address == NULL) {
878 isc_log_iwrite(isc_lctx, category, module, level,
879 msgcat, msgset, message,
880 "socket %p line %d: %s", sock, lineno, msgbuf);
881 } else {
882 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
883 isc_log_iwrite(isc_lctx, category, module, level,
884 msgcat, msgset, message,
885 "socket %p line %d %s: %s", sock, lineno,
886 peerbuf, msgbuf);
887 }
888
889 }
890
891 /*
892 * Make an fd SOCKET non-blocking.
893 */
894 static isc_result_t
make_nonblock(SOCKET fd)895 make_nonblock(SOCKET fd) {
896 int ret;
897 unsigned long flags = 1;
898 char strbuf[ISC_STRERRORSIZE];
899
900 /* Set the socket to non-blocking */
901 ret = ioctlsocket(fd, FIONBIO, &flags);
902
903 if (ret == -1) {
904 isc__strerror(errno, strbuf, sizeof(strbuf));
905 UNEXPECTED_ERROR(__FILE__, __LINE__,
906 "ioctlsocket(%d, FIOBIO, %d): %s",
907 fd, flags, strbuf);
908
909 return (ISC_R_UNEXPECTED);
910 }
911
912 return (ISC_R_SUCCESS);
913 }
914
915 /*
916 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
917 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
918 * fails with an "ICMP port unreachable" response and preventing the
919 * socket from using the WSARecvFrom in subsequent operations.
920 * The function below fixes this, but requires that Windows 2000
921 * Service Pack 2 or later be installed on the system. NT 4.0
922 * systems are not affected by this and work correctly.
923 * See Microsoft Knowledge Base Article Q263823 for details of this.
924 */
925 isc_result_t
connection_reset_fix(SOCKET fd)926 connection_reset_fix(SOCKET fd) {
927 DWORD dwBytesReturned = 0;
928 BOOL bNewBehavior = FALSE;
929 DWORD status;
930
931 if (isc_win32os_versioncheck(5, 0, 0, 0) < 0)
932 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */
933
934 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
935 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
936 sizeof(bNewBehavior), NULL, 0,
937 &dwBytesReturned, NULL, NULL);
938 if (status != SOCKET_ERROR)
939 return (ISC_R_SUCCESS);
940 else {
941 UNEXPECTED_ERROR(__FILE__, __LINE__,
942 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
943 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
944 ISC_MSG_FAILED, "failed"));
945 return (ISC_R_UNEXPECTED);
946 }
947 }
948
949 /*
950 * Construct an iov array and attach it to the msghdr passed in. This is
951 * the SEND constructor, which will use the used region of the buffer
952 * (if using a buffer list) or will use the internal region (if a single
953 * buffer I/O is requested).
954 *
955 * Nothing can be NULL, and the done event must list at least one buffer
956 * on the buffer linked list for this function to be meaningful.
957 */
958 static void
build_msghdr_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * msg,char * cmsg,WSABUF * iov,IoCompletionInfo * lpo)959 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
960 struct msghdr *msg, char *cmsg, WSABUF *iov,
961 IoCompletionInfo *lpo)
962 {
963 unsigned int iovcount;
964 isc_buffer_t *buffer;
965 buflist_t *cpbuffer;
966 isc_region_t used;
967 size_t write_count;
968 size_t skip_count;
969
970 memset(msg, 0, sizeof(*msg));
971
972 memmove(&msg->to_addr, &dev->address.type, dev->address.length);
973 msg->to_addr_len = dev->address.length;
974
975 buffer = ISC_LIST_HEAD(dev->bufferlist);
976 write_count = 0;
977 iovcount = 0;
978
979 /*
980 * Single buffer I/O? Skip what we've done so far in this region.
981 */
982 if (buffer == NULL) {
983 write_count = dev->region.length - dev->n;
984 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
985 RUNTIME_CHECK(cpbuffer != NULL);
986 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
987 RUNTIME_CHECK(cpbuffer->buf != NULL);
988
989 socket_log(__LINE__, sock, NULL, TRACE,
990 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
991 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
992 cpbuffer->buf, write_count);
993
994 memmove(cpbuffer->buf,(dev->region.base + dev->n), write_count);
995 cpbuffer->buflen = (unsigned int)write_count;
996 ISC_LINK_INIT(cpbuffer, link);
997 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
998 iov[0].buf = cpbuffer->buf;
999 iov[0].len = (u_long)write_count;
1000 iovcount = 1;
1001
1002 goto config;
1003 }
1004
1005 /*
1006 * Multibuffer I/O.
1007 * Skip the data in the buffer list that we have already written.
1008 */
1009 skip_count = dev->n;
1010 while (buffer != NULL) {
1011 REQUIRE(ISC_BUFFER_VALID(buffer));
1012 if (skip_count < isc_buffer_usedlength(buffer))
1013 break;
1014 skip_count -= isc_buffer_usedlength(buffer);
1015 buffer = ISC_LIST_NEXT(buffer, link);
1016 }
1017
1018 while (buffer != NULL) {
1019 INSIST(iovcount < MAXSCATTERGATHER_SEND);
1020
1021 isc_buffer_usedregion(buffer, &used);
1022
1023 if (used.length > 0) {
1024 int uselen = (int)(used.length - skip_count);
1025 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1026 RUNTIME_CHECK(cpbuffer != NULL);
1027 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1028 RUNTIME_CHECK(cpbuffer->buf != NULL);
1029
1030 socket_log(__LINE__, sock, NULL, TRACE,
1031 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1032 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1033 cpbuffer->buf, write_count);
1034
1035 memmove(cpbuffer->buf,(used.base + skip_count), uselen);
1036 cpbuffer->buflen = uselen;
1037 iov[iovcount].buf = cpbuffer->buf;
1038 iov[iovcount].len = (u_long)(used.length - skip_count);
1039 write_count += uselen;
1040 skip_count = 0;
1041 iovcount++;
1042 }
1043 buffer = ISC_LIST_NEXT(buffer, link);
1044 }
1045
1046 INSIST(skip_count == 0);
1047
1048 config:
1049 msg->msg_iov = iov;
1050 msg->msg_iovlen = iovcount;
1051 msg->msg_totallen = (u_int)write_count;
1052 }
1053
1054 static void
set_dev_address(isc_sockaddr_t * address,isc_socket_t * sock,isc_socketevent_t * dev)1055 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1056 isc_socketevent_t *dev)
1057 {
1058 if (sock->type == isc_sockettype_udp) {
1059 if (address != NULL)
1060 dev->address = *address;
1061 else
1062 dev->address = sock->address;
1063 } else if (sock->type == isc_sockettype_tcp) {
1064 INSIST(address == NULL);
1065 dev->address = sock->address;
1066 }
1067 }
1068
1069 static void
destroy_socketevent(isc_event_t * event)1070 destroy_socketevent(isc_event_t *event) {
1071 isc_socketevent_t *ev = (isc_socketevent_t *)event;
1072
1073 INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1074
1075 (ev->destroy)(event);
1076 }
1077
1078 static isc_socketevent_t *
allocate_socketevent(isc_mem_t * mctx,isc_socket_t * sock,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)1079 allocate_socketevent(isc_mem_t *mctx, isc_socket_t *sock,
1080 isc_eventtype_t eventtype, isc_taskaction_t action,
1081 void *arg)
1082 {
1083 isc_socketevent_t *ev;
1084
1085 ev = (isc_socketevent_t *)isc_event_allocate(mctx, sock, eventtype,
1086 action, arg,
1087 sizeof(*ev));
1088 if (ev == NULL)
1089 return (NULL);
1090
1091 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1092 ISC_LINK_INIT(ev, ev_link);
1093 ISC_LIST_INIT(ev->bufferlist);
1094 ev->region.base = NULL;
1095 ev->n = 0;
1096 ev->offset = 0;
1097 ev->attributes = 0;
1098 ev->destroy = ev->ev_destroy;
1099 ev->ev_destroy = destroy_socketevent;
1100 ev->dscp = 0;
1101
1102 return (ev);
1103 }
1104
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the contents of a message header, including each
 * scatter/gather vector entry, to standard output.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i = 0;

	printf("MSGHDR %p, Socket #: %Iu\n", msg, sock->fd);
	printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen);

	while (i < (unsigned int)msg->msg_iovlen) {
		printf("\t\t%u\tbase %p, len %u\n", i,
		       msg->msg_iov[i].buf, msg->msg_iov[i].len);
		i++;
	}
}
#endif
1118
1119 /*
1120 * map the error code
1121 */
1122 int
map_socket_error(isc_socket_t * sock,int windows_errno,int * isc_errno,char * errorstring,size_t bufsize)1123 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1124 char *errorstring, size_t bufsize) {
1125
1126 int doreturn;
1127 switch (windows_errno) {
1128 case WSAECONNREFUSED:
1129 *isc_errno = ISC_R_CONNREFUSED;
1130 if (sock->connected)
1131 doreturn = DOIO_HARD;
1132 else
1133 doreturn = DOIO_SOFT;
1134 break;
1135 case WSAENETUNREACH:
1136 case ERROR_NETWORK_UNREACHABLE:
1137 *isc_errno = ISC_R_NETUNREACH;
1138 if (sock->connected)
1139 doreturn = DOIO_HARD;
1140 else
1141 doreturn = DOIO_SOFT;
1142 break;
1143 case ERROR_PORT_UNREACHABLE:
1144 case ERROR_HOST_UNREACHABLE:
1145 case WSAEHOSTUNREACH:
1146 *isc_errno = ISC_R_HOSTUNREACH;
1147 if (sock->connected)
1148 doreturn = DOIO_HARD;
1149 else
1150 doreturn = DOIO_SOFT;
1151 break;
1152 case WSAENETDOWN:
1153 *isc_errno = ISC_R_NETDOWN;
1154 if (sock->connected)
1155 doreturn = DOIO_HARD;
1156 else
1157 doreturn = DOIO_SOFT;
1158 break;
1159 case WSAEHOSTDOWN:
1160 *isc_errno = ISC_R_HOSTDOWN;
1161 if (sock->connected)
1162 doreturn = DOIO_HARD;
1163 else
1164 doreturn = DOIO_SOFT;
1165 break;
1166 case WSAEACCES:
1167 *isc_errno = ISC_R_NOPERM;
1168 if (sock->connected)
1169 doreturn = DOIO_HARD;
1170 else
1171 doreturn = DOIO_SOFT;
1172 break;
1173 case WSAECONNRESET:
1174 case WSAENETRESET:
1175 case WSAECONNABORTED:
1176 case WSAEDISCON:
1177 *isc_errno = ISC_R_CONNECTIONRESET;
1178 if (sock->connected)
1179 doreturn = DOIO_HARD;
1180 else
1181 doreturn = DOIO_SOFT;
1182 break;
1183 case WSAENOTCONN:
1184 *isc_errno = ISC_R_NOTCONNECTED;
1185 if (sock->connected)
1186 doreturn = DOIO_HARD;
1187 else
1188 doreturn = DOIO_SOFT;
1189 break;
1190 case ERROR_OPERATION_ABORTED:
1191 case ERROR_CONNECTION_ABORTED:
1192 case ERROR_REQUEST_ABORTED:
1193 *isc_errno = ISC_R_CONNECTIONRESET;
1194 doreturn = DOIO_HARD;
1195 break;
1196 case WSAENOBUFS:
1197 *isc_errno = ISC_R_NORESOURCES;
1198 doreturn = DOIO_HARD;
1199 break;
1200 case WSAEAFNOSUPPORT:
1201 *isc_errno = ISC_R_FAMILYNOSUPPORT;
1202 doreturn = DOIO_HARD;
1203 break;
1204 case WSAEADDRNOTAVAIL:
1205 *isc_errno = ISC_R_ADDRNOTAVAIL;
1206 doreturn = DOIO_HARD;
1207 break;
1208 case WSAEDESTADDRREQ:
1209 *isc_errno = ISC_R_BADADDRESSFORM;
1210 doreturn = DOIO_HARD;
1211 break;
1212 case ERROR_NETNAME_DELETED:
1213 *isc_errno = ISC_R_NETDOWN;
1214 doreturn = DOIO_HARD;
1215 break;
1216 default:
1217 *isc_errno = ISC_R_IOERROR;
1218 doreturn = DOIO_HARD;
1219 break;
1220 }
1221 if (doreturn == DOIO_HARD) {
1222 isc__strerror(windows_errno, errorstring, bufsize);
1223 }
1224 return (doreturn);
1225 }
1226
1227 static void
fill_recv(isc_socket_t * sock,isc_socketevent_t * dev)1228 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1229 isc_region_t r;
1230 int copylen;
1231 isc_buffer_t *buffer;
1232
1233 INSIST(dev->n < dev->minimum);
1234 INSIST(sock->recvbuf.remaining > 0);
1235 INSIST(sock->pending_recv == 0);
1236
1237 if (sock->type == isc_sockettype_udp) {
1238 dev->address.length = sock->recvbuf.from_addr_len;
1239 memmove(&dev->address.type, &sock->recvbuf.from_addr,
1240 sock->recvbuf.from_addr_len);
1241 if (isc_sockaddr_getport(&dev->address) == 0) {
1242 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1243 socket_log(__LINE__, sock, &dev->address,
1244 IOEVENT, isc_msgcat,
1245 ISC_MSGSET_SOCKET, ISC_MSG_ZEROPORT,
1246 "dropping source port zero packet");
1247 }
1248 sock->recvbuf.remaining = 0;
1249 return;
1250 }
1251 /*
1252 * Simulate a firewall blocking UDP responses bigger than
1253 * 'maxudp' bytes.
1254 */
1255 if (sock->manager->maxudp != 0 &&
1256 sock->recvbuf.remaining > sock->manager->maxudp)
1257 {
1258 sock->recvbuf.remaining = 0;
1259 return;
1260 }
1261 } else if (sock->type == isc_sockettype_tcp) {
1262 dev->address = sock->address;
1263 }
1264
1265 /*
1266 * Run through the list of buffers we were given, and find the
1267 * first one with space. Once it is found, loop through, filling
1268 * the buffers as much as possible.
1269 */
1270 buffer = ISC_LIST_HEAD(dev->bufferlist);
1271 if (buffer != NULL) { // Multi-buffer receive
1272 while (buffer != NULL && sock->recvbuf.remaining > 0) {
1273 REQUIRE(ISC_BUFFER_VALID(buffer));
1274 if (isc_buffer_availablelength(buffer) > 0) {
1275 isc_buffer_availableregion(buffer, &r);
1276 copylen = min(r.length,
1277 sock->recvbuf.remaining);
1278 memmove(r.base, sock->recvbuf.consume_position,
1279 copylen);
1280 sock->recvbuf.consume_position += copylen;
1281 sock->recvbuf.remaining -= copylen;
1282 isc_buffer_add(buffer, copylen);
1283 dev->n += copylen;
1284 }
1285 buffer = ISC_LIST_NEXT(buffer, link);
1286 }
1287 } else { // Single-buffer receive
1288 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1289 memmove(dev->region.base + dev->n,
1290 sock->recvbuf.consume_position, copylen);
1291 sock->recvbuf.consume_position += copylen;
1292 sock->recvbuf.remaining -= copylen;
1293 dev->n += copylen;
1294 }
1295
1296 /*
1297 * UDP receives are all-consuming. That is, if we have 4k worth of
1298 * data in our receive buffer, and the caller only gave us
1299 * 1k of space, we will toss the remaining 3k of data. TCP
1300 * will keep the extra data around and use it for later requests.
1301 */
1302 if (sock->type == isc_sockettype_udp)
1303 sock->recvbuf.remaining = 0;
1304 }
1305
1306 /*
1307 * Copy out as much data from the internal buffer to done events.
1308 * As each done event is filled, send it along its way.
1309 */
1310 static void
completeio_recv(isc_socket_t * sock)1311 completeio_recv(isc_socket_t *sock)
1312 {
1313 isc_socketevent_t *dev;
1314
1315 /*
1316 * If we are in the process of filling our buffer, we cannot
1317 * touch it yet, so don't.
1318 */
1319 if (sock->pending_recv > 0)
1320 return;
1321
1322 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1323 dev = ISC_LIST_HEAD(sock->recv_list);
1324
1325 /*
1326 * See if we have sufficient data in our receive buffer
1327 * to handle this. If we do, copy out the data.
1328 */
1329 fill_recv(sock, dev);
1330
1331 /*
1332 * Did we satisfy it?
1333 */
1334 if (dev->n >= dev->minimum) {
1335 dev->result = ISC_R_SUCCESS;
1336 send_recvdone_event(sock, &dev);
1337 }
1338 }
1339 }
1340
1341 /*
1342 * Returns:
1343 * DOIO_SUCCESS The operation succeeded. dev->result contains
1344 * ISC_R_SUCCESS.
1345 *
1346 * DOIO_HARD A hard or unexpected I/O error was encountered.
1347 * dev->result contains the appropriate error.
1348 *
1349 * DOIO_SOFT A soft I/O error was encountered. No senddone
1350 * event was sent. The operation should be retried.
1351 *
1352 * No other return values are possible.
1353 */
1354 static int
completeio_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int cc,int send_errno)1355 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1356 struct msghdr *messagehdr, int cc, int send_errno)
1357 {
1358 char strbuf[ISC_STRERRORSIZE];
1359
1360 if (send_errno != 0) {
1361 if (SOFT_ERROR(send_errno))
1362 return (DOIO_SOFT);
1363
1364 return (map_socket_error(sock, send_errno, &dev->result,
1365 strbuf, sizeof(strbuf)));
1366 }
1367
1368 /*
1369 * If we write less than we expected, update counters, poke.
1370 */
1371 dev->n += cc;
1372 if (cc != messagehdr->msg_totallen)
1373 return (DOIO_SOFT);
1374
1375 /*
1376 * Exactly what we wanted to write. We're done with this
1377 * entry. Post its completion event.
1378 */
1379 dev->result = ISC_R_SUCCESS;
1380 return (DOIO_SUCCESS);
1381 }
1382
1383 static int
startio_send(isc_socket_t * sock,isc_socketevent_t * dev,int * nbytes,int * send_errno)1384 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1385 int *send_errno)
1386 {
1387 char *cmsg = NULL;
1388 char strbuf[ISC_STRERRORSIZE];
1389 IoCompletionInfo *lpo;
1390 int status;
1391 struct msghdr *mh;
1392
1393 /*
1394 * Simulate a firewall blocking UDP responses bigger than
1395 * 'maxudp' bytes.
1396 */
1397 if (sock->type == isc_sockettype_udp &&
1398 sock->manager->maxudp != 0 &&
1399 dev->region.length - dev->n > sock->manager->maxudp)
1400 {
1401 *nbytes = dev->region.length - dev->n;
1402 return (DOIO_SUCCESS);
1403 }
1404
1405 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1406 HEAP_ZERO_MEMORY,
1407 sizeof(IoCompletionInfo));
1408 RUNTIME_CHECK(lpo != NULL);
1409 lpo->request_type = SOCKET_SEND;
1410 lpo->dev = dev;
1411 mh = &lpo->messagehdr;
1412 memset(mh, 0, sizeof(struct msghdr));
1413 ISC_LIST_INIT(lpo->bufferlist);
1414
1415 build_msghdr_send(sock, dev, mh, cmsg, sock->iov, lpo);
1416
1417 *nbytes = internal_sendmsg(sock, lpo, mh, 0, send_errno);
1418
1419 if (*nbytes <= 0) {
1420 /*
1421 * I/O has been initiated
1422 * completion will be through the completion port
1423 */
1424 if (PENDING_ERROR(*send_errno)) {
1425 status = DOIO_PENDING;
1426 goto done;
1427 }
1428
1429 if (SOFT_ERROR(*send_errno)) {
1430 status = DOIO_SOFT;
1431 goto done;
1432 }
1433
1434 /*
1435 * If we got this far then something is wrong
1436 */
1437 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1438 isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1439 socket_log(__LINE__, sock, NULL, IOEVENT,
1440 isc_msgcat, ISC_MSGSET_SOCKET,
1441 ISC_MSG_INTERNALSEND,
1442 "startio_send: internal_sendmsg(%d) %d "
1443 "bytes, err %d/%s",
1444 sock->fd, *nbytes, *send_errno, strbuf);
1445 }
1446 status = DOIO_HARD;
1447 goto done;
1448 }
1449 dev->result = ISC_R_SUCCESS;
1450 status = DOIO_SOFT;
1451 done:
1452 _set_state(sock, SOCK_DATA);
1453 return (status);
1454 }
1455
1456 static void
use_min_mtu(isc_socket_t * sock)1457 use_min_mtu(isc_socket_t *sock) {
1458 #ifdef IPV6_USE_MIN_MTU
1459 /* use minimum MTU */
1460 if (sock->pf == AF_INET6) {
1461 int on = 1;
1462 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU,
1463 (void *)&on, sizeof(on));
1464 }
1465 #else
1466 UNUSED(sock);
1467 #endif
1468 }
1469
1470 static isc_result_t
allocate_socket(isc_socketmgr_t * manager,isc_sockettype_t type,isc_socket_t ** socketp)1471 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1472 isc_socket_t **socketp)
1473 {
1474 isc_socket_t *sock;
1475 isc_result_t result;
1476
1477 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1478
1479 if (sock == NULL)
1480 return (ISC_R_NOMEMORY);
1481
1482 sock->magic = 0;
1483 sock->references = 0;
1484
1485 sock->manager = manager;
1486 sock->type = type;
1487 sock->fd = INVALID_SOCKET;
1488
1489 ISC_LINK_INIT(sock, link);
1490
1491 /*
1492 * Set up list of readers and writers to be initially empty.
1493 */
1494 ISC_LIST_INIT(sock->recv_list);
1495 ISC_LIST_INIT(sock->send_list);
1496 ISC_LIST_INIT(sock->accept_list);
1497 ISC_LIST_INIT(sock->connect_list);
1498 sock->pending_accept = 0;
1499 sock->pending_recv = 0;
1500 sock->pending_send = 0;
1501 sock->pending_iocp = 0;
1502 sock->listener = 0;
1503 sock->connected = 0;
1504 sock->pending_connect = 0;
1505 sock->bound = 0;
1506 sock->dupped = 0;
1507 memset(sock->name, 0, sizeof(sock->name)); // zero the name field
1508 _set_state(sock, SOCK_INITIALIZED);
1509
1510 sock->recvbuf.len = 65536;
1511 sock->recvbuf.consume_position = sock->recvbuf.base;
1512 sock->recvbuf.remaining = 0;
1513 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1514 if (sock->recvbuf.base == NULL) {
1515 result = ISC_R_NOMEMORY;
1516 goto error;
1517 }
1518
1519 /*
1520 * Initialize the lock.
1521 */
1522 result = isc_mutex_init(&sock->lock);
1523 if (result != ISC_R_SUCCESS)
1524 goto error;
1525
1526 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1527 "allocated");
1528
1529 sock->magic = SOCKET_MAGIC;
1530 *socketp = sock;
1531
1532 return (ISC_R_SUCCESS);
1533
1534 error:
1535 if (sock->recvbuf.base != NULL) {
1536 isc_mem_put(manager->mctx, sock->recvbuf.base,
1537 sock->recvbuf.len);
1538 }
1539 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1540 return (result);
1541 }
1542
1543 /*
1544 * Verify that the socket state is consistent.
1545 */
1546 static void
consistent(isc_socket_t * sock)1547 consistent(isc_socket_t *sock) {
1548
1549 isc_socketevent_t *dev;
1550 isc_socket_newconnev_t *nev;
1551 unsigned int count;
1552 char *crash_reason;
1553 bool crash = false;
1554
1555 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1556 + sock->pending_accept + sock->pending_connect);
1557
1558 dev = ISC_LIST_HEAD(sock->send_list);
1559 count = 0;
1560 while (dev != NULL) {
1561 count++;
1562 dev = ISC_LIST_NEXT(dev, ev_link);
1563 }
1564 if (count > sock->pending_send) {
1565 crash = true;
1566 crash_reason = "send_list > sock->pending_send";
1567 }
1568
1569 nev = ISC_LIST_HEAD(sock->accept_list);
1570 count = 0;
1571 while (nev != NULL) {
1572 count++;
1573 nev = ISC_LIST_NEXT(nev, ev_link);
1574 }
1575 if (count > sock->pending_accept) {
1576 crash = true;
1577 crash_reason = "accept_list > sock->pending_accept";
1578 }
1579
1580 if (crash) {
1581 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1582 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1583 crash_reason);
1584 sock_dump(sock);
1585 INSIST(crash == false);
1586 }
1587 }
1588
1589 /*
1590 * Maybe free the socket.
1591 *
1592 * This function will verify that the socket is no longer in use in any way,
1593 * either internally or externally. This is the only place where this
1594 * check is to be made; if some bit of code believes that IT is done with
1595 * the socket (e.g., some reference counter reaches zero), it should call
1596 * this function.
1597 *
1598 * When calling this function, the socket must be locked, and the manager
1599 * must be unlocked.
1600 *
1601 * When this function returns, *socketp will be NULL. No tricks to try
1602 * to hold on to this pointer are allowed.
1603 */
1604 static void
maybe_free_socket(isc_socket_t ** socketp,int lineno)1605 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1606 isc_socket_t *sock = *socketp;
1607 *socketp = NULL;
1608
1609 INSIST(VALID_SOCKET(sock));
1610 CONSISTENT(sock);
1611
1612 if (sock->pending_iocp > 0
1613 || sock->pending_recv > 0
1614 || sock->pending_send > 0
1615 || sock->pending_accept > 0
1616 || sock->references > 0
1617 || sock->pending_connect == 1
1618 || !ISC_LIST_EMPTY(sock->recv_list)
1619 || !ISC_LIST_EMPTY(sock->send_list)
1620 || !ISC_LIST_EMPTY(sock->accept_list)
1621 || !ISC_LIST_EMPTY(sock->connect_list)
1622 || sock->fd != INVALID_SOCKET) {
1623 UNLOCK(&sock->lock);
1624 return;
1625 }
1626 UNLOCK(&sock->lock);
1627
1628 free_socket(&sock, lineno);
1629 }
1630
1631 void
free_socket(isc_socket_t ** sockp,int lineno)1632 free_socket(isc_socket_t **sockp, int lineno) {
1633 isc_socketmgr_t *manager;
1634 isc_socket_t *sock = *sockp;
1635 *sockp = NULL;
1636
1637 /*
1638 * Seems we can free the socket after all.
1639 */
1640 manager = sock->manager;
1641 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1642 ISC_MSGSET_SOCKET, ISC_MSG_DESTROYING,
1643 "freeing socket line %d fd %d lock %p semaphore %p",
1644 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1645
1646 sock->magic = 0;
1647 DESTROYLOCK(&sock->lock);
1648
1649 if (sock->recvbuf.base != NULL)
1650 isc_mem_put(manager->mctx, sock->recvbuf.base,
1651 sock->recvbuf.len);
1652
1653 LOCK(&manager->lock);
1654 if (ISC_LINK_LINKED(sock, link))
1655 ISC_LIST_UNLINK(manager->socklist, sock, link);
1656 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1657
1658 if (ISC_LIST_EMPTY(manager->socklist))
1659 SIGNAL(&manager->shutdown_ok);
1660 UNLOCK(&manager->lock);
1661 }
1662
1663 /*
1664 * Create a new 'type' socket managed by 'manager'. Events
1665 * will be posted to 'task' and when dispatched 'action' will be
1666 * called with 'arg' as the arg value. The new socket is returned
1667 * in 'socketp'.
1668 */
1669 static isc_result_t
socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp,isc_socket_t * dup_socket)1670 socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1671 isc_socket_t **socketp, isc_socket_t *dup_socket)
1672 {
1673 isc_socket_t *sock = NULL;
1674 isc_result_t result;
1675 #if defined(USE_CMSG)
1676 int on = 1;
1677 #endif
1678 #if defined(SO_RCVBUF)
1679 ISC_SOCKADDR_LEN_T optlen;
1680 int size;
1681 #endif
1682 int socket_errno;
1683 char strbuf[ISC_STRERRORSIZE];
1684
1685 REQUIRE(VALID_MANAGER(manager));
1686 REQUIRE(socketp != NULL && *socketp == NULL);
1687 REQUIRE(type != isc_sockettype_fdwatch);
1688
1689 #ifndef SOCK_RAW
1690 if (type == isc_sockettype_raw)
1691 return (ISC_R_NOTIMPLEMENTED);
1692 #endif
1693
1694 result = allocate_socket(manager, type, &sock);
1695 if (result != ISC_R_SUCCESS)
1696 return (result);
1697
1698 sock->pf = pf;
1699 switch (type) {
1700 case isc_sockettype_udp:
1701 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1702 if (sock->fd != INVALID_SOCKET) {
1703 result = connection_reset_fix(sock->fd);
1704 if (result != ISC_R_SUCCESS) {
1705 socket_log(__LINE__, sock,
1706 NULL, EVENT, NULL, 0, 0,
1707 "closed %d %d %d "
1708 "con_reset_fix_failed",
1709 sock->pending_recv,
1710 sock->pending_send,
1711 sock->references);
1712 closesocket(sock->fd);
1713 _set_state(sock, SOCK_CLOSED);
1714 sock->fd = INVALID_SOCKET;
1715 free_socket(&sock, __LINE__);
1716 return (result);
1717 }
1718 }
1719 break;
1720 case isc_sockettype_tcp:
1721 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1722 break;
1723 #ifdef SOCK_RAW
1724 case isc_sockettype_raw:
1725 sock->fd = socket(pf, SOCK_RAW, 0);
1726 #ifdef PF_ROUTE
1727 if (pf == PF_ROUTE)
1728 sock->bound = 1;
1729 #endif
1730 break;
1731 #endif
1732 }
1733
1734 if (sock->fd == INVALID_SOCKET) {
1735 socket_errno = WSAGetLastError();
1736 free_socket(&sock, __LINE__);
1737
1738 switch (socket_errno) {
1739 case WSAEMFILE:
1740 case WSAENOBUFS:
1741 return (ISC_R_NORESOURCES);
1742
1743 case WSAEPROTONOSUPPORT:
1744 case WSAEPFNOSUPPORT:
1745 case WSAEAFNOSUPPORT:
1746 return (ISC_R_FAMILYNOSUPPORT);
1747
1748 default:
1749 isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1750 UNEXPECTED_ERROR(__FILE__, __LINE__,
1751 "socket() %s: %s",
1752 isc_msgcat_get(isc_msgcat,
1753 ISC_MSGSET_GENERAL,
1754 ISC_MSG_FAILED,
1755 "failed"),
1756 strbuf);
1757 return (ISC_R_UNEXPECTED);
1758 }
1759 }
1760
1761 result = make_nonblock(sock->fd);
1762 if (result != ISC_R_SUCCESS) {
1763 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1764 "closed %d %d %d make_nonblock_failed",
1765 sock->pending_recv, sock->pending_send,
1766 sock->references);
1767 closesocket(sock->fd);
1768 sock->fd = INVALID_SOCKET;
1769 free_socket(&sock, __LINE__);
1770 return (result);
1771 }
1772
1773 /*
1774 * Use minimum mtu if possible.
1775 */
1776 use_min_mtu(sock);
1777
1778 #if defined(USE_CMSG) || defined(SO_RCVBUF)
1779 if (type == isc_sockettype_udp) {
1780
1781 #if defined(USE_CMSG)
1782 #if defined(ISC_PLATFORM_HAVEIPV6)
1783 #ifdef IPV6_RECVPKTINFO
1784 /* 2292bis */
1785 if ((pf == AF_INET6)
1786 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1787 (char *)&on, sizeof(on)) < 0)) {
1788 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1789 UNEXPECTED_ERROR(__FILE__, __LINE__,
1790 "setsockopt(%d, IPV6_RECVPKTINFO) "
1791 "%s: %s", sock->fd,
1792 isc_msgcat_get(isc_msgcat,
1793 ISC_MSGSET_GENERAL,
1794 ISC_MSG_FAILED,
1795 "failed"),
1796 strbuf);
1797 }
1798 #else
1799 /* 2292 */
1800 if ((pf == AF_INET6)
1801 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1802 (char *)&on, sizeof(on)) < 0)) {
1803 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1804 UNEXPECTED_ERROR(__FILE__, __LINE__,
1805 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1806 sock->fd,
1807 isc_msgcat_get(isc_msgcat,
1808 ISC_MSGSET_GENERAL,
1809 ISC_MSG_FAILED,
1810 "failed"),
1811 strbuf);
1812 }
1813 #endif /* IPV6_RECVPKTINFO */
1814 #endif /* ISC_PLATFORM_HAVEIPV6 */
1815 #endif /* defined(USE_CMSG) */
1816
1817 #if defined(SO_RCVBUF)
1818 optlen = sizeof(size);
1819 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1820 (char *)&size, &optlen) >= 0 &&
1821 size < RCVBUFSIZE) {
1822 size = RCVBUFSIZE;
1823 (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1824 (char *)&size, sizeof(size));
1825 }
1826 #endif
1827
1828 }
1829 #endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1830
1831 _set_state(sock, SOCK_OPEN);
1832 sock->references = 1;
1833 *socketp = sock;
1834
1835 iocompletionport_update(sock);
1836
1837 if (dup_socket) {
1838 #ifndef ISC_ALLOW_MAPPED
1839 isc__socket_ipv6only(sock, true);
1840 #endif
1841
1842 if (dup_socket->bound) {
1843 isc_sockaddr_t local;
1844
1845 result = isc__socket_getsockname(dup_socket, &local);
1846 if (result != ISC_R_SUCCESS) {
1847 isc_socket_close(sock);
1848 return (result);
1849 }
1850 result = isc__socket_bind(sock, &local,
1851 ISC_SOCKET_REUSEADDRESS);
1852 if (result != ISC_R_SUCCESS) {
1853 isc_socket_close(sock);
1854 return (result);
1855 }
1856 }
1857 sock->dupped = 1;
1858 }
1859
1860 /*
1861 * Note we don't have to lock the socket like we normally would because
1862 * there are no external references to it yet.
1863 */
1864 LOCK(&manager->lock);
1865 ISC_LIST_APPEND(manager->socklist, sock, link);
1866 InterlockedIncrement(&manager->totalSockets);
1867 UNLOCK(&manager->lock);
1868
1869 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1870 ISC_MSGSET_SOCKET, ISC_MSG_CREATED,
1871 "created %u type %u", sock->fd, type);
1872
1873 return (ISC_R_SUCCESS);
1874 }
1875
1876 isc_result_t
isc__socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp)1877 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1878 isc_socket_t **socketp)
1879 {
1880 return (socket_create(manager, pf, type, socketp, NULL));
1881 }
1882
1883 isc_result_t
isc__socket_dup(isc_socket_t * sock,isc_socket_t ** socketp)1884 isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) {
1885 REQUIRE(VALID_SOCKET(sock));
1886 REQUIRE(socketp != NULL && *socketp == NULL);
1887
1888 return (socket_create(sock->manager, sock->pf, sock->type,
1889 socketp, sock));
1890 }
1891
1892 isc_result_t
isc_socket_open(isc_socket_t * sock)1893 isc_socket_open(isc_socket_t *sock) {
1894 REQUIRE(VALID_SOCKET(sock));
1895 REQUIRE(sock->type != isc_sockettype_fdwatch);
1896
1897 return (ISC_R_NOTIMPLEMENTED);
1898 }
1899
1900 /*
1901 * Attach to a socket. Caller must explicitly detach when it is done.
1902 */
1903 void
isc__socket_attach(isc_socket_t * sock,isc_socket_t ** socketp)1904 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1905 REQUIRE(VALID_SOCKET(sock));
1906 REQUIRE(socketp != NULL && *socketp == NULL);
1907
1908 LOCK(&sock->lock);
1909 CONSISTENT(sock);
1910 sock->references++;
1911 UNLOCK(&sock->lock);
1912
1913 *socketp = sock;
1914 }
1915
1916 /*
1917 * Dereference a socket. If this is the last reference to it, clean things
1918 * up by destroying the socket.
1919 */
1920 void
isc__socket_detach(isc_socket_t ** socketp)1921 isc__socket_detach(isc_socket_t **socketp) {
1922 isc_socket_t *sock;
1923
1924 REQUIRE(socketp != NULL);
1925 sock = *socketp;
1926 REQUIRE(VALID_SOCKET(sock));
1927 REQUIRE(sock->type != isc_sockettype_fdwatch);
1928
1929 LOCK(&sock->lock);
1930 CONSISTENT(sock);
1931 REQUIRE(sock->references > 0);
1932 sock->references--;
1933
1934 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1935 "detach_socket %d %d %d",
1936 sock->pending_recv, sock->pending_send,
1937 sock->references);
1938
1939 if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1940 closesocket(sock->fd);
1941 sock->fd = INVALID_SOCKET;
1942 _set_state(sock, SOCK_CLOSED);
1943 }
1944
1945 maybe_free_socket(&sock, __LINE__);
1946
1947 *socketp = NULL;
1948 }
1949
1950 isc_result_t
isc_socket_close(isc_socket_t * sock)1951 isc_socket_close(isc_socket_t *sock) {
1952 REQUIRE(VALID_SOCKET(sock));
1953 REQUIRE(sock->type != isc_sockettype_fdwatch);
1954
1955 return (ISC_R_NOTIMPLEMENTED);
1956 }
1957
1958 /*
1959 * Dequeue an item off the given socket's read queue, set the result code
1960 * in the done event to the one provided, and send it to the task it was
1961 * destined for.
1962 *
1963 * If the event to be sent is on a list, remove it before sending. If
1964 * asked to, send and detach from the task as well.
1965 *
1966 * Caller must have the socket locked if the event is attached to the socket.
1967 */
1968 static void
send_recvdone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1969 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1970 isc_task_t *task;
1971
1972 task = (*dev)->ev_sender;
1973 (*dev)->ev_sender = sock;
1974
1975 if (ISC_LINK_LINKED(*dev, ev_link))
1976 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1977
1978 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) {
1979 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1980 } else {
1981 isc_task_send(task, (isc_event_t **)dev);
1982 }
1983
1984 CONSISTENT(sock);
1985 }
1986
1987 /*
1988 * See comments for send_recvdone_event() above.
1989 */
1990 static void
send_senddone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1991 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1992 isc_task_t *task;
1993
1994 INSIST(dev != NULL && *dev != NULL);
1995
1996 task = (*dev)->ev_sender;
1997 (*dev)->ev_sender = sock;
1998
1999 if (ISC_LINK_LINKED(*dev, ev_link))
2000 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
2001
2002 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) != 0) {
2003 isc_task_sendanddetach(&task, (isc_event_t **)dev);
2004 } else {
2005 isc_task_send(task, (isc_event_t **)dev);
2006 }
2007
2008 CONSISTENT(sock);
2009 }
2010
2011 /*
2012 * See comments for send_recvdone_event() above.
2013 */
2014 static void
send_acceptdone_event(isc_socket_t * sock,isc_socket_newconnev_t ** adev)2015 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
2016 isc_task_t *task;
2017
2018 INSIST(adev != NULL && *adev != NULL);
2019
2020 task = (*adev)->ev_sender;
2021 (*adev)->ev_sender = sock;
2022
2023 if (ISC_LINK_LINKED(*adev, ev_link))
2024 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
2025
2026 isc_task_sendanddetach(&task, (isc_event_t **)adev);
2027
2028 CONSISTENT(sock);
2029 }
2030
2031 /*
2032 * See comments for send_recvdone_event() above.
2033 */
2034 static void
send_connectdone_event(isc_socket_t * sock,isc_socket_connev_t ** cdev)2035 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
2036 isc_task_t *task;
2037
2038 INSIST(cdev != NULL && *cdev != NULL);
2039
2040 task = (*cdev)->ev_sender;
2041 (*cdev)->ev_sender = sock;
2042
2043 if (ISC_LINK_LINKED(*cdev, ev_link))
2044 ISC_LIST_DEQUEUE(sock->connect_list, *cdev, ev_link);
2045
2046 isc_task_sendanddetach(&task, (isc_event_t **)cdev);
2047
2048 CONSISTENT(sock);
2049 }
2050
2051 /*
2052 * On entry to this function, the event delivered is the internal
2053 * readable event, and the first item on the accept_list should be
2054 * the done event we want to send. If the list is empty, this is a no-op,
2055 * so just close the new connection, unlock, and return.
2056 *
2057 * Note the socket is locked before entering here
2058 */
2059 static void
internal_accept(isc_socket_t * sock,IoCompletionInfo * lpo,int accept_errno)2060 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2061 isc_socket_newconnev_t *adev;
2062 isc_result_t result = ISC_R_SUCCESS;
2063 isc_socket_t *nsock;
2064 struct sockaddr *localaddr;
2065 int localaddr_len = sizeof(*localaddr);
2066 struct sockaddr *remoteaddr;
2067 int remoteaddr_len = sizeof(*remoteaddr);
2068
2069 INSIST(VALID_SOCKET(sock));
2070 LOCK(&sock->lock);
2071 CONSISTENT(sock);
2072
2073 socket_log(__LINE__, sock, NULL, TRACE,
2074 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2075 "internal_accept called");
2076
2077 INSIST(sock->listener);
2078
2079 INSIST(sock->pending_iocp > 0);
2080 sock->pending_iocp--;
2081 INSIST(sock->pending_accept > 0);
2082 sock->pending_accept--;
2083
2084 adev = lpo->adev;
2085
2086 /*
2087 * If the event is no longer in the list we can just return.
2088 */
2089 if (!acceptdone_is_active(sock, adev))
2090 goto done;
2091
2092 nsock = adev->newsocket;
2093
2094 /*
2095 * Pull off the done event.
2096 */
2097 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2098
2099 /*
2100 * Extract the addresses from the socket, copy them into the structure,
2101 * and return the new socket.
2102 */
2103 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2104 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2105 (LPSOCKADDR *)&localaddr, &localaddr_len,
2106 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2107 memmove(&adev->address.type, remoteaddr, remoteaddr_len);
2108 adev->address.length = remoteaddr_len;
2109 nsock->address = adev->address;
2110 nsock->pf = adev->address.type.sa.sa_family;
2111
2112 socket_log(__LINE__, nsock, &nsock->address, TRACE,
2113 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2114 "internal_accept parent %p", sock);
2115
2116 result = make_nonblock(adev->newsocket->fd);
2117 INSIST(result == ISC_R_SUCCESS);
2118
2119 /*
2120 * Use minimum mtu if possible.
2121 */
2122 use_min_mtu(adev->newsocket);
2123
2124 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2125 (char *)&sock->fd, sizeof(sock->fd)) == 0);
2126
2127 /*
2128 * Hook it up into the manager.
2129 */
2130 nsock->bound = 1;
2131 nsock->connected = 1;
2132 _set_state(nsock, SOCK_OPEN);
2133
2134 LOCK(&nsock->manager->lock);
2135 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2136 InterlockedIncrement(&nsock->manager->totalSockets);
2137 UNLOCK(&nsock->manager->lock);
2138
2139 socket_log(__LINE__, sock, &nsock->address, CREATION,
2140 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2141 "accepted_connection new_socket %p fd %d",
2142 nsock, nsock->fd);
2143
2144 adev->result = result;
2145 send_acceptdone_event(sock, &adev);
2146
2147 done:
2148 CONSISTENT(sock);
2149 UNLOCK(&sock->lock);
2150
2151 HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2152 lpo->acceptbuffer = NULL;
2153 }
2154
2155 /*
2156 * Called when a socket with a pending connect() finishes.
2157 * Note that the socket is locked before entering.
2158 */
2159 static void
internal_connect(isc_socket_t * sock,IoCompletionInfo * lpo,int connect_errno)2160 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2161 isc_socket_connev_t *cdev;
2162 isc_result_t result;
2163 char strbuf[ISC_STRERRORSIZE];
2164
2165 INSIST(VALID_SOCKET(sock));
2166
2167 LOCK(&sock->lock);
2168
2169 INSIST(sock->pending_iocp > 0);
2170 sock->pending_iocp--;
2171 INSIST(sock->pending_connect == 1);
2172 sock->pending_connect = 0;
2173
2174 /*
2175 * If the event is no longer in the list we can just close and return.
2176 */
2177 cdev = lpo->cdev;
2178 if (!connectdone_is_active(sock, cdev)) {
2179 sock->pending_connect = 0;
2180 if (sock->fd != INVALID_SOCKET) {
2181 closesocket(sock->fd);
2182 sock->fd = INVALID_SOCKET;
2183 _set_state(sock, SOCK_CLOSED);
2184 }
2185 CONSISTENT(sock);
2186 UNLOCK(&sock->lock);
2187 return;
2188 }
2189
2190 /*
2191 * Check possible Windows network event error status here.
2192 */
2193 if (connect_errno != 0) {
2194 /*
2195 * If the error is SOFT, just try again on this
2196 * fd and pretend nothing strange happened.
2197 */
2198 if (SOFT_ERROR(connect_errno) ||
2199 connect_errno == WSAEINPROGRESS) {
2200 sock->pending_connect = 1;
2201 CONSISTENT(sock);
2202 UNLOCK(&sock->lock);
2203 return;
2204 }
2205
2206 /*
2207 * Translate other errors into ISC_R_* flavors.
2208 */
2209 switch (connect_errno) {
2210 #define ERROR_MATCH(a, b) case a: result = b; break;
2211 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2212 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2213 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2214 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2215 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2216 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2217 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2218 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2219 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2220 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2221 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2222 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2223 #undef ERROR_MATCH
2224 default:
2225 result = ISC_R_UNEXPECTED;
2226 isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2227 UNEXPECTED_ERROR(__FILE__, __LINE__,
2228 "internal_connect: connect() %s",
2229 strbuf);
2230 }
2231 } else {
2232 INSIST(setsockopt(sock->fd, SOL_SOCKET,
2233 SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2234 result = ISC_R_SUCCESS;
2235 sock->connected = 1;
2236 socket_log(__LINE__, sock, &sock->address, IOEVENT,
2237 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2238 "internal_connect: success");
2239 }
2240
2241 do {
2242 cdev->result = result;
2243 send_connectdone_event(sock, &cdev);
2244 cdev = ISC_LIST_HEAD(sock->connect_list);
2245 } while (cdev != NULL);
2246
2247 UNLOCK(&sock->lock);
2248 }
2249
2250 /*
2251 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2252 */
2253 static void
send_recvdone_abort(isc_socket_t * sock,isc_result_t result)2254 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2255 isc_socketevent_t *dev;
2256
2257 while (!ISC_LIST_EMPTY(sock->recv_list)) {
2258 dev = ISC_LIST_HEAD(sock->recv_list);
2259 dev->result = result;
2260 send_recvdone_event(sock, &dev);
2261 }
2262 }
2263
2264 /*
2265 * Loop through the socket, returning result for each done event pending.
2266 */
2267 static void
send_connectdone_abort(isc_socket_t * sock,isc_result_t result)2268 send_connectdone_abort(isc_socket_t *sock, isc_result_t result) {
2269 isc_socket_connev_t *dev;
2270
2271 while (!ISC_LIST_EMPTY(sock->connect_list)) {
2272 dev = ISC_LIST_HEAD(sock->connect_list);
2273 dev->result = result;
2274 send_connectdone_event(sock, &dev);
2275 }
2276 }
2277
2278 /*
2279 * Take the data we received in our private buffer, and if any recv() calls on
2280 * our list are satisfied, send the corresponding done event.
2281 *
2282 * If we need more data (there are still items on the recv_list after we consume all
2283 * our data) then arrange for another system recv() call to fill our buffers.
2284 */
2285 static void
internal_recv(isc_socket_t * sock,int nbytes)2286 internal_recv(isc_socket_t *sock, int nbytes)
2287 {
2288 INSIST(VALID_SOCKET(sock));
2289
2290 LOCK(&sock->lock);
2291 CONSISTENT(sock);
2292
2293 socket_log(__LINE__, sock, NULL, IOEVENT,
2294 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2295 "internal_recv: %d bytes received", nbytes);
2296
2297 /*
2298 * If we got here, the I/O operation succeeded. However, we might
2299 * still have removed this event from our notification list (or never
2300 * placed it on it due to immediate completion.)
2301 * Handle the reference counting here, and handle the cancellation
2302 * event just after.
2303 */
2304 INSIST(sock->pending_iocp > 0);
2305 sock->pending_iocp--;
2306 INSIST(sock->pending_recv > 0);
2307 sock->pending_recv--;
2308
2309 /*
2310 * The only way we could have gotten here is that our I/O has
2311 * successfully completed. Update our pointers, and move on.
2312 * The only odd case here is that we might not have received
2313 * enough data on a TCP stream to satisfy the minimum requirements.
2314 * If this is the case, we will re-issue the recv() call for what
2315 * we need.
2316 *
2317 * We do check for a recv() of 0 bytes on a TCP stream. This
2318 * means the remote end has closed.
2319 */
2320 if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2321 send_recvdone_abort(sock, ISC_R_EOF);
2322 maybe_free_socket(&sock, __LINE__);
2323 return;
2324 }
2325 sock->recvbuf.remaining = nbytes;
2326 sock->recvbuf.consume_position = sock->recvbuf.base;
2327 completeio_recv(sock);
2328
2329 /*
2330 * If there are more receivers waiting for data, queue another receive
2331 * here.
2332 */
2333 queue_receive_request(sock);
2334
2335 /*
2336 * Unlock and/or destroy if we are the last thing this socket has left to do.
2337 */
2338 maybe_free_socket(&sock, __LINE__);
2339 }
2340
2341 static void
internal_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int nbytes,int send_errno,IoCompletionInfo * lpo)2342 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2343 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2344 {
2345 buflist_t *buffer;
2346
2347 /*
2348 * Find out what socket this is and lock it.
2349 */
2350 INSIST(VALID_SOCKET(sock));
2351
2352 LOCK(&sock->lock);
2353 CONSISTENT(sock);
2354
2355 socket_log(__LINE__, sock, NULL, IOEVENT,
2356 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2357 "internal_send: task got socket event %p", dev);
2358
2359 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2360 while (buffer != NULL) {
2361 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2362
2363 socket_log(__LINE__, sock, NULL, TRACE,
2364 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2365 "free_buffer %p %p", buffer, buffer->buf);
2366
2367 HeapFree(hHeapHandle, 0, buffer->buf);
2368 HeapFree(hHeapHandle, 0, buffer);
2369 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2370 }
2371
2372 INSIST(sock->pending_iocp > 0);
2373 sock->pending_iocp--;
2374 INSIST(sock->pending_send > 0);
2375 sock->pending_send--;
2376
2377 /* If the event is no longer in the list we can just return */
2378 if (!senddone_is_active(sock, dev))
2379 goto done;
2380
2381 /*
2382 * Set the error code and send things on its way.
2383 */
2384 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2385 case DOIO_SOFT:
2386 break;
2387 case DOIO_HARD:
2388 case DOIO_SUCCESS:
2389 send_senddone_event(sock, &dev);
2390 break;
2391 }
2392
2393 done:
2394 maybe_free_socket(&sock, __LINE__);
2395 }
2396
2397 /*
2398 * These return if the done event passed in is on the list.
2399 * Using these ensures we will not double-send an event.
2400 */
2401 static bool
senddone_is_active(isc_socket_t * sock,isc_socketevent_t * dev)2402 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2403 {
2404 isc_socketevent_t *ldev;
2405
2406 ldev = ISC_LIST_HEAD(sock->send_list);
2407 while (ldev != NULL && ldev != dev)
2408 ldev = ISC_LIST_NEXT(ldev, ev_link);
2409
2410 return (ldev == NULL ? false : true);
2411 }
2412
2413 static bool
acceptdone_is_active(isc_socket_t * sock,isc_socket_newconnev_t * dev)2414 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2415 {
2416 isc_socket_newconnev_t *ldev;
2417
2418 ldev = ISC_LIST_HEAD(sock->accept_list);
2419 while (ldev != NULL && ldev != dev)
2420 ldev = ISC_LIST_NEXT(ldev, ev_link);
2421
2422 return (ldev == NULL ? false : true);
2423 }
2424
2425 static bool
connectdone_is_active(isc_socket_t * sock,isc_socket_connev_t * dev)2426 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2427 {
2428 isc_socket_connev_t *cdev;
2429
2430 cdev = ISC_LIST_HEAD(sock->connect_list);
2431 while (cdev != NULL && cdev != dev)
2432 cdev = ISC_LIST_NEXT(cdev, ev_link);
2433
2434 return (cdev == NULL ? false : true);
2435 }
2436
2437 //
2438 // The Windows network stack seems to have two very distinct paths depending
2439 // on what is installed. Specifically, if something is looking at network
2440 // connections (like an anti-virus or anti-malware application, such as
2441 // McAfee products) Windows may return additional error conditions which
2442 // were not previously returned.
2443 //
2444 // One specific one is when a TCP SYN scan is used. In this situation,
2445 // Windows responds with the SYN-ACK, but the scanner never responds with
2446 // the 3rd packet, the ACK. Windows considers this a partially open connection.
2447 // Most Unix networking stacks, and Windows without McAfee installed, will
2448 // not return this to the caller. However, with this product installed,
2449 // Windows returns this as a failed status on the Accept() call. Here, we
2450 // will just re-issue the ISCAcceptEx() call as if nothing had happened.
2451 //
2452 // This code should only be called when the listening socket has received
2453 // such an error. Additionally, the "parent" socket must be locked.
2454 // Additionally, the lpo argument is re-used here, and must not be freed
2455 // by the caller.
2456 //
2457 static isc_result_t
restart_accept(isc_socket_t * parent,IoCompletionInfo * lpo)2458 restart_accept(isc_socket_t *parent, IoCompletionInfo *lpo)
2459 {
2460 isc_socket_t *nsock = lpo->adev->newsocket;
2461 SOCKET new_fd;
2462
2463 /*
2464 * AcceptEx() requires we pass in a socket. Note that we carefully
2465 * do not close the previous socket in case of an error message returned by
2466 * our new socket() call. If we return an error here, our caller will
2467 * clean up.
2468 */
2469 new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP);
2470 if (nsock->fd == INVALID_SOCKET) {
2471 return (ISC_R_FAILURE); // parent will ask windows for error message
2472 }
2473 closesocket(nsock->fd);
2474 nsock->fd = new_fd;
2475
2476 memset(&lpo->overlapped, 0, sizeof(lpo->overlapped));
2477
2478 ISCAcceptEx(parent->fd,
2479 nsock->fd, /* Accepted Socket */
2480 lpo->acceptbuffer, /* Buffer for initial Recv */
2481 0, /* Length of Buffer */
2482 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
2483 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address length + 16 */
2484 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
2485 (LPOVERLAPPED)lpo /* Overlapped structure */
2486 );
2487
2488 InterlockedDecrement(&nsock->manager->iocp_total);
2489 iocompletionport_update(nsock);
2490
2491 return (ISC_R_SUCCESS);
2492 }
2493
/*
 * This is the I/O Completion Port Worker Function. It loops forever
 * waiting for I/O to complete and then forwards them for further
 * processing. There are a number of these in separate threads.
 *
 * 'ThreadContext' is the socket manager whose completion port is
 * serviced.  Each dequeued completion carries the socket (as the
 * completion key) and an IoCompletionInfo (as the overlapped pointer)
 * whose request_type selects the handler.  A completion with a NULL
 * overlapped pointer is the request to exit.
 *
 * Failed completions (other than ERROR_MORE_DATA) are handled inline
 * under the socket lock; all others are dispatched to the internal_*()
 * handlers, which take the socket lock themselves.  The IoCompletionInfo
 * is freed here after handling, except when it is handed back to
 * restart_accept() for reuse.
 */
static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext) {
	isc_socketmgr_t *manager = ThreadContext;
	DWORD nbytes;
	IoCompletionInfo *lpo = NULL;
	isc_socket_t *sock = NULL;
	int request;
	struct msghdr *messagehdr = NULL;
	int errval;
	char strbuf[ISC_STRERRORSIZE];
	int errstatus;

	REQUIRE(VALID_MANAGER(manager));

	/*
	 * Set the thread priority high enough so I/O will
	 * preempt normal recv packet processing, but not
	 * higher than the timer sync thread.
	 */
	if (!SetThreadPriority(GetCurrentThread(),
			       THREAD_PRIORITY_ABOVE_NORMAL)) {
		errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__,
				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
				ISC_MSG_FAILED,
				"Can't set thread priority: %s"),
				strbuf);
	}

	/*
	 * Loop forever waiting on I/O Completions and then processing them
	 */
	while (TRUE) {
		BOOL bSuccess;

		/*
		 * Re-entered directly after a successful restart_accept(),
		 * bypassing the per-completion cleanup at the bottom of the
		 * loop because 'lpo' is still in use by the new request.
		 */
		wait_again:
		bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
						     &nbytes,
						     (PULONG_PTR)&sock,
						     (LPWSAOVERLAPPED *)&lpo,
						     INFINITE);
		if (lpo == NULL) /* Received request to exit */
			break;

		REQUIRE(VALID_SOCKET(sock));

		request = lpo->request_type;

		/* Capture the failure code right after the failing call. */
		if (!bSuccess)
			errstatus = GetLastError();
		else
			errstatus = 0;
		if (!bSuccess && errstatus != ERROR_MORE_DATA) {
			isc_result_t isc_result;

			/*
			 * Did the I/O operation complete?
			 */
			isc_result = isc__errno2result(errstatus);

			LOCK(&sock->lock);
			CONSISTENT(sock);
			switch (request) {
			case SOCKET_RECV:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_recv > 0);
				sock->pending_recv--;
				/*
				 * On an unconnected socket these errors are
				 * not fatal: simply queue another receive.
				 */
				if (!sock->connected &&
				    ((errstatus == ERROR_HOST_UNREACHABLE) ||
				     (errstatus == WSAENETRESET) ||
				     (errstatus == WSAECONNRESET))) {
					/* ignore soft errors */
					queue_receive_request(sock);
					break;
				}
				/* Fail every waiting receiver with the error. */
				send_recvdone_abort(sock, isc_result);
				if (isc_result == ISC_R_UNEXPECTED) {
					UNEXPECTED_ERROR(__FILE__, __LINE__,
						"SOCKET_RECV: Windows error code: %d, returning ISC error %d",
						errstatus, isc_result);
				}
				break;

			case SOCKET_SEND:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_send > 0);
				sock->pending_send--;
				/* Report the error only if the event is still queued. */
				if (senddone_is_active(sock, lpo->dev)) {
					lpo->dev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_send");
					send_senddone_event(sock, &lpo->dev);
				}
				break;

			case SOCKET_ACCEPT:
				/*
				 * The counters are only checked here; they
				 * are decremented below once it is known
				 * that the accept will not be restarted.
				 */
				INSIST(sock->pending_iocp > 0);
				INSIST(sock->pending_accept > 0);

				socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
					"Accept: errstatus=%d isc_result=%d", errstatus, isc_result);

				if (acceptdone_is_active(sock, lpo->adev)) {
					/*
					 * Try to re-issue the accept.  On
					 * success 'lpo' now belongs to the
					 * new request, so it must not be
					 * freed: drop the lock and wait.
					 */
					if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
						UNLOCK(&sock->lock);
						goto wait_again;
					} else {
						errstatus = GetLastError();
						isc_result = isc__errno2result(errstatus);
						socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
							"restart_accept() failed: errstatus=%d isc_result=%d",
							errstatus, isc_result);
					}
				}

				sock->pending_iocp--;
				sock->pending_accept--;
				/*
				 * The accept could not be restarted: dispose
				 * of the pre-created socket and report the
				 * error on the accept event.
				 */
				if (acceptdone_is_active(sock, lpo->adev)) {
					closesocket(lpo->adev->newsocket->fd);
					lpo->adev->newsocket->fd = INVALID_SOCKET;
					lpo->adev->newsocket->references--;
					free_socket(&lpo->adev->newsocket, __LINE__);
					lpo->adev->result = isc_result;
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_accept");
					send_acceptdone_event(sock, &lpo->adev);
				}
				break;

			case SOCKET_CONNECT:
				INSIST(sock->pending_iocp > 0);
				sock->pending_iocp--;
				INSIST(sock->pending_connect == 1);
				sock->pending_connect = 0;
				/* Fail all queued connect events with the error. */
				if (connectdone_is_active(sock, lpo->cdev)) {
					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
						"canceled_connect");
					send_connectdone_abort(sock, isc_result);
				}
				break;
			}
			/*
			 * NOTE(review): maybe_free_socket() is relied on to
			 * release the socket lock taken above (and possibly
			 * destroy the socket) -- confirm against its
			 * definition.
			 */
			maybe_free_socket(&sock, __LINE__);

			if (lpo != NULL)
				HeapFree(hHeapHandle, 0, lpo);
			continue;
		}

		messagehdr = &lpo->messagehdr;

		/*
		 * Successful completion (or ERROR_MORE_DATA): hand off to
		 * the per-request handler, which locks the socket itself.
		 */
		switch (request) {
		case SOCKET_RECV:
			internal_recv(sock, nbytes);
			break;
		case SOCKET_SEND:
			internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
			break;
		case SOCKET_ACCEPT:
			internal_accept(sock, lpo, errstatus);
			break;
		case SOCKET_CONNECT:
			internal_connect(sock, lpo, errstatus);
			break;
		}

		/* The completion record is no longer needed. */
		if (lpo != NULL)
			HeapFree(hHeapHandle, 0, lpo);
	}

	/*
	 * Exit Completion Port Thread
	 */
	manager_log(manager, TRACE,
		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
				   ISC_MSG_EXITING, "SocketIoThread exiting"));
	return ((isc_threadresult_t)0);
}
2679
/*
 * Create a new socket manager.
 *
 * Convenience form of isc_socketmgr_create2() that passes 0 for
 * 'maxsocks'; the result of that call is returned unchanged.
 */
isc_result_t
isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
	return (isc_socketmgr_create2(mctx, managerp, 0));
}
2687
/*
 * Create a new socket manager using memory context 'mctx' and store it
 * in '*managerp', which must point to a NULL pointer.
 *
 * 'maxsocks' must be 0; any other value is rejected.
 *
 * Returns:
 *	ISC_R_SUCCESS		the manager was created
 *	ISC_R_NOTIMPLEMENTED	'maxsocks' was non-zero
 *	ISC_R_NOMEMORY		the manager could not be allocated
 *	ISC_R_UNEXPECTED	the condition variable could not be set up
 *	(other)			whatever isc_mutex_init() reported on failure
 */
isc_result_t
isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
		       unsigned int maxsocks)
{
	isc_socketmgr_t *manager;
	isc_result_t result;

	REQUIRE(managerp != NULL && *managerp == NULL);

	if (maxsocks != 0)
		return (ISC_R_NOTIMPLEMENTED);

	manager = isc_mem_get(mctx, sizeof(*manager));
	if (manager == NULL)
		return (ISC_R_NOMEMORY);

	InitSockets();

	manager->magic = SOCKET_MANAGER_MAGIC;
	manager->mctx = NULL;
	manager->stats = NULL;
	ISC_LIST_INIT(manager->socklist);
	result = isc_mutex_init(&manager->lock);
	if (result != ISC_R_SUCCESS) {
		/* Nothing but the allocation exists yet; give it back. */
		isc_mem_put(mctx, manager, sizeof(*manager));
		return (result);
	}
	if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
		/* Undo the mutex and the allocation before reporting. */
		DESTROYLOCK(&manager->lock);
		isc_mem_put(mctx, manager, sizeof(*manager));
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_condition_init() %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
		return (ISC_R_UNEXPECTED);
	}

	/* From here on the manager holds its own reference to 'mctx'. */
	isc_mem_attach(mctx, &manager->mctx);

	iocompletionport_init(manager);	/* Create the Completion Ports */

	manager->bShutdown = false;
	manager->totalSockets = 0;
	manager->iocp_total = 0;
	manager->maxudp = 0;

	*managerp = manager;

	return (ISC_R_SUCCESS);
}
2738
/*
 * Query the maximum number of sockets supported by 'manager'.
 *
 * Not supported by this implementation: after validating its arguments
 * the function always returns ISC_R_NOTIMPLEMENTED and never writes to
 * '*nsockp'.
 */
isc_result_t
isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(nsockp != NULL);

	return (ISC_R_NOTIMPLEMENTED);
}
2746
/*
 * Attach the statistics set 'stats' to 'manager'.
 *
 * Requires that the manager has no sockets yet, has no statistics set
 * attached already, and that 'stats' provides exactly
 * isc_sockstatscounter_max counters.
 */
void
isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(ISC_LIST_EMPTY(manager->socklist));
	REQUIRE(manager->stats == NULL);
	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);

	isc_stats_attach(stats, &manager->stats);
}
2756
/*
 * Destroy the socket manager '*managerp' and set '*managerp' to NULL.
 *
 * Blocks until every socket on the manager's list is gone, then asks the
 * completion port worker threads to exit, joins them, and releases the
 * manager's resources.
 */
void
isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
	isc_socketmgr_t *manager;
	int i;
	isc_mem_t *mctx;

	/*
	 * Destroy a socket manager.
	 */

	REQUIRE(managerp != NULL);
	manager = *managerp;
	REQUIRE(VALID_MANAGER(manager));

	LOCK(&manager->lock);

	/*
	 * Wait for all sockets to be destroyed.
	 */
	while (!ISC_LIST_EMPTY(manager->socklist)) {
		manager_log(manager, CREATION,
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
					   ISC_MSG_SOCKETSREMAIN,
					   "sockets exist"));
		WAIT(&manager->shutdown_ok, &manager->lock);
	}

	UNLOCK(&manager->lock);

	/*
	 * Here, we need to add some wait code for the completion port
	 * thread.
	 */
	signal_iocompletionport_exit(manager);
	manager->bShutdown = true;

	/*
	 * Wait for threads to exit.
	 */
	for (i = 0; i < manager->maxIOCPThreads; i++) {
		if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
				    NULL) != ISC_R_SUCCESS)
			UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_thread_join() for Completion Port %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
	}
	/*
	 * Clean up.
	 */

	CloseHandle(manager->hIoCompletionPort);

	(void)isc_condition_destroy(&manager->shutdown_ok);

	DESTROYLOCK(&manager->lock);
	if (manager->stats != NULL)
		isc_stats_detach(&manager->stats);
	manager->magic = 0;
	/*
	 * Keep the memory context in a local: the manager structure that
	 * holds it is returned to that context before the detach.
	 */
	mctx= manager->mctx;
	isc_mem_put(mctx, manager, sizeof(*manager));

	isc_mem_detach(&mctx);

	*managerp = NULL;
}
2823
2824 static void
queue_receive_event(isc_socket_t * sock,isc_task_t * task,isc_socketevent_t * dev)2825 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2826 {
2827 isc_task_t *ntask = NULL;
2828
2829 isc_task_attach(task, &ntask);
2830 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2831
2832 /*
2833 * Enqueue the request.
2834 */
2835 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2836 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2837
2838 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2839 "queue_receive_event: event %p -> task %p",
2840 dev, ntask);
2841 }
2842
2843 /*
2844 * Check the pending receive queue, and if we have data pending, give it to this
2845 * caller. If we have none, queue an I/O request. If this caller is not the first
2846 * on the list, then we will just queue this event and return.
2847 *
2848 * Caller must have the socket locked.
2849 */
2850 static isc_result_t
socket_recv(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,unsigned int flags)2851 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2852 unsigned int flags)
2853 {
2854 isc_result_t result = ISC_R_SUCCESS;
2855
2856 dev->ev_sender = task;
2857
2858 if (sock->fd == INVALID_SOCKET)
2859 return (ISC_R_EOF);
2860
2861 /*
2862 * Queue our event on the list of things to do. Call our function to
2863 * attempt to fill buffers as much as possible, and return done events.
2864 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2865 * here and tell our caller that we could not satisfy it immediately.
2866 */
2867 queue_receive_event(sock, task, dev);
2868 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2869 result = ISC_R_INPROGRESS;
2870
2871 completeio_recv(sock);
2872
2873 /*
2874 * If there are more receivers waiting for data, queue another receive
2875 * here. If the
2876 */
2877 queue_receive_request(sock);
2878
2879 return (result);
2880 }
2881
2882 isc_result_t
isc__socket_recvv(isc_socket_t * sock,isc_bufferlist_t * buflist,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,void * arg)2883 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2884 unsigned int minimum, isc_task_t *task,
2885 isc_taskaction_t action, void *arg)
2886 {
2887 isc_socketevent_t *dev;
2888 isc_socketmgr_t *manager;
2889 unsigned int iocount;
2890 isc_buffer_t *buffer;
2891 isc_result_t ret;
2892
2893 REQUIRE(VALID_SOCKET(sock));
2894 LOCK(&sock->lock);
2895 CONSISTENT(sock);
2896
2897 /*
2898 * Make sure that the socket is not closed. XXXMLG change error here?
2899 */
2900 if (sock->fd == INVALID_SOCKET) {
2901 UNLOCK(&sock->lock);
2902 return (ISC_R_CONNREFUSED);
2903 }
2904
2905 REQUIRE(buflist != NULL);
2906 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2907 REQUIRE(task != NULL);
2908 REQUIRE(action != NULL);
2909
2910 manager = sock->manager;
2911 REQUIRE(VALID_MANAGER(manager));
2912
2913 iocount = isc_bufferlist_availablecount(buflist);
2914 REQUIRE(iocount > 0);
2915
2916 INSIST(sock->bound);
2917
2918 dev = allocate_socketevent(manager->mctx, sock,
2919 ISC_SOCKEVENT_RECVDONE, action, arg);
2920 if (dev == NULL) {
2921 UNLOCK(&sock->lock);
2922 return (ISC_R_NOMEMORY);
2923 }
2924
2925 /*
2926 * UDP sockets are always partial read
2927 */
2928 if (sock->type == isc_sockettype_udp)
2929 dev->minimum = 1;
2930 else {
2931 if (minimum == 0)
2932 dev->minimum = iocount;
2933 else
2934 dev->minimum = minimum;
2935 }
2936
2937 /*
2938 * Move each buffer from the passed in list to our internal one.
2939 */
2940 buffer = ISC_LIST_HEAD(*buflist);
2941 while (buffer != NULL) {
2942 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2943 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2944 buffer = ISC_LIST_HEAD(*buflist);
2945 }
2946
2947 ret = socket_recv(sock, dev, task, 0);
2948
2949 UNLOCK(&sock->lock);
2950 return (ret);
2951 }
2952
/*
 * Start a receive into 'region'.  A receive-done event is allocated for
 * 'action'/'arg' and the request is passed on to isc_socket_recv2() with
 * no flags set.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, and otherwise the
 * result of isc_socket_recv2().
 */
isc_result_t
isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
		 unsigned int minimum, isc_task_t *task,
		 isc_taskaction_t action, void *arg)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	INSIST(sock->bound);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_RECVDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}

	/*
	 * NOTE(review): this call is made with sock->lock held, and
	 * isc_socket_recv2() presumably resolves to isc__socket_recv2(),
	 * which locks the socket again -- this relies on the lock being
	 * recursive; confirm.
	 */
	ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
	UNLOCK(&sock->lock);
	return (ret);
}
2991
2992 isc_result_t
isc__socket_recv2(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_socketevent_t * event,unsigned int flags)2993 isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2994 unsigned int minimum, isc_task_t *task,
2995 isc_socketevent_t *event, unsigned int flags)
2996 {
2997 isc_result_t ret;
2998
2999 REQUIRE(VALID_SOCKET(sock));
3000 LOCK(&sock->lock);
3001 CONSISTENT(sock);
3002
3003 event->result = ISC_R_UNEXPECTED;
3004 event->ev_sender = sock;
3005 /*
3006 * make sure that the socket's not closed
3007 */
3008 if (sock->fd == INVALID_SOCKET) {
3009 UNLOCK(&sock->lock);
3010 return (ISC_R_CONNREFUSED);
3011 }
3012
3013 ISC_LIST_INIT(event->bufferlist);
3014 event->region = *region;
3015 event->n = 0;
3016 event->offset = 0;
3017 event->attributes = 0;
3018
3019 /*
3020 * UDP sockets are always partial read.
3021 */
3022 if (sock->type == isc_sockettype_udp)
3023 event->minimum = 1;
3024 else {
3025 if (minimum == 0)
3026 event->minimum = region->length;
3027 else
3028 event->minimum = minimum;
3029 }
3030
3031 ret = socket_recv(sock, event, task, flags);
3032 UNLOCK(&sock->lock);
3033 return (ret);
3034 }
3035
3036 /*
3037 * Caller must have the socket locked.
3038 */
3039 static isc_result_t
socket_send(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3040 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
3041 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3042 unsigned int flags)
3043 {
3044 int io_state;
3045 int send_errno = 0;
3046 int cc = 0;
3047 isc_task_t *ntask = NULL;
3048 isc_result_t result = ISC_R_SUCCESS;
3049
3050 dev->ev_sender = task;
3051
3052 set_dev_address(address, sock, dev);
3053 if (pktinfo != NULL) {
3054 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
3055 ISC_MSG_PKTINFOPROVIDED,
3056 "pktinfo structure provided, ifindex %u (set to 0)",
3057 pktinfo->ipi6_ifindex);
3058
3059 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
3060 dev->pktinfo = *pktinfo;
3061 /*
3062 * Set the pktinfo index to 0 here, to let the kernel decide
3063 * what interface it should send on.
3064 */
3065 dev->pktinfo.ipi6_ifindex = 0;
3066 }
3067
3068 io_state = startio_send(sock, dev, &cc, &send_errno);
3069 switch (io_state) {
3070 case DOIO_PENDING: /* I/O started. Enqueue completion event. */
3071 case DOIO_SOFT:
3072 /*
3073 * We couldn't send all or part of the request right now, so
3074 * queue it unless ISC_SOCKFLAG_NORETRY is set.
3075 */
3076 if ((flags & ISC_SOCKFLAG_NORETRY) == 0 ||
3077 io_state == DOIO_PENDING) {
3078 isc_task_attach(task, &ntask);
3079 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
3080
3081 /*
3082 * Enqueue the request.
3083 */
3084 INSIST(!ISC_LINK_LINKED(dev, ev_link));
3085 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
3086
3087 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
3088 "socket_send: event %p -> task %p",
3089 dev, ntask);
3090
3091 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
3092 result = ISC_R_INPROGRESS;
3093 break;
3094 }
3095
3096 case DOIO_SUCCESS:
3097 break;
3098 }
3099
3100 return (result);
3101 }
3102
/*
 * Send 'region' on 'sock' without an explicit destination address or
 * packet info.  Forwards to isc_socket_sendto() with both set to NULL
 * and returns its result.
 */
isc_result_t
isc__socket_send(isc_socket_t *sock, isc_region_t *region,
		 isc_task_t *task, isc_taskaction_t action, void *arg)
{
	/*
	 * REQUIRE() checking is performed in isc_socket_sendto().
	 */
	return (isc_socket_sendto(sock, region, task, action, arg, NULL,
				  NULL));
}
3113
/*
 * Send 'region' on 'sock', optionally to 'address' and with the IPv6
 * packet info 'pktinfo'.  A send-done event is allocated for
 * 'action'/'arg' and the request is started through socket_send() with
 * no flags set.
 *
 * The socket must not be of type isc_sockettype_fdwatch.
 *
 * Returns ISC_R_CONNREFUSED if the socket has no descriptor,
 * ISC_R_NOMEMORY if the event cannot be allocated, and otherwise the
 * result of socket_send().
 */
isc_result_t
isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
		   isc_task_t *task, isc_taskaction_t action, void *arg,
		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
{
	isc_socketevent_t *dev;
	isc_socketmgr_t *manager;
	isc_result_t ret;

	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(sock->type != isc_sockettype_fdwatch);

	LOCK(&sock->lock);
	CONSISTENT(sock);

	/*
	 * make sure that the socket's not closed
	 */
	if (sock->fd == INVALID_SOCKET) {
		UNLOCK(&sock->lock);
		return (ISC_R_CONNREFUSED);
	}
	REQUIRE(region != NULL);
	REQUIRE(task != NULL);
	REQUIRE(action != NULL);

	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));

	INSIST(sock->bound);

	dev = allocate_socketevent(manager->mctx, sock,
				   ISC_SOCKEVENT_SENDDONE, action, arg);
	if (dev == NULL) {
		UNLOCK(&sock->lock);
		return (ISC_R_NOMEMORY);
	}
	/* The event carries its own copy of the region descriptor. */
	dev->region = *region;

	ret = socket_send(sock, dev, task, address, pktinfo, 0);
	UNLOCK(&sock->lock);
	return (ret);
}
3157
3158 isc_result_t
isc__socket_sendv(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg)3159 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3160 isc_task_t *task, isc_taskaction_t action, void *arg)
3161 {
3162 return (isc_socket_sendtov2(sock, buflist, task, action, arg, NULL,
3163 NULL, 0));
3164 }
3165
3166 isc_result_t
isc__socket_sendtov(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3167 isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3168 isc_task_t *task, isc_taskaction_t action, void *arg,
3169 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3170 {
3171 return (isc_socket_sendtov2(sock, buflist, task, action, arg, address,
3172 pktinfo, 0));
3173 }
3174
3175 isc_result_t
isc__socket_sendtov2(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)3176 isc__socket_sendtov2(isc_socket_t *sock, isc_bufferlist_t *buflist,
3177 isc_task_t *task, isc_taskaction_t action, void *arg,
3178 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3179 unsigned int flags)
3180 {
3181 isc_socketevent_t *dev;
3182 isc_socketmgr_t *manager;
3183 unsigned int iocount;
3184 isc_buffer_t *buffer;
3185 isc_result_t ret;
3186
3187 REQUIRE(VALID_SOCKET(sock));
3188
3189 LOCK(&sock->lock);
3190 CONSISTENT(sock);
3191
3192 /*
3193 * make sure that the socket's not closed
3194 */
3195 if (sock->fd == INVALID_SOCKET) {
3196 UNLOCK(&sock->lock);
3197 return (ISC_R_CONNREFUSED);
3198 }
3199 REQUIRE(buflist != NULL);
3200 REQUIRE(!ISC_LIST_EMPTY(*buflist));
3201 REQUIRE(task != NULL);
3202 REQUIRE(action != NULL);
3203
3204 manager = sock->manager;
3205 REQUIRE(VALID_MANAGER(manager));
3206
3207 iocount = isc_bufferlist_usedcount(buflist);
3208 REQUIRE(iocount > 0);
3209
3210 dev = allocate_socketevent(manager->mctx, sock,
3211 ISC_SOCKEVENT_SENDDONE, action, arg);
3212 if (dev == NULL) {
3213 UNLOCK(&sock->lock);
3214 return (ISC_R_NOMEMORY);
3215 }
3216
3217 /*
3218 * Move each buffer from the passed in list to our internal one.
3219 */
3220 buffer = ISC_LIST_HEAD(*buflist);
3221 while (buffer != NULL) {
3222 ISC_LIST_DEQUEUE(*buflist, buffer, link);
3223 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3224 buffer = ISC_LIST_HEAD(*buflist);
3225 }
3226
3227 ret = socket_send(sock, dev, task, address, pktinfo, flags);
3228 UNLOCK(&sock->lock);
3229 return (ret);
3230 }
3231
3232 isc_result_t
isc__socket_sendto2(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,isc_socketevent_t * event,unsigned int flags)3233 isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3234 isc_task_t *task,
3235 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3236 isc_socketevent_t *event, unsigned int flags)
3237 {
3238 isc_result_t ret;
3239
3240 REQUIRE(VALID_SOCKET(sock));
3241 LOCK(&sock->lock);
3242 CONSISTENT(sock);
3243
3244 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3245 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3246 REQUIRE(sock->type == isc_sockettype_udp);
3247 event->ev_sender = sock;
3248 event->result = ISC_R_UNEXPECTED;
3249 /*
3250 * make sure that the socket's not closed
3251 */
3252 if (sock->fd == INVALID_SOCKET) {
3253 UNLOCK(&sock->lock);
3254 return (ISC_R_CONNREFUSED);
3255 }
3256 ISC_LIST_INIT(event->bufferlist);
3257 event->region = *region;
3258 event->n = 0;
3259 event->offset = 0;
3260 event->attributes = 0;
3261
3262 ret = socket_send(sock, event, task, address, pktinfo, flags);
3263 UNLOCK(&sock->lock);
3264 return (ret);
3265 }
3266
3267 isc_result_t
isc__socket_bind(isc_socket_t * sock,isc_sockaddr_t * sockaddr,unsigned int options)3268 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3269 unsigned int options) {
3270 int bind_errno;
3271 char strbuf[ISC_STRERRORSIZE];
3272 int on = 1;
3273
3274 REQUIRE(VALID_SOCKET(sock));
3275 LOCK(&sock->lock);
3276 CONSISTENT(sock);
3277
3278 /*
3279 * make sure that the socket's not closed
3280 */
3281 if (sock->fd == INVALID_SOCKET) {
3282 UNLOCK(&sock->lock);
3283 return (ISC_R_CONNREFUSED);
3284 }
3285
3286 INSIST(!sock->bound);
3287 INSIST(!sock->dupped);
3288
3289 if (sock->pf != sockaddr->type.sa.sa_family) {
3290 UNLOCK(&sock->lock);
3291 return (ISC_R_FAMILYMISMATCH);
3292 }
3293 /*
3294 * Only set SO_REUSEADDR when we want a specific port.
3295 */
3296 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3297 isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3298 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3299 sizeof(on)) < 0) {
3300 UNEXPECTED_ERROR(__FILE__, __LINE__,
3301 "setsockopt(%d) %s", sock->fd,
3302 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3303 ISC_MSG_FAILED, "failed"));
3304 /* Press on... */
3305 }
3306 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3307 bind_errno = WSAGetLastError();
3308 UNLOCK(&sock->lock);
3309 switch (bind_errno) {
3310 case WSAEACCES:
3311 return (ISC_R_NOPERM);
3312 case WSAEADDRNOTAVAIL:
3313 return (ISC_R_ADDRNOTAVAIL);
3314 case WSAEADDRINUSE:
3315 return (ISC_R_ADDRINUSE);
3316 case WSAEINVAL:
3317 return (ISC_R_BOUND);
3318 default:
3319 isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3320 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3321 strbuf);
3322 return (ISC_R_UNEXPECTED);
3323 }
3324 }
3325
3326 socket_log(__LINE__, sock, sockaddr, TRACE,
3327 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3328 sock->bound = 1;
3329
3330 UNLOCK(&sock->lock);
3331 return (ISC_R_SUCCESS);
3332 }
3333
3334 isc_result_t
isc__socket_filter(isc_socket_t * sock,const char * filter)3335 isc__socket_filter(isc_socket_t *sock, const char *filter) {
3336 UNUSED(sock);
3337 UNUSED(filter);
3338
3339 REQUIRE(VALID_SOCKET(sock));
3340 return (ISC_R_NOTIMPLEMENTED);
3341 }
3342
3343 /*
3344 * Set up to listen on a given socket. We do this by creating an internal
3345 * event that will be dispatched when the socket has read activity. The
3346 * watcher will send the internal event to the task when there is a new
3347 * connection.
3348 *
3349 * Unlike in read, we don't preallocate a done event here. Every time there
3350 * is a new connection we'll have to allocate a new one anyway, so we might
3351 * as well keep things simple rather than having to track them.
3352 */
3353 isc_result_t
isc__socket_listen(isc_socket_t * sock,unsigned int backlog)3354 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3355 char strbuf[ISC_STRERRORSIZE];
3356 #if defined(ISC_PLATFORM_HAVETFO) && defined(TCP_FASTOPEN)
3357 char on = 1;
3358 #endif
3359
3360 REQUIRE(VALID_SOCKET(sock));
3361
3362 LOCK(&sock->lock);
3363 CONSISTENT(sock);
3364
3365 /*
3366 * make sure that the socket's not closed
3367 */
3368 if (sock->fd == INVALID_SOCKET) {
3369 UNLOCK(&sock->lock);
3370 return (ISC_R_CONNREFUSED);
3371 }
3372
3373 REQUIRE(!sock->listener);
3374 REQUIRE(sock->bound);
3375 REQUIRE(sock->type == isc_sockettype_tcp);
3376
3377 if (backlog == 0)
3378 backlog = SOMAXCONN;
3379
3380 if (listen(sock->fd, (int)backlog) < 0) {
3381 UNLOCK(&sock->lock);
3382 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3383
3384 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3385
3386 return (ISC_R_UNEXPECTED);
3387 }
3388
3389 #if defined(ISC_PLATFORM_HAVETFO) && defined(TCP_FASTOPEN)
3390 if (setsockopt(sock->fd, IPPROTO_TCP, TCP_FASTOPEN,
3391 &on, sizeof(on)) < 0) {
3392 isc__strerror(errno, strbuf, sizeof(strbuf));
3393 UNEXPECTED_ERROR(__FILE__, __LINE__,
3394 "setsockopt(%d, TCP_FASTOPEN) failed with %s",
3395 sock->fd, strbuf);
3396 /* TCP_FASTOPEN is experimental so ignore failures */
3397 }
3398 #endif
3399
3400 socket_log(__LINE__, sock, NULL, TRACE,
3401 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3402 sock->listener = 1;
3403 _set_state(sock, SOCK_LISTEN);
3404
3405 UNLOCK(&sock->lock);
3406 return (ISC_R_SUCCESS);
3407 }
3408
3409 /*
3410 * This should try to do aggressive accept() XXXMLG
3411 */
3412 isc_result_t
isc__socket_accept(isc_socket_t * sock,isc_task_t * task,isc_taskaction_t action,void * arg)3413 isc__socket_accept(isc_socket_t *sock,
3414 isc_task_t *task, isc_taskaction_t action, void *arg)
3415 {
3416 isc_socket_newconnev_t *adev;
3417 isc_socketmgr_t *manager;
3418 isc_task_t *ntask = NULL;
3419 isc_socket_t *nsock;
3420 isc_result_t result;
3421 IoCompletionInfo *lpo;
3422
3423 REQUIRE(VALID_SOCKET(sock));
3424
3425 manager = sock->manager;
3426 REQUIRE(VALID_MANAGER(manager));
3427
3428 LOCK(&sock->lock);
3429 CONSISTENT(sock);
3430
3431 /*
3432 * make sure that the socket's not closed
3433 */
3434 if (sock->fd == INVALID_SOCKET) {
3435 UNLOCK(&sock->lock);
3436 return (ISC_R_CONNREFUSED);
3437 }
3438
3439 REQUIRE(sock->listener);
3440
3441 /*
3442 * Sender field is overloaded here with the task we will be sending
3443 * this event to. Just before the actual event is delivered the
3444 * actual ev_sender will be touched up to be the socket.
3445 */
3446 adev = (isc_socket_newconnev_t *)
3447 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3448 action, arg, sizeof(*adev));
3449 if (adev == NULL) {
3450 UNLOCK(&sock->lock);
3451 return (ISC_R_NOMEMORY);
3452 }
3453 ISC_LINK_INIT(adev, ev_link);
3454
3455 result = allocate_socket(manager, sock->type, &nsock);
3456 if (result != ISC_R_SUCCESS) {
3457 isc_event_free((isc_event_t **)&adev);
3458 UNLOCK(&sock->lock);
3459 return (result);
3460 }
3461
3462 /*
3463 * AcceptEx() requires we pass in a socket.
3464 */
3465 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3466 if (nsock->fd == INVALID_SOCKET) {
3467 free_socket(&nsock, __LINE__);
3468 isc_event_free((isc_event_t **)&adev);
3469 UNLOCK(&sock->lock);
3470 return (ISC_R_FAILURE); // XXXMLG need real error message
3471 }
3472
3473 /*
3474 * Attach to socket and to task.
3475 */
3476 isc_task_attach(task, &ntask);
3477 if (isc_task_exiting(ntask)) {
3478 free_socket(&nsock, __LINE__);
3479 isc_task_detach(&ntask);
3480 isc_event_free(ISC_EVENT_PTR(&adev));
3481 UNLOCK(&sock->lock);
3482 return (ISC_R_SHUTTINGDOWN);
3483 }
3484 nsock->references++;
3485
3486 adev->ev_sender = ntask;
3487 adev->newsocket = nsock;
3488 _set_state(nsock, SOCK_ACCEPT);
3489
3490 /*
3491 * Queue io completion for an accept().
3492 */
3493 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3494 HEAP_ZERO_MEMORY,
3495 sizeof(IoCompletionInfo));
3496 RUNTIME_CHECK(lpo != NULL);
3497 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3498 (sizeof(SOCKADDR_STORAGE) + 16) * 2);
3499 RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3500
3501 lpo->adev = adev;
3502 lpo->request_type = SOCKET_ACCEPT;
3503
3504 ISCAcceptEx(sock->fd,
3505 nsock->fd, /* Accepted Socket */
3506 lpo->acceptbuffer, /* Buffer for initial Recv */
3507 0, /* Length of Buffer */
3508 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
3509 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address length + 16 */
3510 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
3511 (LPOVERLAPPED)lpo /* Overlapped structure */
3512 );
3513 iocompletionport_update(nsock);
3514
3515 socket_log(__LINE__, sock, NULL, TRACE,
3516 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3517 "accepting for nsock %p fd %d", nsock, nsock->fd);
3518
3519 /*
3520 * Enqueue the event
3521 */
3522 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3523 sock->pending_accept++;
3524 sock->pending_iocp++;
3525
3526 UNLOCK(&sock->lock);
3527 return (ISC_R_SUCCESS);
3528 }
3529
3530 isc_result_t
isc__socket_connect(isc_socket_t * sock,isc_sockaddr_t * addr,isc_task_t * task,isc_taskaction_t action,void * arg)3531 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3532 isc_task_t *task, isc_taskaction_t action, void *arg)
3533 {
3534 char strbuf[ISC_STRERRORSIZE];
3535 isc_socket_connev_t *cdev;
3536 isc_task_t *ntask = NULL;
3537 isc_socketmgr_t *manager;
3538 IoCompletionInfo *lpo;
3539 int bind_errno;
3540
3541 REQUIRE(VALID_SOCKET(sock));
3542 REQUIRE(addr != NULL);
3543 REQUIRE(task != NULL);
3544 REQUIRE(action != NULL);
3545
3546 manager = sock->manager;
3547 REQUIRE(VALID_MANAGER(manager));
3548 REQUIRE(addr != NULL);
3549
3550 if (isc_sockaddr_ismulticast(addr))
3551 return (ISC_R_MULTICAST);
3552
3553 LOCK(&sock->lock);
3554 CONSISTENT(sock);
3555
3556 /*
3557 * make sure that the socket's not closed
3558 */
3559 if (sock->fd == INVALID_SOCKET) {
3560 UNLOCK(&sock->lock);
3561 return (ISC_R_CONNREFUSED);
3562 }
3563
3564 /*
3565 * Windows sockets won't connect unless the socket is bound.
3566 */
3567 if (!sock->bound) {
3568 isc_sockaddr_t any;
3569
3570 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3571 if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3572 bind_errno = WSAGetLastError();
3573 UNLOCK(&sock->lock);
3574 switch (bind_errno) {
3575 case WSAEACCES:
3576 return (ISC_R_NOPERM);
3577 case WSAEADDRNOTAVAIL:
3578 return (ISC_R_ADDRNOTAVAIL);
3579 case WSAEADDRINUSE:
3580 return (ISC_R_ADDRINUSE);
3581 case WSAEINVAL:
3582 return (ISC_R_BOUND);
3583 default:
3584 isc__strerror(bind_errno, strbuf,
3585 sizeof(strbuf));
3586 UNEXPECTED_ERROR(__FILE__, __LINE__,
3587 "bind: %s", strbuf);
3588 return (ISC_R_UNEXPECTED);
3589 }
3590 }
3591 sock->bound = 1;
3592 }
3593
3594 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3595 ISC_SOCKEVENT_CONNECT,
3596 action, arg,
3597 sizeof(*cdev));
3598 if (cdev == NULL) {
3599 UNLOCK(&sock->lock);
3600 return (ISC_R_NOMEMORY);
3601 }
3602 ISC_LINK_INIT(cdev, ev_link);
3603
3604 if (sock->connected) {
3605 INSIST(isc_sockaddr_equal(&sock->address, addr));
3606 cdev->result = ISC_R_SUCCESS;
3607 isc_task_send(task, ISC_EVENT_PTR(&cdev));
3608
3609 UNLOCK(&sock->lock);
3610 return (ISC_R_SUCCESS);
3611 }
3612
3613 if ((sock->type == isc_sockettype_tcp) && !sock->pending_connect) {
3614 /*
3615 * Queue io completion for an accept().
3616 */
3617 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3618 HEAP_ZERO_MEMORY,
3619 sizeof(IoCompletionInfo));
3620 lpo->cdev = cdev;
3621 lpo->request_type = SOCKET_CONNECT;
3622
3623 sock->address = *addr;
3624 ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3625 NULL, 0, NULL, (LPOVERLAPPED)lpo);
3626
3627 /*
3628 * Attach to task.
3629 */
3630 isc_task_attach(task, &ntask);
3631 cdev->ev_sender = ntask;
3632
3633 sock->pending_connect = 1;
3634 _set_state(sock, SOCK_CONNECT);
3635
3636 /*
3637 * Enqueue the request.
3638 */
3639 INSIST(!ISC_LINK_LINKED(cdev, ev_link));
3640 ISC_LIST_ENQUEUE(sock->connect_list, cdev, ev_link);
3641 sock->pending_iocp++;
3642 } else if (sock->type == isc_sockettype_tcp) {
3643 INSIST(sock->pending_connect);
3644 INSIST(isc_sockaddr_equal(&sock->address, addr));
3645 isc_task_attach(task, &ntask);
3646 cdev->ev_sender = ntask;
3647 INSIST(!ISC_LINK_LINKED(cdev, ev_link));
3648 ISC_LIST_ENQUEUE(sock->connect_list, cdev, ev_link);
3649 } else {
3650 REQUIRE(!sock->pending_connect);
3651 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3652 cdev->result = ISC_R_SUCCESS;
3653 isc_task_send(task, (isc_event_t **)&cdev);
3654 }
3655 CONSISTENT(sock);
3656 UNLOCK(&sock->lock);
3657
3658 return (ISC_R_SUCCESS);
3659 }
3660
3661 isc_result_t
isc__socket_getpeername(isc_socket_t * sock,isc_sockaddr_t * addressp)3662 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3663 isc_result_t result;
3664
3665 REQUIRE(VALID_SOCKET(sock));
3666 REQUIRE(addressp != NULL);
3667
3668 LOCK(&sock->lock);
3669 CONSISTENT(sock);
3670
3671 /*
3672 * make sure that the socket's not closed
3673 */
3674 if (sock->fd == INVALID_SOCKET) {
3675 UNLOCK(&sock->lock);
3676 return (ISC_R_CONNREFUSED);
3677 }
3678
3679 if (sock->connected) {
3680 *addressp = sock->address;
3681 result = ISC_R_SUCCESS;
3682 } else {
3683 result = ISC_R_NOTCONNECTED;
3684 }
3685
3686 UNLOCK(&sock->lock);
3687
3688 return (result);
3689 }
3690
3691 isc_result_t
isc__socket_getsockname(isc_socket_t * sock,isc_sockaddr_t * addressp)3692 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3693 ISC_SOCKADDR_LEN_T len;
3694 isc_result_t result;
3695 char strbuf[ISC_STRERRORSIZE];
3696
3697 REQUIRE(VALID_SOCKET(sock));
3698 REQUIRE(addressp != NULL);
3699
3700 LOCK(&sock->lock);
3701 CONSISTENT(sock);
3702
3703 /*
3704 * make sure that the socket's not closed
3705 */
3706 if (sock->fd == INVALID_SOCKET) {
3707 UNLOCK(&sock->lock);
3708 return (ISC_R_CONNREFUSED);
3709 }
3710
3711 if (!sock->bound) {
3712 result = ISC_R_NOTBOUND;
3713 goto out;
3714 }
3715
3716 result = ISC_R_SUCCESS;
3717
3718 len = sizeof(addressp->type);
3719 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3720 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3721 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3722 strbuf);
3723 result = ISC_R_UNEXPECTED;
3724 goto out;
3725 }
3726 addressp->length = (unsigned int)len;
3727
3728 out:
3729 UNLOCK(&sock->lock);
3730
3731 return (result);
3732 }
3733
3734 /*
3735 * Run through the list of events on this socket, and cancel the ones
3736 * queued for task "task" of type "how". "how" is a bitmask.
3737 */
3738 void
isc__socket_cancel(isc_socket_t * sock,isc_task_t * task,unsigned int how)3739 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3740
3741 REQUIRE(VALID_SOCKET(sock));
3742
3743 /*
3744 * Quick exit if there is nothing to do. Don't even bother locking
3745 * in this case.
3746 */
3747 if (how == 0)
3748 return;
3749
3750 LOCK(&sock->lock);
3751 CONSISTENT(sock);
3752
3753 /*
3754 * make sure that the socket's not closed
3755 */
3756 if (sock->fd == INVALID_SOCKET) {
3757 UNLOCK(&sock->lock);
3758 return;
3759 }
3760
3761 /*
3762 * All of these do the same thing, more or less.
3763 * Each will:
3764 * o If the internal event is marked as "posted" try to
3765 * remove it from the task's queue. If this fails, mark it
3766 * as canceled instead, and let the task clean it up later.
3767 * o For each I/O request for that task of that type, post
3768 * its done event with status of "ISC_R_CANCELED".
3769 * o Reset any state needed.
3770 */
3771
3772 if ((how & ISC_SOCKCANCEL_RECV) != 0) {
3773 isc_socketevent_t *dev;
3774 isc_socketevent_t *next;
3775 isc_task_t *current_task;
3776
3777 dev = ISC_LIST_HEAD(sock->recv_list);
3778 while (dev != NULL) {
3779 current_task = dev->ev_sender;
3780 next = ISC_LIST_NEXT(dev, ev_link);
3781 if ((task == NULL) || (task == current_task)) {
3782 dev->result = ISC_R_CANCELED;
3783 send_recvdone_event(sock, &dev);
3784 }
3785 dev = next;
3786 }
3787 }
3788 how &= ~ISC_SOCKCANCEL_RECV;
3789
3790 if ((how & ISC_SOCKCANCEL_SEND) != 0) {
3791 isc_socketevent_t *dev;
3792 isc_socketevent_t *next;
3793 isc_task_t *current_task;
3794
3795 dev = ISC_LIST_HEAD(sock->send_list);
3796
3797 while (dev != NULL) {
3798 current_task = dev->ev_sender;
3799 next = ISC_LIST_NEXT(dev, ev_link);
3800 if ((task == NULL) || (task == current_task)) {
3801 dev->result = ISC_R_CANCELED;
3802 send_senddone_event(sock, &dev);
3803 }
3804 dev = next;
3805 }
3806 }
3807 how &= ~ISC_SOCKCANCEL_SEND;
3808
3809 if (((how & ISC_SOCKCANCEL_ACCEPT) != 0)
3810 && !ISC_LIST_EMPTY(sock->accept_list)) {
3811 isc_socket_newconnev_t *dev;
3812 isc_socket_newconnev_t *next;
3813 isc_task_t *current_task;
3814
3815 dev = ISC_LIST_HEAD(sock->accept_list);
3816 while (dev != NULL) {
3817 current_task = dev->ev_sender;
3818 next = ISC_LIST_NEXT(dev, ev_link);
3819
3820 if ((task == NULL) || (task == current_task)) {
3821
3822 dev->newsocket->references--;
3823 closesocket(dev->newsocket->fd);
3824 dev->newsocket->fd = INVALID_SOCKET;
3825 free_socket(&dev->newsocket, __LINE__);
3826
3827 dev->result = ISC_R_CANCELED;
3828 send_acceptdone_event(sock, &dev);
3829 }
3830
3831 dev = next;
3832 }
3833 }
3834 how &= ~ISC_SOCKCANCEL_ACCEPT;
3835
3836 if (((how & ISC_SOCKCANCEL_CONNECT) != 0)
3837 && !ISC_LIST_EMPTY(sock->connect_list)) {
3838 isc_socket_connev_t *dev;
3839 isc_socket_connev_t *next;
3840 isc_task_t *current_task;
3841
3842 INSIST(sock->pending_connect);
3843
3844 dev = ISC_LIST_HEAD(sock->connect_list);
3845
3846 while (dev != NULL) {
3847 current_task = dev->ev_sender;
3848 next = ISC_LIST_NEXT(dev, ev_link);
3849 if ((task == NULL) || (task == current_task)) {
3850 dev->result = ISC_R_CANCELED;
3851 send_connectdone_event(sock, &dev);
3852 }
3853 dev = next;
3854 }
3855 closesocket(sock->fd);
3856 sock->fd = INVALID_SOCKET;
3857 _set_state(sock, SOCK_CLOSED);
3858 }
3859 how &= ~ISC_SOCKCANCEL_CONNECT;
3860 UNUSED(how);
3861
3862 maybe_free_socket(&sock, __LINE__);
3863 }
3864
3865 isc_sockettype_t
isc__socket_gettype(isc_socket_t * sock)3866 isc__socket_gettype(isc_socket_t *sock) {
3867 isc_sockettype_t type;
3868
3869 REQUIRE(VALID_SOCKET(sock));
3870
3871 LOCK(&sock->lock);
3872
3873 /*
3874 * make sure that the socket's not closed
3875 */
3876 if (sock->fd == INVALID_SOCKET) {
3877 UNLOCK(&sock->lock);
3878 return (ISC_R_CONNREFUSED);
3879 }
3880
3881 type = sock->type;
3882 UNLOCK(&sock->lock);
3883 return (type);
3884 }
3885
3886 bool
isc__socket_isbound(isc_socket_t * sock)3887 isc__socket_isbound(isc_socket_t *sock) {
3888 bool val;
3889
3890 REQUIRE(VALID_SOCKET(sock));
3891
3892 LOCK(&sock->lock);
3893 CONSISTENT(sock);
3894
3895 /*
3896 * make sure that the socket's not closed
3897 */
3898 if (sock->fd == INVALID_SOCKET) {
3899 UNLOCK(&sock->lock);
3900 return (false);
3901 }
3902
3903 val = ((sock->bound) ? true : false);
3904 UNLOCK(&sock->lock);
3905
3906 return (val);
3907 }
3908
3909 void
isc__socket_ipv6only(isc_socket_t * sock,bool yes)3910 isc__socket_ipv6only(isc_socket_t *sock, bool yes) {
3911 #if defined(IPV6_V6ONLY)
3912 int onoff = yes ? 1 : 0;
3913 #else
3914 UNUSED(yes);
3915 #endif
3916
3917 REQUIRE(VALID_SOCKET(sock));
3918
3919 #ifdef IPV6_V6ONLY
3920 if (sock->pf == AF_INET6) {
3921 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3922 (char *)&onoff, sizeof(onoff));
3923 }
3924 #endif
3925 }
3926
3927 void
isc__socket_dscp(isc_socket_t * sock,isc_dscp_t dscp)3928 isc__socket_dscp(isc_socket_t *sock, isc_dscp_t dscp) {
3929 #if !defined(IP_TOS) && !defined(IPV6_TCLASS)
3930 UNUSED(dscp);
3931 #else
3932 if (dscp < 0)
3933 return;
3934
3935 dscp <<= 2;
3936 dscp &= 0xff;
3937 #endif
3938
3939 REQUIRE(VALID_SOCKET(sock));
3940
3941 #ifdef IP_TOS
3942 if (sock->pf == AF_INET) {
3943 (void)setsockopt(sock->fd, IPPROTO_IP, IP_TOS,
3944 (char *)&dscp, sizeof(dscp));
3945 }
3946 #endif
3947 #ifdef IPV6_TCLASS
3948 if (sock->pf == AF_INET6) {
3949 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_TCLASS,
3950 (char *)&dscp, sizeof(dscp));
3951 }
3952 #endif
3953 }
3954
3955 void
isc__socket_cleanunix(isc_sockaddr_t * addr,bool active)3956 isc__socket_cleanunix(isc_sockaddr_t *addr, bool active) {
3957 UNUSED(addr);
3958 UNUSED(active);
3959 }
3960
3961 isc_result_t
isc__socket_permunix(isc_sockaddr_t * addr,uint32_t perm,uint32_t owner,uint32_t group)3962 isc__socket_permunix(isc_sockaddr_t *addr, uint32_t perm,
3963 uint32_t owner, uint32_t group)
3964 {
3965 UNUSED(addr);
3966 UNUSED(perm);
3967 UNUSED(owner);
3968 UNUSED(group);
3969 return (ISC_R_NOTIMPLEMENTED);
3970 }
3971
3972 void
isc__socket_setname(isc_socket_t * socket,const char * name,void * tag)3973 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3974
3975 /*
3976 * Name 'socket'.
3977 */
3978
3979 REQUIRE(VALID_SOCKET(socket));
3980
3981 LOCK(&socket->lock);
3982 strlcpy(socket->name, name, sizeof(socket->name));
3983 socket->tag = tag;
3984 UNLOCK(&socket->lock);
3985 }
3986
3987 const char *
isc__socket_getname(isc_socket_t * socket)3988 isc__socket_getname(isc_socket_t *socket) {
3989 return (socket->name);
3990 }
3991
3992 void *
isc__socket_gettag(isc_socket_t * socket)3993 isc__socket_gettag(isc_socket_t *socket) {
3994 return (socket->tag);
3995 }
3996
3997 int
isc__socket_getfd(isc_socket_t * socket)3998 isc__socket_getfd(isc_socket_t *socket) {
3999 return ((short) socket->fd);
4000 }
4001
4002 void
isc__socketmgr_setreserved(isc_socketmgr_t * manager,uint32_t reserved)4003 isc__socketmgr_setreserved(isc_socketmgr_t *manager, uint32_t reserved) {
4004 UNUSED(manager);
4005 UNUSED(reserved);
4006 }
4007
4008 void
isc___socketmgr_maxudp(isc_socketmgr_t * manager,unsigned int maxudp)4009 isc___socketmgr_maxudp(isc_socketmgr_t *manager, unsigned int maxudp) {
4010
4011 REQUIRE(VALID_MANAGER(manager));
4012
4013 manager->maxudp = maxudp;
4014 }
4015
4016 isc_socketevent_t *
isc_socket_socketevent(isc_mem_t * mctx,void * sender,isc_eventtype_t eventtype,isc_taskaction_t action,void * arg)4017 isc_socket_socketevent(isc_mem_t *mctx, void *sender,
4018 isc_eventtype_t eventtype, isc_taskaction_t action,
4019 void *arg)
4020 {
4021 return (allocate_socketevent(mctx, sender, eventtype, action, arg));
4022 }
4023
4024 #ifdef HAVE_LIBXML2
4025
4026 static const char *
_socktype(isc_sockettype_t type)4027 _socktype(isc_sockettype_t type) {
4028 if (type == isc_sockettype_udp)
4029 return ("udp");
4030 else if (type == isc_sockettype_tcp)
4031 return ("tcp");
4032 else if (type == isc_sockettype_unix)
4033 return ("unix");
4034 else if (type == isc_sockettype_fdwatch)
4035 return ("fdwatch");
4036 else
4037 return ("not-initialized");
4038 }
4039
4040 #define TRY0(a) do { xmlrc = (a); if (xmlrc < 0) goto error; } while(0)
4041 int
isc_socketmgr_renderxml(isc_socketmgr_t * mgr,xmlTextWriterPtr writer)4042 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
4043 {
4044 isc_socket_t *sock = NULL;
4045 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
4046 isc_sockaddr_t addr;
4047 ISC_SOCKADDR_LEN_T len;
4048 int xmlrc;
4049
4050 LOCK(&mgr->lock);
4051
4052 #ifndef ISC_PLATFORM_USETHREADS
4053 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "references"));
4054 TRY0(xmlTextWriterWriteFormatString(writer, "%d", mgr->refs));
4055 TRY0(xmlTextWriterEndElement(writer));
4056 #endif
4057
4058 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets"));
4059 sock = ISC_LIST_HEAD(mgr->socklist);
4060 while (sock != NULL) {
4061 LOCK(&sock->lock);
4062 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket"));
4063
4064 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "id"));
4065 TRY0(xmlTextWriterWriteFormatString(writer, "%p", sock));
4066 TRY0(xmlTextWriterEndElement(writer));
4067
4068 if (sock->name[0] != 0) {
4069 TRY0(xmlTextWriterStartElement(writer,
4070 ISC_XMLCHAR "name"));
4071 TRY0(xmlTextWriterWriteFormatString(writer, "%s",
4072 sock->name));
4073 TRY0(xmlTextWriterEndElement(writer)); /* name */
4074 }
4075
4076 TRY0(xmlTextWriterStartElement(writer,
4077 ISC_XMLCHAR "references"));
4078 TRY0(xmlTextWriterWriteFormatString(writer, "%d",
4079 sock->references));
4080 TRY0(xmlTextWriterEndElement(writer));
4081
4082 TRY0(xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
4083 ISC_XMLCHAR _socktype(sock->type)));
4084
4085 if (sock->connected) {
4086 isc_sockaddr_format(&sock->address, peerbuf,
4087 sizeof(peerbuf));
4088 TRY0(xmlTextWriterWriteElement(writer,
4089 ISC_XMLCHAR "peer-address",
4090 ISC_XMLCHAR peerbuf));
4091 }
4092
4093 len = sizeof(addr);
4094 if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
4095 isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
4096 TRY0(xmlTextWriterWriteElement(writer,
4097 ISC_XMLCHAR "local-address",
4098 ISC_XMLCHAR peerbuf));
4099 }
4100
4101 TRY0(xmlTextWriterStartElement(writer, ISC_XMLCHAR "states"));
4102 if (sock->pending_recv)
4103 TRY0(xmlTextWriterWriteElement(writer,
4104 ISC_XMLCHAR "state",
4105 ISC_XMLCHAR "pending-receive"));
4106 if (sock->pending_send)
4107 TRY0(xmlTextWriterWriteElement(writer,
4108 ISC_XMLCHAR "state",
4109 ISC_XMLCHAR "pending-send"));
4110 if (sock->pending_accept)
4111 TRY0(xmlTextWriterWriteElement(writer,
4112 ISC_XMLCHAR "state",
4113 ISC_XMLCHAR "pending_accept"));
4114 if (sock->listener)
4115 TRY0(xmlTextWriterWriteElement(writer,
4116 ISC_XMLCHAR "state",
4117 ISC_XMLCHAR "listener"));
4118 if (sock->connected)
4119 TRY0(xmlTextWriterWriteElement(writer,
4120 ISC_XMLCHAR "state",
4121 ISC_XMLCHAR "connected"));
4122 if (sock->pending_connect)
4123 TRY0(xmlTextWriterWriteElement(writer,
4124 ISC_XMLCHAR "state",
4125 ISC_XMLCHAR "connecting"));
4126 if (sock->bound)
4127 TRY0(xmlTextWriterWriteElement(writer,
4128 ISC_XMLCHAR "state",
4129 ISC_XMLCHAR "bound"));
4130
4131 TRY0(xmlTextWriterEndElement(writer)); /* states */
4132
4133 TRY0(xmlTextWriterEndElement(writer)); /* socket */
4134
4135 UNLOCK(&sock->lock);
4136 sock = ISC_LIST_NEXT(sock, link);
4137 }
4138 TRY0(xmlTextWriterEndElement(writer)); /* sockets */
4139
4140 error:
4141 if (sock != NULL)
4142 UNLOCK(&sock->lock);
4143
4144 UNLOCK(&mgr->lock);
4145
4146 return (xmlrc);
4147 }
4148 #endif /* HAVE_LIBXML2 */
4149
4150 #ifdef HAVE_JSON
4151 #define CHECKMEM(m) do { \
4152 if (m == NULL) { \
4153 result = ISC_R_NOMEMORY;\
4154 goto error;\
4155 } \
4156 } while(0)
4157
4158 isc_result_t
isc_socketmgr_renderjson(isc_socketmgr_t * mgr,json_object * stats)4159 isc_socketmgr_renderjson(isc_socketmgr_t *mgr, json_object *stats) {
4160 isc_result_t result = ISC_R_SUCCESS;
4161 isc_socket_t *sock = NULL;
4162 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
4163 isc_sockaddr_t addr;
4164 ISC_SOCKADDR_LEN_T len;
4165 json_object *obj, *array = json_object_new_array();
4166
4167 CHECKMEM(array);
4168
4169 LOCK(&mgr->lock);
4170
4171 #ifdef USE_SHARED_MANAGER
4172 obj = json_object_new_int(mgr->refs);
4173 CHECKMEM(obj);
4174 json_object_object_add(stats, "references", obj);
4175 #endif /* USE_SHARED_MANAGER */
4176
4177 sock = ISC_LIST_HEAD(mgr->socklist);
4178 while (sock != NULL) {
4179 json_object *states, *entry = json_object_new_object();
4180 char buf[255];
4181
4182 CHECKMEM(entry);
4183 json_object_array_add(array, entry);
4184
4185 LOCK(&sock->lock);
4186
4187 snprintf(buf, sizeof(buf), "%p", sock);
4188 obj = json_object_new_string(buf);
4189 CHECKMEM(obj);
4190 json_object_object_add(entry, "id", obj);
4191
4192 if (sock->name[0] != 0) {
4193 obj = json_object_new_string(sock->name);
4194 CHECKMEM(obj);
4195 json_object_object_add(entry, "name", obj);
4196 }
4197
4198 obj = json_object_new_int(sock->references);
4199 CHECKMEM(obj);
4200 json_object_object_add(entry, "references", obj);
4201
4202 obj = json_object_new_string(_socktype(sock->type));
4203 CHECKMEM(obj);
4204 json_object_object_add(entry, "type", obj);
4205
4206 if (sock->connected) {
4207 isc_sockaddr_format(&sock->address, peerbuf,
4208 sizeof(peerbuf));
4209 obj = json_object_new_string(peerbuf);
4210 CHECKMEM(obj);
4211 json_object_object_add(entry, "peer-address", obj);
4212 }
4213
4214 len = sizeof(addr);
4215 if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
4216 isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
4217 obj = json_object_new_string(peerbuf);
4218 CHECKMEM(obj);
4219 json_object_object_add(entry, "local-address", obj);
4220 }
4221
4222 states = json_object_new_array();
4223 CHECKMEM(states);
4224 json_object_object_add(entry, "states", states);
4225
4226 if (sock->pending_recv) {
4227 obj = json_object_new_string("pending-receive");
4228 CHECKMEM(obj);
4229 json_object_array_add(states, obj);
4230 }
4231
4232 if (sock->pending_send) {
4233 obj = json_object_new_string("pending-send");
4234 CHECKMEM(obj);
4235 json_object_array_add(states, obj);
4236 }
4237
4238 if (sock->pending_accept) {
4239 obj = json_object_new_string("pending-accept");
4240 CHECKMEM(obj);
4241 json_object_array_add(states, obj);
4242 }
4243
4244 if (sock->listener) {
4245 obj = json_object_new_string("listener");
4246 CHECKMEM(obj);
4247 json_object_array_add(states, obj);
4248 }
4249
4250 if (sock->connected) {
4251 obj = json_object_new_string("connected");
4252 CHECKMEM(obj);
4253 json_object_array_add(states, obj);
4254 }
4255
4256 if (sock->pending_connect) {
4257 obj = json_object_new_string("connecting");
4258 CHECKMEM(obj);
4259 json_object_array_add(states, obj);
4260 }
4261
4262 if (sock->bound) {
4263 obj = json_object_new_string("bound");
4264 CHECKMEM(obj);
4265 json_object_array_add(states, obj);
4266 }
4267
4268 UNLOCK(&sock->lock);
4269 sock = ISC_LIST_NEXT(sock, link);
4270 }
4271
4272 json_object_object_add(stats, "sockets", array);
4273 array = NULL;
4274 result = ISC_R_SUCCESS;
4275
4276 error:
4277 if (array != NULL)
4278 json_object_put(array);
4279
4280 if (sock != NULL)
4281 UNLOCK(&sock->lock);
4282
4283 UNLOCK(&mgr->lock);
4284
4285 return (result);
4286 }
4287 #endif /* HAVE_JSON */
4288
4289 /*
4290 * Replace ../socket_api.c
4291 */
4292
4293 isc_result_t
isc__socket_register(void)4294 isc__socket_register(void) {
4295 return (ISC_R_SUCCESS);
4296 }
4297
4298 isc_result_t
isc_socketmgr_createinctx(isc_mem_t * mctx,isc_appctx_t * actx,isc_socketmgr_t ** managerp)4299 isc_socketmgr_createinctx(isc_mem_t *mctx, isc_appctx_t *actx,
4300 isc_socketmgr_t **managerp)
4301 {
4302 isc_result_t result;
4303
4304 result = isc_socketmgr_create(mctx, managerp);
4305
4306 if (result == ISC_R_SUCCESS)
4307 isc_appctx_setsocketmgr(actx, *managerp);
4308
4309 return (result);
4310 }
4311