1 /*
2 * Copyright (C) 2004-2012 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 2000-2003 Internet Software Consortium.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 /* $Id$ */
19
20 /* This code uses functions which are only available on Server 2003 and
21 * higher, and Windows XP and higher.
22 *
23 * This code is by nature multithreaded and takes advantage of various
24 * features to pass on information through the completion port for
25 * when I/O is completed. All sends, receives, accepts, and connects are
26 * completed through the completion port.
27 *
 * The number of Completion Port Worker threads used is the total number
 * of CPUs + 1, capped at MAX_IOCPTHREADS. This increases the likelihood
 * that a Worker Thread is available for processing a completed request.
31 *
32 * XXXPDM 5 August, 2002
33 */
34
35 #define MAKE_EXTERNAL 1
36 #include <config.h>
37
38 #include <sys/types.h>
39
40 #ifndef _WINSOCKAPI_
41 #define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */
42 #endif
43
44 #include <errno.h>
45 #include <stddef.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <io.h>
50 #include <fcntl.h>
51 #include <process.h>
52
53 #include <isc/buffer.h>
54 #include <isc/bufferlist.h>
55 #include <isc/condition.h>
56 #include <isc/list.h>
57 #include <isc/log.h>
58 #include <isc/mem.h>
59 #include <isc/msgs.h>
60 #include <isc/mutex.h>
61 #include <isc/net.h>
62 #include <isc/once.h>
63 #include <isc/os.h>
64 #include <isc/platform.h>
65 #include <isc/print.h>
66 #include <isc/region.h>
67 #include <isc/socket.h>
68 #include <isc/stats.h>
69 #include <isc/strerror.h>
70 #include <isc/syslog.h>
71 #include <isc/task.h>
72 #include <isc/thread.h>
73 #include <isc/util.h>
74 #include <isc/win32os.h>
75
76 #include <mswsock.h>
77
78 #include "errno2result.h"
79
80 /*
81 * How in the world can Microsoft exist with APIs like this?
82 * We can't actually call this directly, because it turns out
83 * no library exports this function. Instead, we need to
84 * issue a runtime call to get the address.
85 */
86 LPFN_CONNECTEX ISCConnectEx;
87 LPFN_ACCEPTEX ISCAcceptEx;
88 LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
89
90 /*
91 * Run expensive internal consistency checks.
92 */
93 #ifdef ISC_SOCKET_CONSISTENCY_CHECKS
94 #define CONSISTENT(sock) consistent(sock)
95 #else
96 #define CONSISTENT(sock) do {} while (0)
97 #endif
98 static void consistent(isc_socket_t *sock);
99
100 /*
101 * Define this macro to control the behavior of connection
102 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823
103 * for details.
104 * NOTE: This requires that Windows 2000 systems install Service Pack 2
105 * or later.
106 */
107 #ifndef SIO_UDP_CONNRESET
108 #define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
109 #endif
110
111 /*
112 * Some systems define the socket length argument as an int, some as size_t,
113 * some as socklen_t. This is here so it can be easily changed if needed.
114 */
115 #ifndef ISC_SOCKADDR_LEN_T
116 #define ISC_SOCKADDR_LEN_T unsigned int
117 #endif
118
119 /*
120 * Define what the possible "soft" errors can be. These are non-fatal returns
121 * of various network related functions, like recv() and so on.
122 */
123 #define SOFT_ERROR(e) ((e) == WSAEINTR || \
124 (e) == WSAEWOULDBLOCK || \
125 (e) == EWOULDBLOCK || \
126 (e) == EINTR || \
127 (e) == EAGAIN || \
128 (e) == 0)
129
130 /*
131 * Pending errors are not really errors and should be
132 * kept separate
133 */
134 #define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)
135
136 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
137 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
138 #define DOIO_HARD 2 /* i/o error, event sent */
139 #define DOIO_EOF 3 /* EOF, no event sent */
140 #define DOIO_PENDING 4 /* status when i/o is in process */
141 #define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */
142
143 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
144
145 /*
146 * DLVL(90) -- Function entry/exit and other tracing.
147 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
148 * DLVL(60) -- Socket data send/receive
149 * DLVL(50) -- Event tracing, including receiving/sending completion events.
150 * DLVL(20) -- Socket creation/destruction.
151 */
152 #define TRACE_LEVEL 90
153 #define CORRECTNESS_LEVEL 70
154 #define IOEVENT_LEVEL 60
155 #define EVENT_LEVEL 50
156 #define CREATION_LEVEL 20
157
158 #define TRACE DLVL(TRACE_LEVEL)
159 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
160 #define IOEVENT DLVL(IOEVENT_LEVEL)
161 #define EVENT DLVL(EVENT_LEVEL)
162 #define CREATION DLVL(CREATION_LEVEL)
163
164 typedef isc_event_t intev_t;
165
166 /*
167 * Socket State
168 */
169 enum {
170 SOCK_INITIALIZED, /* Socket Initialized */
171 SOCK_OPEN, /* Socket opened but nothing yet to do */
172 SOCK_DATA, /* Socket sending or receiving data */
173 SOCK_LISTEN, /* TCP Socket listening for connects */
174 SOCK_ACCEPT, /* TCP socket is waiting to accept */
175 SOCK_CONNECT, /* TCP Socket connecting */
176 SOCK_CLOSED, /* Socket has been closed */
177 };
178
179 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
180 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
181
182 /*
183 * IPv6 control information. If the socket is an IPv6 socket we want
184 * to collect the destination address and interface so the client can
185 * set them on outgoing packets.
186 */
187 #ifdef ISC_PLATFORM_HAVEIPV6
188 #ifndef USE_CMSG
189 #define USE_CMSG 1
190 #endif
191 #endif
192
193 /*
194 * We really don't want to try and use these control messages. Win32
195 * doesn't have this mechanism before XP.
196 */
197 #undef USE_CMSG
198
/*
 * Message header for recvmsg and sendmsg calls.
 * Used value-result for recvmsg, value only for sendmsg.
 *
 * For sends, build_msghdr_send() zeroes the structure and then fills in
 * to_addr/to_addr_len from the event's address, msg_iov/msg_iovlen from
 * the caller-supplied WSABUF array, and msg_totallen with the number of
 * bytes described by that array.
 *
 * NOTE(review): the trailing "msghdr" declarator also defines a file-scope
 * object of this type; it looks unintentional, but confirm nothing else in
 * the file refers to it before removing it.
 */
struct msghdr {
	SOCKADDR_STORAGE	to_addr;	/* UDP send/recv address */
	int			to_addr_len;	/* length of the address */
	WSABUF			*msg_iov;	/* scatter/gather array */
	u_int			msg_iovlen;	/* # elements in msg_iov */
	void			*msg_control;	/* ancillary data, see below */
	u_int			msg_controllen;	/* ancillary data buffer len */
	int			msg_totallen;	/* total length of this message */
} msghdr;
212
213 /*
214 * The size to raise the receive buffer to.
215 */
216 #define RCVBUFSIZE (32*1024)
217
218 /*
219 * The number of times a send operation is repeated if the result
220 * is WSAEINTR.
221 */
222 #define NRETRIES 10
223
/*
 * Per-socket state.  The fields in the first group are not protected by
 * the socket lock; everything from 'link' onwards is.
 */
struct isc_socket {
	/* Not locked. */
	unsigned int		magic;		/* SOCKET_MAGIC; see VALID_SOCKET() */
	isc_socketmgr_t		*manager;	/* manager this socket belongs to */
	isc_mutex_t		lock;
	isc_sockettype_t	type;

	/* Pointers to scatter/gather buffers */
	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];

	/* Locked by socket lock. */
	ISC_LINK(isc_socket_t)	link;
	unsigned int		references; /* EXTERNAL references */
	SOCKET			fd;	/* file handle */
	int			pf;	/* protocol family */
	char			name[16];
	void *			tag;

	/*
	 * Each recv() call uses this buffer.  It is a per-socket receive
	 * buffer that allows us to decouple the system recv() from the
	 * recv_list done events.  This means the items on the recv_list
	 * can be removed without having to cancel pending system recv()
	 * calls.  It also allows us to read-ahead in some cases.
	 *
	 * queue_receive_request() hands 'base'/'len' to WSARecvFrom() and
	 * insists that 'remaining' is zero before doing so.
	 */
	struct {
		SOCKADDR_STORAGE	from_addr;	   /* UDP send/recv address */
		int		from_addr_len;	   /* length of the address */
		char		*base;		   /* the base of the buffer */
		char		*consume_position; /* where to start copying data from next */
		unsigned int	len;		   /* the actual size of this buffer */
		unsigned int	remaining;	   /* the number of bytes remaining */
	} recvbuf;

	ISC_LIST(isc_socketevent_t)		send_list;
	ISC_LIST(isc_socketevent_t)		recv_list;
	ISC_LIST(isc_socket_newconnev_t)	accept_list;
	isc_socket_connev_t		       *connect_ev;

	isc_sockaddr_t		address;  /* remote address */

	unsigned int		listener : 1,	/* listener socket */
				connected : 1,
				pending_connect : 1, /* connect pending */
				bound : 1,	/* bound to local addr */
				dupped : 1;     /* created by isc_socket_dup() */
	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
	unsigned int		pending_recv;	/* Number of outstanding recv() calls. */
	unsigned int		pending_send;	/* Number of outstanding send() calls. */
	unsigned int		pending_accept;	/* Number of outstanding accept() calls. */
	unsigned int		state;		/* Socket state. Debugging and consistency checking. */
	int			state_lineno;	/* line which last touched state */
};

/*
 * Set the socket's state and record the source line that did so, which
 * helps when tracking down unexpected state transitions.
 */
#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
279
/*
 * Buffer structure
 *
 * One node per private copy of outgoing data.  build_msghdr_send()
 * allocates both the node and 'buf' from hHeapHandle, copies the data to
 * be sent into 'buf', and stores the number of bytes copied in 'buflen'.
 */
typedef struct buflist buflist_t;

struct buflist {
	void			*buf;		/* heap copy of the data */
	unsigned int		buflen;		/* number of bytes in 'buf' */
	ISC_LINK(buflist_t)	link;		/* linkage on a bufferlist */
};
290
/*
 * I/O Completion ports Info structures
 *
 * hHeapHandle is the private heap created by iocompletionport_init(); the
 * IoCompletionInfo structures and the send buffer copies are allocated
 * from it.
 *
 * An IoCompletionInfo describes one overlapped request.  The OVERLAPPED
 * member comes first because a pointer to the whole structure is passed
 * to Winsock as the LPWSAOVERLAPPED argument.
 */

static HANDLE hHeapHandle = NULL;
typedef struct IoCompletionInfo {
	OVERLAPPED		overlapped;
	isc_socketevent_t	*dev;  /* send()/recv() done event */
	isc_socket_connev_t	*cdev; /* connect() done event */
	isc_socket_newconnev_t	*adev; /* accept() done event */
	void			*acceptbuffer;
	DWORD			received_bytes;
	int			request_type;	/* SOCKET_RECV, SOCKET_SEND, ... */
	struct msghdr		messagehdr;
	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
} IoCompletionInfo;
307
/*
 * Define a maximum number of I/O Completion Port worker threads
 * to handle the load on the Completion Port.  The actual number
 * used is the number of CPUs + 1, limited to this maximum (see
 * iocompletionport_init()).
 */
#define MAX_IOCPTHREADS 20

#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

/*
 * Socket manager: holds the I/O completion port, the handles and ids of
 * its worker threads, and the list of sockets.
 */
struct isc_socketmgr {
	/* Not locked. */
	unsigned int			magic;
	isc_mem_t		       *mctx;
	isc_mutex_t			lock;
	isc_stats_t		       *stats;

	/* Locked by manager lock. */
	ISC_LIST(isc_socket_t)		socklist;
	isc_boolean_t			bShutdown;
	isc_condition_t			shutdown_ok;
	HANDLE				hIoCompletionPort;
	int				maxIOCPThreads;	/* number of worker threads in use */
	HANDLE				hIOCPThreads[MAX_IOCPTHREADS];
	DWORD				dwIOCPThreadIds[MAX_IOCPTHREADS];

	/*
	 * Debugging.
	 * Modified by InterlockedIncrement() and InterlockedDecrement()
	 */
	LONG				totalSockets;	/* decremented by socket_close() */
	LONG				iocp_total;	/* incremented by iocompletionport_update() */
};
341
/*
 * Kinds of overlapped request; stored in IoCompletionInfo.request_type.
 */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};
348
349 /*
350 * send() and recv() iovec counts
351 */
352 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
353 #define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
354
355 static isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
356 isc_sockettype_t type,
357 isc_socket_t **socketp,
358 isc_socket_t *dup_socket);
359 static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
360 static void maybe_free_socket(isc_socket_t **, int);
361 static void free_socket(isc_socket_t **, int);
362 static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
363 static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
364 static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
365 static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
366 static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
367 static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
368 static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
369 static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
370 static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
371 static void queue_receive_request(isc_socket_t *sock);
372
373 /*
374 * This is used to dump the contents of the sock structure
375 * You should make sure that the sock is locked before
376 * dumping it. Since the code uses simple printf() statements
377 * it should only be used interactively.
378 */
379 void
sock_dump(isc_socket_t * sock)380 sock_dump(isc_socket_t *sock) {
381 isc_socketevent_t *ldev;
382 isc_socket_newconnev_t *ndev;
383
384 #if 0
385 isc_sockaddr_t addr;
386 char socktext[256];
387
388 isc_socket_getpeername(sock, &addr);
389 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
390 printf("Remote Socket: %s\n", socktext);
391 isc_socket_getsockname(sock, &addr);
392 isc_sockaddr_format(&addr, socktext, sizeof(socktext));
393 printf("This Socket: %s\n", socktext);
394 #endif
395
396 printf("\n\t\tSock Dump\n");
397 printf("\t\tfd: %u\n", sock->fd);
398 printf("\t\treferences: %d\n", sock->references);
399 printf("\t\tpending_accept: %d\n", sock->pending_accept);
400 printf("\t\tconnecting: %d\n", sock->pending_connect);
401 printf("\t\tconnected: %d\n", sock->connected);
402 printf("\t\tbound: %d\n", sock->bound);
403 printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
404 printf("\t\tsocket type: %d\n", sock->type);
405
406 printf("\n\t\tSock Recv List\n");
407 ldev = ISC_LIST_HEAD(sock->recv_list);
408 while (ldev != NULL) {
409 printf("\t\tdev: %p\n", ldev);
410 ldev = ISC_LIST_NEXT(ldev, ev_link);
411 }
412
413 printf("\n\t\tSock Send List\n");
414 ldev = ISC_LIST_HEAD(sock->send_list);
415 while (ldev != NULL) {
416 printf("\t\tdev: %p\n", ldev);
417 ldev = ISC_LIST_NEXT(ldev, ev_link);
418 }
419
420 printf("\n\t\tSock Accept List\n");
421 ndev = ISC_LIST_HEAD(sock->accept_list);
422 while (ndev != NULL) {
423 printf("\t\tdev: %p\n", ldev);
424 ndev = ISC_LIST_NEXT(ndev, ev_link);
425 }
426 }
427
428 static void
429 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
430 isc_logcategory_t *category, isc_logmodule_t *module, int level,
431 isc_msgcat_t *msgcat, int msgset, int message,
432 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
433
434 /* This function will add an entry to the I/O completion port
435 * that will signal the I/O thread to exit (gracefully)
436 */
437 static void
signal_iocompletionport_exit(isc_socketmgr_t * manager)438 signal_iocompletionport_exit(isc_socketmgr_t *manager) {
439 int i;
440 int errval;
441 char strbuf[ISC_STRERRORSIZE];
442
443 REQUIRE(VALID_MANAGER(manager));
444 for (i = 0; i < manager->maxIOCPThreads; i++) {
445 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
446 0, 0, 0)) {
447 errval = GetLastError();
448 isc__strerror(errval, strbuf, sizeof(strbuf));
449 FATAL_ERROR(__FILE__, __LINE__,
450 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
451 ISC_MSG_FAILED,
452 "Can't request service thread to exit: %s"),
453 strbuf);
454 }
455 }
456 }
457
458 /*
459 * Create the worker threads for the I/O Completion Port
460 */
461 void
iocompletionport_createthreads(int total_threads,isc_socketmgr_t * manager)462 iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
463 int errval;
464 char strbuf[ISC_STRERRORSIZE];
465 int i;
466
467 INSIST(total_threads > 0);
468 REQUIRE(VALID_MANAGER(manager));
469 /*
470 * We need at least one
471 */
472 for (i = 0; i < total_threads; i++) {
473 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
474 manager, 0,
475 &manager->dwIOCPThreadIds[i]);
476 if (manager->hIOCPThreads[i] == NULL) {
477 errval = GetLastError();
478 isc__strerror(errval, strbuf, sizeof(strbuf));
479 FATAL_ERROR(__FILE__, __LINE__,
480 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
481 ISC_MSG_FAILED,
482 "Can't create IOCP thread: %s"),
483 strbuf);
484 exit(1);
485 }
486 }
487 }
488
489 /*
490 * Create/initialise the I/O completion port
491 */
492 void
iocompletionport_init(isc_socketmgr_t * manager)493 iocompletionport_init(isc_socketmgr_t *manager) {
494 int errval;
495 char strbuf[ISC_STRERRORSIZE];
496
497 REQUIRE(VALID_MANAGER(manager));
498 /*
499 * Create a private heap to handle the socket overlapped structure
500 * The minimum number of structures is 10, there is no maximum
501 */
502 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
503 if (hHeapHandle == NULL) {
504 errval = GetLastError();
505 isc__strerror(errval, strbuf, sizeof(strbuf));
506 FATAL_ERROR(__FILE__, __LINE__,
507 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
508 ISC_MSG_FAILED,
509 "HeapCreate() failed during "
510 "initialization: %s"),
511 strbuf);
512 exit(1);
513 }
514
515 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
516
517 /* Now Create the Completion Port */
518 manager->hIoCompletionPort = CreateIoCompletionPort(
519 INVALID_HANDLE_VALUE, NULL,
520 0, manager->maxIOCPThreads);
521 if (manager->hIoCompletionPort == NULL) {
522 errval = GetLastError();
523 isc__strerror(errval, strbuf, sizeof(strbuf));
524 FATAL_ERROR(__FILE__, __LINE__,
525 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
526 ISC_MSG_FAILED,
527 "CreateIoCompletionPort() failed "
528 "during initialization: %s"),
529 strbuf);
530 exit(1);
531 }
532
533 /*
534 * Worker threads for servicing the I/O
535 */
536 iocompletionport_createthreads(manager->maxIOCPThreads, manager);
537 }
538
539 /*
540 * Associate a socket with an IO Completion Port. This allows us to queue events for it
541 * and have our worker pool of threads process them.
542 */
543 void
iocompletionport_update(isc_socket_t * sock)544 iocompletionport_update(isc_socket_t *sock) {
545 HANDLE hiocp;
546 char strbuf[ISC_STRERRORSIZE];
547
548 REQUIRE(VALID_SOCKET(sock));
549
550 hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
551 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
552
553 if (hiocp == NULL) {
554 DWORD errval = GetLastError();
555 isc__strerror(errval, strbuf, sizeof(strbuf));
556 isc_log_iwrite(isc_lctx,
557 ISC_LOGCATEGORY_GENERAL,
558 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
559 isc_msgcat, ISC_MSGSET_SOCKET,
560 ISC_MSG_TOOMANYHANDLES,
561 "iocompletionport_update: failed to open"
562 " io completion port: %s",
563 strbuf);
564
565 /* XXXMLG temporary hack to make failures detected.
566 * This function should return errors to the caller, not
567 * exit here.
568 */
569 FATAL_ERROR(__FILE__, __LINE__,
570 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
571 ISC_MSG_FAILED,
572 "CreateIoCompletionPort() failed "
573 "during initialization: %s"),
574 strbuf);
575 exit(1);
576 }
577
578 InterlockedIncrement(&sock->manager->iocp_total);
579 }
580
581 /*
582 * Routine to cleanup and then close the socket.
583 * Only close the socket here if it is NOT associated
584 * with an event, otherwise the WSAWaitForMultipleEvents
585 * may fail due to the fact that the Wait should not
586 * be running while closing an event or a socket.
587 * The socket is locked before calling this function
588 */
589 void
socket_close(isc_socket_t * sock)590 socket_close(isc_socket_t *sock) {
591
592 REQUIRE(sock != NULL);
593
594 if (sock->fd != INVALID_SOCKET) {
595 closesocket(sock->fd);
596 sock->fd = INVALID_SOCKET;
597 _set_state(sock, SOCK_CLOSED);
598 InterlockedDecrement(&sock->manager->totalSockets);
599 }
600 }
601
602 static isc_once_t initialise_once = ISC_ONCE_INIT;
603 static isc_boolean_t initialised = ISC_FALSE;
604
605 static void
initialise(void)606 initialise(void) {
607 WORD wVersionRequested;
608 WSADATA wsaData;
609 int err;
610 SOCKET sock;
611 GUID GUIDConnectEx = WSAID_CONNECTEX;
612 GUID GUIDAcceptEx = WSAID_ACCEPTEX;
613 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
614 DWORD dwBytes;
615
616 /* Need Winsock 2.2 or better */
617 wVersionRequested = MAKEWORD(2, 2);
618
619 err = WSAStartup(wVersionRequested, &wsaData);
620 if (err != 0) {
621 char strbuf[ISC_STRERRORSIZE];
622 isc__strerror(err, strbuf, sizeof(strbuf));
623 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
624 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
625 ISC_MSG_FAILED, "failed"),
626 strbuf);
627 exit(1);
628 }
629 /*
630 * The following APIs do not exist as functions in a library, but we must
631 * ask winsock for them. They are "extensions" -- but why they cannot be
632 * actual functions is beyond me. So, ask winsock for the pointers to the
633 * functions we need.
634 */
635 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
636 INSIST(sock != INVALID_SOCKET);
637 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
638 &GUIDConnectEx, sizeof(GUIDConnectEx),
639 &ISCConnectEx, sizeof(ISCConnectEx),
640 &dwBytes, NULL, NULL);
641 INSIST(err == 0);
642
643 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
644 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
645 &ISCAcceptEx, sizeof(ISCAcceptEx),
646 &dwBytes, NULL, NULL);
647 INSIST(err == 0);
648
649 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER,
650 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
651 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
652 &dwBytes, NULL, NULL);
653 INSIST(err == 0);
654
655 closesocket(sock);
656
657 initialised = ISC_TRUE;
658 }
659
660 /*
661 * Initialize socket services
662 */
663 void
InitSockets(void)664 InitSockets(void) {
665 RUNTIME_CHECK(isc_once_do(&initialise_once,
666 initialise) == ISC_R_SUCCESS);
667 if (!initialised)
668 exit(1);
669 }
670
671 int
internal_sendmsg(isc_socket_t * sock,IoCompletionInfo * lpo,struct msghdr * messagehdr,int flags,int * Error)672 internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
673 struct msghdr *messagehdr, int flags, int *Error)
674 {
675 int Result;
676 DWORD BytesSent;
677 DWORD Flags = flags;
678 int total_sent;
679
680 *Error = 0;
681 Result = WSASendTo(sock->fd, messagehdr->msg_iov,
682 messagehdr->msg_iovlen, &BytesSent,
683 Flags, (SOCKADDR *)&messagehdr->to_addr,
684 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
685 NULL);
686
687 total_sent = (int)BytesSent;
688
689 /* Check for errors.*/
690 if (Result == SOCKET_ERROR) {
691 *Error = WSAGetLastError();
692
693 switch (*Error) {
694 case WSA_IO_INCOMPLETE:
695 case WSA_WAIT_IO_COMPLETION:
696 case WSA_IO_PENDING:
697 case NO_ERROR: /* Strange, but okay */
698 sock->pending_iocp++;
699 sock->pending_send++;
700 break;
701
702 default:
703 return (-1);
704 break;
705 }
706 } else {
707 sock->pending_iocp++;
708 sock->pending_send++;
709 }
710
711 if (lpo != NULL)
712 return (0);
713 else
714 return (total_sent);
715 }
716
717 static void
queue_receive_request(isc_socket_t * sock)718 queue_receive_request(isc_socket_t *sock) {
719 DWORD Flags = 0;
720 DWORD NumBytes = 0;
721 int total_bytes = 0;
722 int Result;
723 int Error;
724 int need_retry;
725 WSABUF iov[1];
726 IoCompletionInfo *lpo = NULL;
727 isc_result_t isc_result;
728
729 retry:
730 need_retry = ISC_FALSE;
731
732 /*
733 * If we already have a receive pending, do nothing.
734 */
735 if (sock->pending_recv > 0) {
736 if (lpo != NULL)
737 HeapFree(hHeapHandle, 0, lpo);
738 return;
739 }
740
741 /*
742 * If no one is waiting, do nothing.
743 */
744 if (ISC_LIST_EMPTY(sock->recv_list)) {
745 if (lpo != NULL)
746 HeapFree(hHeapHandle, 0, lpo);
747 return;
748 }
749
750 INSIST(sock->recvbuf.remaining == 0);
751 INSIST(sock->fd != INVALID_SOCKET);
752
753 iov[0].len = sock->recvbuf.len;
754 iov[0].buf = sock->recvbuf.base;
755
756 if (lpo == NULL) {
757 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
758 HEAP_ZERO_MEMORY,
759 sizeof(IoCompletionInfo));
760 RUNTIME_CHECK(lpo != NULL);
761 } else
762 ZeroMemory(lpo, sizeof(IoCompletionInfo));
763 lpo->request_type = SOCKET_RECV;
764
765 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
766
767 Error = 0;
768 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
769 &NumBytes, &Flags,
770 (SOCKADDR *)&sock->recvbuf.from_addr,
771 &sock->recvbuf.from_addr_len,
772 (LPWSAOVERLAPPED)lpo, NULL);
773
774 /* Check for errors. */
775 if (Result == SOCKET_ERROR) {
776 Error = WSAGetLastError();
777
778 switch (Error) {
779 case WSA_IO_PENDING:
780 sock->pending_iocp++;
781 sock->pending_recv++;
782 break;
783
784 /* direct error: no completion event */
785 case ERROR_HOST_UNREACHABLE:
786 case WSAENETRESET:
787 case WSAECONNRESET:
788 if (!sock->connected) {
789 /* soft error */
790 need_retry = ISC_TRUE;
791 break;
792 }
793 /* FALLTHROUGH */
794
795 default:
796 isc_result = isc__errno2result(Error);
797 if (isc_result == ISC_R_UNEXPECTED)
798 UNEXPECTED_ERROR(__FILE__, __LINE__,
799 "WSARecvFrom: Windows error code: %d, isc result %d",
800 Error, isc_result);
801 send_recvdone_abort(sock, isc_result);
802 HeapFree(hHeapHandle, 0, lpo);
803 lpo = NULL;
804 break;
805 }
806 } else {
807 /*
808 * The recv() finished immediately, but we will still get
809 * a completion event. Rather than duplicate code, let
810 * that thread handle sending the data along its way.
811 */
812 sock->pending_iocp++;
813 sock->pending_recv++;
814 }
815
816 socket_log(__LINE__, sock, NULL, IOEVENT,
817 isc_msgcat, ISC_MSGSET_SOCKET,
818 ISC_MSG_DOIORECV,
819 "queue_io_request: fd %d result %d error %d",
820 sock->fd, Result, Error);
821
822 CONSISTENT(sock);
823
824 if (need_retry)
825 goto retry;
826 }
827
828 static void
manager_log(isc_socketmgr_t * sockmgr,isc_logcategory_t * category,isc_logmodule_t * module,int level,const char * fmt,...)829 manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
830 isc_logmodule_t *module, int level, const char *fmt, ...)
831 {
832 char msgbuf[2048];
833 va_list ap;
834
835 if (!isc_log_wouldlog(isc_lctx, level))
836 return;
837
838 va_start(ap, fmt);
839 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
840 va_end(ap);
841
842 isc_log_write(isc_lctx, category, module, level,
843 "sockmgr %p: %s", sockmgr, msgbuf);
844 }
845
846 static void
socket_log(int lineno,isc_socket_t * sock,isc_sockaddr_t * address,isc_logcategory_t * category,isc_logmodule_t * module,int level,isc_msgcat_t * msgcat,int msgset,int message,const char * fmt,...)847 socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
848 isc_logcategory_t *category, isc_logmodule_t *module, int level,
849 isc_msgcat_t *msgcat, int msgset, int message,
850 const char *fmt, ...)
851 {
852 char msgbuf[2048];
853 char peerbuf[256];
854 va_list ap;
855
856
857 if (!isc_log_wouldlog(isc_lctx, level))
858 return;
859
860 va_start(ap, fmt);
861 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
862 va_end(ap);
863
864 if (address == NULL) {
865 isc_log_iwrite(isc_lctx, category, module, level,
866 msgcat, msgset, message,
867 "socket %p line %d: %s", sock, lineno, msgbuf);
868 } else {
869 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
870 isc_log_iwrite(isc_lctx, category, module, level,
871 msgcat, msgset, message,
872 "socket %p line %d peer %s: %s", sock, lineno,
873 peerbuf, msgbuf);
874 }
875
876 }
877
878 /*
879 * Make an fd SOCKET non-blocking.
880 */
881 static isc_result_t
make_nonblock(SOCKET fd)882 make_nonblock(SOCKET fd) {
883 int ret;
884 unsigned long flags = 1;
885 char strbuf[ISC_STRERRORSIZE];
886
887 /* Set the socket to non-blocking */
888 ret = ioctlsocket(fd, FIONBIO, &flags);
889
890 if (ret == -1) {
891 isc__strerror(errno, strbuf, sizeof(strbuf));
892 UNEXPECTED_ERROR(__FILE__, __LINE__,
893 "ioctlsocket(%d, FIOBIO, %d): %s",
894 fd, flags, strbuf);
895
896 return (ISC_R_UNEXPECTED);
897 }
898
899 return (ISC_R_SUCCESS);
900 }
901
902 /*
903 * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
904 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
905 * fails with an "ICMP port unreachable" response and preventing the
906 * socket from using the WSARecvFrom in subsequent operations.
907 * The function below fixes this, but requires that Windows 2000
908 * Service Pack 2 or later be installed on the system. NT 4.0
909 * systems are not affected by this and work correctly.
910 * See Microsoft Knowledge Base Article Q263823 for details of this.
911 */
912 isc_result_t
connection_reset_fix(SOCKET fd)913 connection_reset_fix(SOCKET fd) {
914 DWORD dwBytesReturned = 0;
915 BOOL bNewBehavior = FALSE;
916 DWORD status;
917
918 if (isc_win32os_majorversion() < 5)
919 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */
920
921 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
922 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
923 sizeof(bNewBehavior), NULL, 0,
924 &dwBytesReturned, NULL, NULL);
925 if (status != SOCKET_ERROR)
926 return (ISC_R_SUCCESS);
927 else {
928 UNEXPECTED_ERROR(__FILE__, __LINE__,
929 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
930 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
931 ISC_MSG_FAILED, "failed"));
932 return (ISC_R_UNEXPECTED);
933 }
934 }
935
936 /*
937 * Construct an iov array and attach it to the msghdr passed in. This is
938 * the SEND constructor, which will use the used region of the buffer
939 * (if using a buffer list) or will use the internal region (if a single
940 * buffer I/O is requested).
941 *
942 * Nothing can be NULL, and the done event must list at least one buffer
943 * on the buffer linked list for this function to be meaningful.
944 */
945 static void
build_msghdr_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * msg,char * cmsg,WSABUF * iov,IoCompletionInfo * lpo)946 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
947 struct msghdr *msg, char *cmsg, WSABUF *iov,
948 IoCompletionInfo *lpo)
949 {
950 unsigned int iovcount;
951 isc_buffer_t *buffer;
952 buflist_t *cpbuffer;
953 isc_region_t used;
954 size_t write_count;
955 size_t skip_count;
956
957 memset(msg, 0, sizeof(*msg));
958
959 memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
960 msg->to_addr_len = dev->address.length;
961
962 buffer = ISC_LIST_HEAD(dev->bufferlist);
963 write_count = 0;
964 iovcount = 0;
965
966 /*
967 * Single buffer I/O? Skip what we've done so far in this region.
968 */
969 if (buffer == NULL) {
970 write_count = dev->region.length - dev->n;
971 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
972 RUNTIME_CHECK(cpbuffer != NULL);
973 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
974 RUNTIME_CHECK(cpbuffer->buf != NULL);
975
976 socket_log(__LINE__, sock, NULL, TRACE,
977 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
978 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
979 cpbuffer->buf, write_count);
980
981 memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
982 cpbuffer->buflen = write_count;
983 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
984 iov[0].buf = cpbuffer->buf;
985 iov[0].len = write_count;
986 iovcount = 1;
987
988 goto config;
989 }
990
991 /*
992 * Multibuffer I/O.
993 * Skip the data in the buffer list that we have already written.
994 */
995 skip_count = dev->n;
996 while (buffer != NULL) {
997 REQUIRE(ISC_BUFFER_VALID(buffer));
998 if (skip_count < isc_buffer_usedlength(buffer))
999 break;
1000 skip_count -= isc_buffer_usedlength(buffer);
1001 buffer = ISC_LIST_NEXT(buffer, link);
1002 }
1003
1004 while (buffer != NULL) {
1005 INSIST(iovcount < MAXSCATTERGATHER_SEND);
1006
1007 isc_buffer_usedregion(buffer, &used);
1008
1009 if (used.length > 0) {
1010 int uselen = used.length - skip_count;
1011 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1012 RUNTIME_CHECK(cpbuffer != NULL);
1013 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1014 RUNTIME_CHECK(cpbuffer->buf != NULL);
1015
1016 socket_log(__LINE__, sock, NULL, TRACE,
1017 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1018 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1019 cpbuffer->buf, write_count);
1020
1021 memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
1022 cpbuffer->buflen = uselen;
1023 iov[iovcount].buf = cpbuffer->buf;
1024 iov[iovcount].len = used.length - skip_count;
1025 write_count += uselen;
1026 skip_count = 0;
1027 iovcount++;
1028 }
1029 buffer = ISC_LIST_NEXT(buffer, link);
1030 }
1031
1032 INSIST(skip_count == 0);
1033
1034 config:
1035 msg->msg_iov = iov;
1036 msg->msg_iovlen = iovcount;
1037 msg->msg_totallen = write_count;
1038 }
1039
1040 static void
set_dev_address(isc_sockaddr_t * address,isc_socket_t * sock,isc_socketevent_t * dev)1041 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1042 isc_socketevent_t *dev)
1043 {
1044 if (sock->type == isc_sockettype_udp) {
1045 if (address != NULL)
1046 dev->address = *address;
1047 else
1048 dev->address = sock->address;
1049 } else if (sock->type == isc_sockettype_tcp) {
1050 INSIST(address == NULL);
1051 dev->address = sock->address;
1052 }
1053 }
1054
1055 static void
destroy_socketevent(isc_event_t * event)1056 destroy_socketevent(isc_event_t *event) {
1057 isc_socketevent_t *ev = (isc_socketevent_t *)event;
1058
1059 INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1060
1061 (ev->destroy)(event);
1062 }
1063
1064 static isc_socketevent_t *
allocate_socketevent(isc_socket_t * sock,isc_eventtype_t eventtype,isc_taskaction_t action,const void * arg)1065 allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1066 isc_taskaction_t action, const void *arg)
1067 {
1068 isc_socketevent_t *ev;
1069
1070 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
1071 sock, eventtype,
1072 action, arg,
1073 sizeof(*ev));
1074 if (ev == NULL)
1075 return (NULL);
1076
1077 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1078 ISC_LINK_INIT(ev, ev_link);
1079 ISC_LIST_INIT(ev->bufferlist);
1080 ev->region.base = NULL;
1081 ev->n = 0;
1082 ev->offset = 0;
1083 ev->attributes = 0;
1084 ev->destroy = ev->ev_destroy;
1085 ev->ev_destroy = destroy_socketevent;
1086
1087 return (ev);
1088 }
1089
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debug helper (only built with ISC_SOCKET_DEBUG): print the message
 * header 'msg' and each of its scatter/gather entries to stdout,
 * together with the descriptor of 'sock'.
 *
 * The loop index is unsigned and is therefore printed with %u, and
 * pointers handed to %p are cast to void * as the conversion requires.
 * The remaining members are declared outside this block, so their
 * conversion specifiers are left as they were.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i;

	printf("MSGHDR %p, Socket #: %u\n", (void *)msg, sock->fd);
	printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", (void *)msg->msg_iov, msg->msg_iovlen);
	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
		printf("\t\t%u\tbase %p, len %d\n", i,
		       (void *)msg->msg_iov[i].buf,
		       msg->msg_iov[i].len);
}
#endif
1104
1105 /*
1106 * map the error code
1107 */
1108 int
map_socket_error(isc_socket_t * sock,int windows_errno,int * isc_errno,char * errorstring,size_t bufsize)1109 map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1110 char *errorstring, size_t bufsize) {
1111
1112 int doreturn;
1113 switch (windows_errno) {
1114 case WSAECONNREFUSED:
1115 *isc_errno = ISC_R_CONNREFUSED;
1116 if (sock->connected)
1117 doreturn = DOIO_HARD;
1118 else
1119 doreturn = DOIO_SOFT;
1120 break;
1121 case WSAENETUNREACH:
1122 case ERROR_NETWORK_UNREACHABLE:
1123 *isc_errno = ISC_R_NETUNREACH;
1124 if (sock->connected)
1125 doreturn = DOIO_HARD;
1126 else
1127 doreturn = DOIO_SOFT;
1128 break;
1129 case ERROR_PORT_UNREACHABLE:
1130 case ERROR_HOST_UNREACHABLE:
1131 case WSAEHOSTUNREACH:
1132 *isc_errno = ISC_R_HOSTUNREACH;
1133 if (sock->connected)
1134 doreturn = DOIO_HARD;
1135 else
1136 doreturn = DOIO_SOFT;
1137 break;
1138 case WSAENETDOWN:
1139 *isc_errno = ISC_R_NETDOWN;
1140 if (sock->connected)
1141 doreturn = DOIO_HARD;
1142 else
1143 doreturn = DOIO_SOFT;
1144 break;
1145 case WSAEHOSTDOWN:
1146 *isc_errno = ISC_R_HOSTDOWN;
1147 if (sock->connected)
1148 doreturn = DOIO_HARD;
1149 else
1150 doreturn = DOIO_SOFT;
1151 break;
1152 case WSAEACCES:
1153 *isc_errno = ISC_R_NOPERM;
1154 if (sock->connected)
1155 doreturn = DOIO_HARD;
1156 else
1157 doreturn = DOIO_SOFT;
1158 break;
1159 case WSAECONNRESET:
1160 case WSAENETRESET:
1161 case WSAECONNABORTED:
1162 case WSAEDISCON:
1163 *isc_errno = ISC_R_CONNECTIONRESET;
1164 if (sock->connected)
1165 doreturn = DOIO_HARD;
1166 else
1167 doreturn = DOIO_SOFT;
1168 break;
1169 case WSAENOTCONN:
1170 *isc_errno = ISC_R_NOTCONNECTED;
1171 if (sock->connected)
1172 doreturn = DOIO_HARD;
1173 else
1174 doreturn = DOIO_SOFT;
1175 break;
1176 case ERROR_OPERATION_ABORTED:
1177 case ERROR_CONNECTION_ABORTED:
1178 case ERROR_REQUEST_ABORTED:
1179 *isc_errno = ISC_R_CONNECTIONRESET;
1180 doreturn = DOIO_HARD;
1181 break;
1182 case WSAENOBUFS:
1183 *isc_errno = ISC_R_NORESOURCES;
1184 doreturn = DOIO_HARD;
1185 break;
1186 case WSAEAFNOSUPPORT:
1187 *isc_errno = ISC_R_FAMILYNOSUPPORT;
1188 doreturn = DOIO_HARD;
1189 break;
1190 case WSAEADDRNOTAVAIL:
1191 *isc_errno = ISC_R_ADDRNOTAVAIL;
1192 doreturn = DOIO_HARD;
1193 break;
1194 case WSAEDESTADDRREQ:
1195 *isc_errno = ISC_R_BADADDRESSFORM;
1196 doreturn = DOIO_HARD;
1197 break;
1198 case ERROR_NETNAME_DELETED:
1199 *isc_errno = ISC_R_NETDOWN;
1200 doreturn = DOIO_HARD;
1201 break;
1202 default:
1203 *isc_errno = ISC_R_IOERROR;
1204 doreturn = DOIO_HARD;
1205 break;
1206 }
1207 if (doreturn == DOIO_HARD) {
1208 isc__strerror(windows_errno, errorstring, bufsize);
1209 }
1210 return (doreturn);
1211 }
1212
1213 static void
fill_recv(isc_socket_t * sock,isc_socketevent_t * dev)1214 fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1215 isc_region_t r;
1216 int copylen;
1217 isc_buffer_t *buffer;
1218
1219 INSIST(dev->n < dev->minimum);
1220 INSIST(sock->recvbuf.remaining > 0);
1221 INSIST(sock->pending_recv == 0);
1222
1223 if (sock->type == isc_sockettype_udp) {
1224 dev->address.length = sock->recvbuf.from_addr_len;
1225 memcpy(&dev->address.type, &sock->recvbuf.from_addr,
1226 sock->recvbuf.from_addr_len);
1227 if (isc_sockaddr_getport(&dev->address) == 0) {
1228 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1229 socket_log(__LINE__, sock, &dev->address, IOEVENT,
1230 isc_msgcat, ISC_MSGSET_SOCKET,
1231 ISC_MSG_ZEROPORT,
1232 "dropping source port zero packet");
1233 }
1234 sock->recvbuf.remaining = 0;
1235 return;
1236 }
1237 } else if (sock->type == isc_sockettype_tcp) {
1238 dev->address = sock->address;
1239 }
1240
1241 /*
1242 * Run through the list of buffers we were given, and find the
1243 * first one with space. Once it is found, loop through, filling
1244 * the buffers as much as possible.
1245 */
1246 buffer = ISC_LIST_HEAD(dev->bufferlist);
1247 if (buffer != NULL) { // Multi-buffer receive
1248 while (buffer != NULL && sock->recvbuf.remaining > 0) {
1249 REQUIRE(ISC_BUFFER_VALID(buffer));
1250 if (isc_buffer_availablelength(buffer) > 0) {
1251 isc_buffer_availableregion(buffer, &r);
1252 copylen = min(r.length, sock->recvbuf.remaining);
1253 memcpy(r.base, sock->recvbuf.consume_position, copylen);
1254 sock->recvbuf.consume_position += copylen;
1255 sock->recvbuf.remaining -= copylen;
1256 isc_buffer_add(buffer, copylen);
1257 dev->n += copylen;
1258 }
1259 buffer = ISC_LIST_NEXT(buffer, link);
1260 }
1261 } else { // Single-buffer receive
1262 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1263 memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
1264 sock->recvbuf.consume_position += copylen;
1265 sock->recvbuf.remaining -= copylen;
1266 dev->n += copylen;
1267 }
1268
1269 /*
1270 * UDP receives are all-consuming. That is, if we have 4k worth of
1271 * data in our receive buffer, and the caller only gave us
1272 * 1k of space, we will toss the remaining 3k of data. TCP
1273 * will keep the extra data around and use it for later requests.
1274 */
1275 if (sock->type == isc_sockettype_udp)
1276 sock->recvbuf.remaining = 0;
1277 }
1278
1279 /*
1280 * Copy out as much data from the internal buffer to done events.
1281 * As each done event is filled, send it along its way.
1282 */
1283 static void
completeio_recv(isc_socket_t * sock)1284 completeio_recv(isc_socket_t *sock)
1285 {
1286 isc_socketevent_t *dev;
1287
1288 /*
1289 * If we are in the process of filling our buffer, we cannot
1290 * touch it yet, so don't.
1291 */
1292 if (sock->pending_recv > 0)
1293 return;
1294
1295 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1296 dev = ISC_LIST_HEAD(sock->recv_list);
1297
1298 /*
1299 * See if we have sufficient data in our receive buffer
1300 * to handle this. If we do, copy out the data.
1301 */
1302 fill_recv(sock, dev);
1303
1304 /*
1305 * Did we satisfy it?
1306 */
1307 if (dev->n >= dev->minimum) {
1308 dev->result = ISC_R_SUCCESS;
1309 send_recvdone_event(sock, &dev);
1310 }
1311 }
1312 }
1313
1314 /*
1315 * Returns:
1316 * DOIO_SUCCESS The operation succeeded. dev->result contains
1317 * ISC_R_SUCCESS.
1318 *
1319 * DOIO_HARD A hard or unexpected I/O error was encountered.
1320 * dev->result contains the appropriate error.
1321 *
1322 * DOIO_SOFT A soft I/O error was encountered. No senddone
1323 * event was sent. The operation should be retried.
1324 *
1325 * No other return values are possible.
1326 */
1327 static int
completeio_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int cc,int send_errno)1328 completeio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1329 struct msghdr *messagehdr, int cc, int send_errno)
1330 {
1331 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1332 char strbuf[ISC_STRERRORSIZE];
1333
1334 if (send_errno != 0) {
1335 if (SOFT_ERROR(send_errno))
1336 return (DOIO_SOFT);
1337
1338 return (map_socket_error(sock, send_errno, &dev->result,
1339 strbuf, sizeof(strbuf)));
1340
1341 /*
1342 * The other error types depend on whether or not the
1343 * socket is UDP or TCP. If it is UDP, some errors
1344 * that we expect to be fatal under TCP are merely
1345 * annoying, and are really soft errors.
1346 *
1347 * However, these soft errors are still returned as
1348 * a status.
1349 */
1350 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1351 isc__strerror(send_errno, strbuf, sizeof(strbuf));
1352 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1353 addrbuf, strbuf);
1354 dev->result = isc__errno2result(send_errno);
1355 return (DOIO_HARD);
1356 }
1357
1358 /*
1359 * If we write less than we expected, update counters, poke.
1360 */
1361 dev->n += cc;
1362 if (cc != messagehdr->msg_totallen)
1363 return (DOIO_SOFT);
1364
1365 /*
1366 * Exactly what we wanted to write. We're done with this
1367 * entry. Post its completion event.
1368 */
1369 dev->result = ISC_R_SUCCESS;
1370 return (DOIO_SUCCESS);
1371 }
1372
1373 static int
startio_send(isc_socket_t * sock,isc_socketevent_t * dev,int * nbytes,int * send_errno)1374 startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1375 int *send_errno)
1376 {
1377 char *cmsg = NULL;
1378 char strbuf[ISC_STRERRORSIZE];
1379 IoCompletionInfo *lpo;
1380 int status;
1381 struct msghdr *msghdr;
1382
1383 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1384 HEAP_ZERO_MEMORY,
1385 sizeof(IoCompletionInfo));
1386 RUNTIME_CHECK(lpo != NULL);
1387 lpo->request_type = SOCKET_SEND;
1388 lpo->dev = dev;
1389 msghdr = &lpo->messagehdr;
1390 memset(msghdr, 0, sizeof(struct msghdr));
1391 ISC_LIST_INIT(lpo->bufferlist);
1392
1393 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1394
1395 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1396
1397 if (*nbytes < 0) {
1398 /*
1399 * I/O has been initiated
1400 * completion will be through the completion port
1401 */
1402 if (PENDING_ERROR(*send_errno)) {
1403 status = DOIO_PENDING;
1404 goto done;
1405 }
1406
1407 if (SOFT_ERROR(*send_errno)) {
1408 status = DOIO_SOFT;
1409 goto done;
1410 }
1411
1412 /*
1413 * If we got this far then something is wrong
1414 */
1415 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1416 isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1417 socket_log(__LINE__, sock, NULL, IOEVENT,
1418 isc_msgcat, ISC_MSGSET_SOCKET,
1419 ISC_MSG_INTERNALSEND,
1420 "startio_send: internal_sendmsg(%d) %d "
1421 "bytes, err %d/%s",
1422 sock->fd, *nbytes, *send_errno, strbuf);
1423 }
1424 status = DOIO_HARD;
1425 goto done;
1426 }
1427 dev->result = ISC_R_SUCCESS;
1428 status = DOIO_SOFT;
1429 done:
1430 _set_state(sock, SOCK_DATA);
1431 return (status);
1432 }
1433
1434 static isc_result_t
allocate_socket(isc_socketmgr_t * manager,isc_sockettype_t type,isc_socket_t ** socketp)1435 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1436 isc_socket_t **socketp) {
1437 isc_socket_t *sock;
1438 isc_result_t result;
1439
1440 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1441
1442 if (sock == NULL)
1443 return (ISC_R_NOMEMORY);
1444
1445 sock->magic = 0;
1446 sock->references = 0;
1447
1448 sock->manager = manager;
1449 sock->type = type;
1450 sock->fd = INVALID_SOCKET;
1451
1452 ISC_LINK_INIT(sock, link);
1453
1454 /*
1455 * set up list of readers and writers to be initially empty
1456 */
1457 ISC_LIST_INIT(sock->recv_list);
1458 ISC_LIST_INIT(sock->send_list);
1459 ISC_LIST_INIT(sock->accept_list);
1460 sock->connect_ev = NULL;
1461 sock->pending_accept = 0;
1462 sock->pending_recv = 0;
1463 sock->pending_send = 0;
1464 sock->pending_iocp = 0;
1465 sock->listener = 0;
1466 sock->connected = 0;
1467 sock->pending_connect = 0;
1468 sock->bound = 0;
1469 sock->dupped = 0;
1470 memset(sock->name, 0, sizeof(sock->name)); // zero the name field
1471 _set_state(sock, SOCK_INITIALIZED);
1472
1473 sock->recvbuf.len = 65536;
1474 sock->recvbuf.consume_position = sock->recvbuf.base;
1475 sock->recvbuf.remaining = 0;
1476 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1477 if (sock->recvbuf.base == NULL) {
1478 sock->magic = 0;
1479 goto error;
1480 }
1481
1482 /*
1483 * initialize the lock
1484 */
1485 result = isc_mutex_init(&sock->lock);
1486 if (result != ISC_R_SUCCESS) {
1487 sock->magic = 0;
1488 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1489 sock->recvbuf.base = NULL;
1490 goto error;
1491 }
1492
1493 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1494 "allocated");
1495
1496 sock->magic = SOCKET_MAGIC;
1497 *socketp = sock;
1498
1499 return (ISC_R_SUCCESS);
1500
1501 error:
1502 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1503
1504 return (result);
1505 }
1506
1507 /*
1508 * Verify that the socket state is consistent.
1509 */
1510 static void
consistent(isc_socket_t * sock)1511 consistent(isc_socket_t *sock) {
1512
1513 isc_socketevent_t *dev;
1514 isc_socket_newconnev_t *nev;
1515 unsigned int count;
1516 char *crash_reason;
1517 isc_boolean_t crash = ISC_FALSE;
1518
1519 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1520 + sock->pending_accept + sock->pending_connect);
1521
1522 dev = ISC_LIST_HEAD(sock->send_list);
1523 count = 0;
1524 while (dev != NULL) {
1525 count++;
1526 dev = ISC_LIST_NEXT(dev, ev_link);
1527 }
1528 if (count > sock->pending_send) {
1529 crash = ISC_TRUE;
1530 crash_reason = "send_list > sock->pending_send";
1531 }
1532
1533 nev = ISC_LIST_HEAD(sock->accept_list);
1534 count = 0;
1535 while (nev != NULL) {
1536 count++;
1537 nev = ISC_LIST_NEXT(nev, ev_link);
1538 }
1539 if (count > sock->pending_accept) {
1540 crash = ISC_TRUE;
1541 crash_reason = "send_list > sock->pending_send";
1542 }
1543
1544 if (crash) {
1545 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1546 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1547 crash_reason);
1548 sock_dump(sock);
1549 INSIST(crash == ISC_FALSE);
1550 }
1551 }
1552
1553 /*
1554 * Maybe free the socket.
1555 *
1556 * This function will verify tht the socket is no longer in use in any way,
1557 * either internally or externally. This is the only place where this
1558 * check is to be made; if some bit of code believes that IT is done with
1559 * the socket (e.g., some reference counter reaches zero), it should call
1560 * this function.
1561 *
1562 * When calling this function, the socket must be locked, and the manager
1563 * must be unlocked.
1564 *
1565 * When this function returns, *socketp will be NULL. No tricks to try
1566 * to hold on to this pointer are allowed.
1567 */
1568 static void
maybe_free_socket(isc_socket_t ** socketp,int lineno)1569 maybe_free_socket(isc_socket_t **socketp, int lineno) {
1570 isc_socket_t *sock = *socketp;
1571 *socketp = NULL;
1572
1573 INSIST(VALID_SOCKET(sock));
1574 CONSISTENT(sock);
1575
1576 if (sock->pending_iocp > 0
1577 || sock->pending_recv > 0
1578 || sock->pending_send > 0
1579 || sock->pending_accept > 0
1580 || sock->references > 0
1581 || sock->pending_connect == 1
1582 || !ISC_LIST_EMPTY(sock->recv_list)
1583 || !ISC_LIST_EMPTY(sock->send_list)
1584 || !ISC_LIST_EMPTY(sock->accept_list)
1585 || sock->fd != INVALID_SOCKET) {
1586 UNLOCK(&sock->lock);
1587 return;
1588 }
1589 UNLOCK(&sock->lock);
1590
1591 free_socket(&sock, lineno);
1592 }
1593
1594 void
free_socket(isc_socket_t ** sockp,int lineno)1595 free_socket(isc_socket_t **sockp, int lineno) {
1596 isc_socketmgr_t *manager;
1597 isc_socket_t *sock = *sockp;
1598 *sockp = NULL;
1599
1600 manager = sock->manager;
1601
1602 /*
1603 * Seems we can free the socket after all.
1604 */
1605 manager = sock->manager;
1606 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1607 ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
1608 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1609
1610 sock->magic = 0;
1611 DESTROYLOCK(&sock->lock);
1612
1613 if (sock->recvbuf.base != NULL)
1614 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1615
1616 LOCK(&manager->lock);
1617 if (ISC_LINK_LINKED(sock, link))
1618 ISC_LIST_UNLINK(manager->socklist, sock, link);
1619 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1620
1621 if (ISC_LIST_EMPTY(manager->socklist))
1622 SIGNAL(&manager->shutdown_ok);
1623 UNLOCK(&manager->lock);
1624 }
1625
/*
 * Create a new 'type' socket of protocol family 'pf' managed by
 * 'manager'.  The new socket is returned in 'socketp', which must
 * point to a NULL pointer on entry.
 *
 * 'type' must not be isc_sockettype_fdwatch.  Duplicating a socket is
 * not supported here: a non-NULL 'dup_socket' yields
 * ISC_R_NOTIMPLEMENTED.
 *
 * On success the socket is non-blocking, holds one reference, has been
 * passed to iocompletionport_update() and is on the manager's socket
 * list.
 *
 * Returns:
 *	ISC_R_SUCCESS
 *	ISC_R_NOTIMPLEMENTED	'dup_socket' was not NULL.
 *	ISC_R_NORESOURCES	socket() failed with WSAEMFILE/WSAENOBUFS.
 *	ISC_R_FAMILYNOSUPPORT	socket() reported an unsupported
 *				protocol or address family.
 *	ISC_R_UNEXPECTED	any other socket() failure.
 *	otherwise		the failure result of allocate_socket(),
 *				connection_reset_fix() or make_nonblock().
 */
static isc_result_t
socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
	      isc_socket_t **socketp, isc_socket_t *dup_socket)
{
	isc_socket_t *sock = NULL;
	isc_result_t result;
#if defined(USE_CMSG)
	int on = 1;
#endif
#if defined(SO_RCVBUF)
	ISC_SOCKADDR_LEN_T optlen;
	int size;
#endif
	int socket_errno;
	char strbuf[ISC_STRERRORSIZE];

	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(socketp != NULL && *socketp == NULL);
	REQUIRE(type != isc_sockettype_fdwatch);

	if (dup_socket != NULL)
		return (ISC_R_NOTIMPLEMENTED);

	result = allocate_socket(manager, type, &sock);
	if (result != ISC_R_SUCCESS)
		return (result);

	sock->pf = pf;
#if 0
	if (dup_socket == NULL) {
#endif
		switch (type) {
		case isc_sockettype_udp:
			sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
			if (sock->fd != INVALID_SOCKET) {
				result = connection_reset_fix(sock->fd);
				if (result != ISC_R_SUCCESS) {
					/*
					 * Undo the socket() call and
					 * release the structure again.
					 */
					socket_log(__LINE__, sock,
						NULL, EVENT, NULL, 0, 0,
						"closed %d %d %d "
						"con_reset_fix_failed",
						sock->pending_recv,
						sock->pending_send,
						sock->references);
					closesocket(sock->fd);
					_set_state(sock, SOCK_CLOSED);
					sock->fd = INVALID_SOCKET;
					free_socket(&sock, __LINE__);
					return (result);
				}
			}
			break;
		case isc_sockettype_tcp:
			sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
			break;
		}
#if 0
	} else {
		/*
		 * XXX: dup() is deprecated in windows, use _dup()
		 * instead.  In future we may want to investigate
		 * WSADuplicateSocket().
		 */
		sock->fd = _dup(dup_socket->fd);
		sock->dupped = 1;
		sock->bound = dup_socket->bound;
	}
#endif

	if (sock->fd == INVALID_SOCKET) {
		/*
		 * Fetch the error code before free_socket() gets a
		 * chance to make further calls.
		 */
		socket_errno = WSAGetLastError();
		free_socket(&sock, __LINE__);

		switch (socket_errno) {
		case WSAEMFILE:
		case WSAENOBUFS:
			return (ISC_R_NORESOURCES);

		case WSAEPROTONOSUPPORT:
		case WSAEPFNOSUPPORT:
		case WSAEAFNOSUPPORT:
			return (ISC_R_FAMILYNOSUPPORT);

		default:
			isc__strerror(socket_errno, strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "socket() %s: %s",
					 isc_msgcat_get(isc_msgcat,
							ISC_MSGSET_GENERAL,
							ISC_MSG_FAILED,
							"failed"),
					 strbuf);
			return (ISC_R_UNEXPECTED);
		}
	}

	result = make_nonblock(sock->fd);
	if (result != ISC_R_SUCCESS) {
		socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
			   "closed %d %d %d make_nonblock_failed",
			   sock->pending_recv, sock->pending_send,
			   sock->references);
		closesocket(sock->fd);
		sock->fd = INVALID_SOCKET;
		free_socket(&sock, __LINE__);
		return (result);
	}


#if defined(USE_CMSG) || defined(SO_RCVBUF)
	/*
	 * UDP-only socket options.  Failures in this section are at
	 * most reported; none of them fails the creation.
	 */
	if (type == isc_sockettype_udp) {

#if defined(USE_CMSG)
#if defined(ISC_PLATFORM_HAVEIPV6)
#ifdef IPV6_RECVPKTINFO
		/* 2292bis */
		if ((pf == AF_INET6)
		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
				   (char *)&on, sizeof(on)) < 0)) {
			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "setsockopt(%d, IPV6_RECVPKTINFO) "
					 "%s: %s", sock->fd,
					 isc_msgcat_get(isc_msgcat,
							ISC_MSGSET_GENERAL,
							ISC_MSG_FAILED,
							"failed"),
					 strbuf);
		}
#else
		/* 2292 */
		if ((pf == AF_INET6)
		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
				   (char *)&on, sizeof(on)) < 0)) {
			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
					 sock->fd,
					 isc_msgcat_get(isc_msgcat,
							ISC_MSGSET_GENERAL,
							ISC_MSG_FAILED,
							"failed"),
					 strbuf);
		}
#endif /* IPV6_RECVPKTINFO */
#ifdef IPV6_USE_MIN_MTU	/*2292bis, not too common yet*/
		/* use minimum MTU */
		if (pf == AF_INET6) {
			(void)setsockopt(sock->fd, IPPROTO_IPV6,
					 IPV6_USE_MIN_MTU,
					 (char *)&on, sizeof(on));
		}
#endif
#endif /* ISC_PLATFORM_HAVEIPV6 */
#endif /* defined(USE_CMSG) */

#if defined(SO_RCVBUF)
		/*
		 * Raise the receive buffer to RCVBUFSIZE when the
		 * current size could be read and is smaller.
		 */
		optlen = sizeof(size);
		if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
			       (char *)&size, &optlen) >= 0 &&
		     size < RCVBUFSIZE) {
			size = RCVBUFSIZE;
			(void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
					 (char *)&size, sizeof(size));
		}
#endif

	}
#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */

	_set_state(sock, SOCK_OPEN);
	sock->references = 1;
	*socketp = sock;

	iocompletionport_update(sock);

	/*
	 * Note we don't have to lock the socket like we normally would because
	 * there are no external references to it yet.
	 */
	LOCK(&manager->lock);
	ISC_LIST_APPEND(manager->socklist, sock, link);
	InterlockedIncrement(&manager->totalSockets);
	UNLOCK(&manager->lock);

	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
		   ISC_MSGSET_SOCKET, ISC_MSG_CREATED,
		   "created %u type %u", sock->fd, type);

	return (ISC_R_SUCCESS);
}
1823
1824 isc_result_t
isc__socket_create(isc_socketmgr_t * manager,int pf,isc_sockettype_t type,isc_socket_t ** socketp)1825 isc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1826 isc_socket_t **socketp)
1827 {
1828 return (socket_create(manager, pf, type, socketp, NULL));
1829 }
1830
1831 isc_result_t
isc__socket_dup(isc_socket_t * sock,isc_socket_t ** socketp)1832 isc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) {
1833 REQUIRE(VALID_SOCKET(sock));
1834 REQUIRE(socketp != NULL && *socketp == NULL);
1835
1836 #if 1
1837 return (ISC_R_NOTIMPLEMENTED);
1838 #else
1839 return (socket_create(sock->manager, sock->pf, sock->type,
1840 socketp, sock));
1841 #endif
1842 }
1843
1844 isc_result_t
isc_socket_open(isc_socket_t * sock)1845 isc_socket_open(isc_socket_t *sock) {
1846 REQUIRE(VALID_SOCKET(sock));
1847 REQUIRE(sock->type != isc_sockettype_fdwatch);
1848
1849 return (ISC_R_NOTIMPLEMENTED);
1850 }
1851
1852 /*
1853 * Attach to a socket. Caller must explicitly detach when it is done.
1854 */
1855 void
isc__socket_attach(isc_socket_t * sock,isc_socket_t ** socketp)1856 isc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1857 REQUIRE(VALID_SOCKET(sock));
1858 REQUIRE(socketp != NULL && *socketp == NULL);
1859
1860 LOCK(&sock->lock);
1861 CONSISTENT(sock);
1862 sock->references++;
1863 UNLOCK(&sock->lock);
1864
1865 *socketp = sock;
1866 }
1867
1868 /*
1869 * Dereference a socket. If this is the last reference to it, clean things
1870 * up by destroying the socket.
1871 */
1872 void
isc__socket_detach(isc_socket_t ** socketp)1873 isc__socket_detach(isc_socket_t **socketp) {
1874 isc_socket_t *sock;
1875 isc_boolean_t kill_socket = ISC_FALSE;
1876
1877 REQUIRE(socketp != NULL);
1878 sock = *socketp;
1879 REQUIRE(VALID_SOCKET(sock));
1880 REQUIRE(sock->type != isc_sockettype_fdwatch);
1881
1882 LOCK(&sock->lock);
1883 CONSISTENT(sock);
1884 REQUIRE(sock->references > 0);
1885 sock->references--;
1886
1887 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1888 "detach_socket %d %d %d",
1889 sock->pending_recv, sock->pending_send,
1890 sock->references);
1891
1892 if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1893 closesocket(sock->fd);
1894 sock->fd = INVALID_SOCKET;
1895 _set_state(sock, SOCK_CLOSED);
1896 }
1897
1898 maybe_free_socket(&sock, __LINE__);
1899
1900 *socketp = NULL;
1901 }
1902
1903 isc_result_t
isc_socket_close(isc_socket_t * sock)1904 isc_socket_close(isc_socket_t *sock) {
1905 REQUIRE(VALID_SOCKET(sock));
1906 REQUIRE(sock->type != isc_sockettype_fdwatch);
1907
1908 return (ISC_R_NOTIMPLEMENTED);
1909 }
1910
1911 /*
1912 * Dequeue an item off the given socket's read queue, set the result code
1913 * in the done event to the one provided, and send it to the task it was
1914 * destined for.
1915 *
1916 * If the event to be sent is on a list, remove it before sending. If
1917 * asked to, send and detach from the task as well.
1918 *
1919 * Caller must have the socket locked if the event is attached to the socket.
1920 */
1921 static void
send_recvdone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1922 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1923 isc_task_t *task;
1924
1925 task = (*dev)->ev_sender;
1926 (*dev)->ev_sender = sock;
1927
1928 if (ISC_LINK_LINKED(*dev, ev_link))
1929 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1930
1931 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1932 == ISC_SOCKEVENTATTR_ATTACHED)
1933 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1934 else
1935 isc_task_send(task, (isc_event_t **)dev);
1936
1937 CONSISTENT(sock);
1938 }
1939
1940 /*
1941 * See comments for send_recvdone_event() above.
1942 */
1943 static void
send_senddone_event(isc_socket_t * sock,isc_socketevent_t ** dev)1944 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1945 isc_task_t *task;
1946
1947 INSIST(dev != NULL && *dev != NULL);
1948
1949 task = (*dev)->ev_sender;
1950 (*dev)->ev_sender = sock;
1951
1952 if (ISC_LINK_LINKED(*dev, ev_link))
1953 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1954
1955 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1956 == ISC_SOCKEVENTATTR_ATTACHED)
1957 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1958 else
1959 isc_task_send(task, (isc_event_t **)dev);
1960
1961 CONSISTENT(sock);
1962 }
1963
1964 /*
1965 * See comments for send_recvdone_event() above.
1966 */
1967 static void
send_acceptdone_event(isc_socket_t * sock,isc_socket_newconnev_t ** adev)1968 send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
1969 isc_task_t *task;
1970
1971 INSIST(adev != NULL && *adev != NULL);
1972
1973 task = (*adev)->ev_sender;
1974 (*adev)->ev_sender = sock;
1975
1976 if (ISC_LINK_LINKED(*adev, ev_link))
1977 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
1978
1979 isc_task_sendanddetach(&task, (isc_event_t **)adev);
1980
1981 CONSISTENT(sock);
1982 }
1983
1984 /*
1985 * See comments for send_recvdone_event() above.
1986 */
1987 static void
send_connectdone_event(isc_socket_t * sock,isc_socket_connev_t ** cdev)1988 send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
1989 isc_task_t *task;
1990
1991 INSIST(cdev != NULL && *cdev != NULL);
1992
1993 task = (*cdev)->ev_sender;
1994 (*cdev)->ev_sender = sock;
1995
1996 sock->connect_ev = NULL;
1997
1998 isc_task_sendanddetach(&task, (isc_event_t **)cdev);
1999
2000 CONSISTENT(sock);
2001 }
2002
2003 /*
2004 * On entry to this function, the event delivered is the internal
2005 * readable event, and the first item on the accept_list should be
2006 * the done event we want to send. If the list is empty, this is a no-op,
2007 * so just close the new connection, unlock, and return.
2008 *
2009 * Note the socket is locked before entering here
2010 */
2011 static void
internal_accept(isc_socket_t * sock,IoCompletionInfo * lpo,int accept_errno)2012 internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2013 isc_socket_newconnev_t *adev;
2014 isc_result_t result = ISC_R_SUCCESS;
2015 isc_socket_t *nsock;
2016 struct sockaddr *localaddr;
2017 int localaddr_len = sizeof(*localaddr);
2018 struct sockaddr *remoteaddr;
2019 int remoteaddr_len = sizeof(*remoteaddr);
2020
2021 INSIST(VALID_SOCKET(sock));
2022 LOCK(&sock->lock);
2023 CONSISTENT(sock);
2024
2025 socket_log(__LINE__, sock, NULL, TRACE,
2026 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2027 "internal_accept called");
2028
2029 INSIST(sock->listener);
2030
2031 INSIST(sock->pending_iocp > 0);
2032 sock->pending_iocp--;
2033 INSIST(sock->pending_accept > 0);
2034 sock->pending_accept--;
2035
2036 adev = lpo->adev;
2037
2038 /*
2039 * If the event is no longer in the list we can just return.
2040 */
2041 if (!acceptdone_is_active(sock, adev))
2042 goto done;
2043
2044 nsock = adev->newsocket;
2045
2046 /*
2047 * Pull off the done event.
2048 */
2049 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2050
2051 /*
2052 * Extract the addresses from the socket, copy them into the structure,
2053 * and return the new socket.
2054 */
2055 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2056 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2057 (LPSOCKADDR *)&localaddr, &localaddr_len,
2058 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2059 memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
2060 adev->address.length = remoteaddr_len;
2061 nsock->address = adev->address;
2062 nsock->pf = adev->address.type.sa.sa_family;
2063
2064 socket_log(__LINE__, nsock, &nsock->address, TRACE,
2065 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2066 "internal_accept parent %p", sock);
2067
2068 result = make_nonblock(adev->newsocket->fd);
2069 INSIST(result == ISC_R_SUCCESS);
2070
2071 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2072 (char *)&sock->fd, sizeof(sock->fd)) == 0);
2073
2074 /*
2075 * Hook it up into the manager.
2076 */
2077 nsock->bound = 1;
2078 nsock->connected = 1;
2079 _set_state(nsock, SOCK_OPEN);
2080
2081 LOCK(&nsock->manager->lock);
2082 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2083 InterlockedIncrement(&nsock->manager->totalSockets);
2084 UNLOCK(&nsock->manager->lock);
2085
2086 socket_log(__LINE__, sock, &nsock->address, CREATION,
2087 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2088 "accepted_connection new_socket %p fd %d",
2089 nsock, nsock->fd);
2090
2091 adev->result = result;
2092 send_acceptdone_event(sock, &adev);
2093
2094 done:
2095 CONSISTENT(sock);
2096 UNLOCK(&sock->lock);
2097
2098 HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2099 lpo->acceptbuffer = NULL;
2100 }
2101
2102 /*
2103 * Called when a socket with a pending connect() finishes.
2104 * Note that the socket is locked before entering.
2105 */
2106 static void
internal_connect(isc_socket_t * sock,IoCompletionInfo * lpo,int connect_errno)2107 internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2108 isc_socket_connev_t *cdev;
2109 char strbuf[ISC_STRERRORSIZE];
2110
2111 INSIST(VALID_SOCKET(sock));
2112
2113 LOCK(&sock->lock);
2114
2115 INSIST(sock->pending_iocp > 0);
2116 sock->pending_iocp--;
2117 INSIST(sock->pending_connect == 1);
2118 sock->pending_connect = 0;
2119
2120 /*
2121 * Has this event been canceled?
2122 */
2123 cdev = lpo->cdev;
2124 if (!connectdone_is_active(sock, cdev)) {
2125 sock->pending_connect = 0;
2126 if (sock->fd != INVALID_SOCKET) {
2127 closesocket(sock->fd);
2128 sock->fd = INVALID_SOCKET;
2129 _set_state(sock, SOCK_CLOSED);
2130 }
2131 CONSISTENT(sock);
2132 UNLOCK(&sock->lock);
2133 return;
2134 }
2135
2136 /*
2137 * Check possible Windows network event error status here.
2138 */
2139 if (connect_errno != 0) {
2140 /*
2141 * If the error is SOFT, just try again on this
2142 * fd and pretend nothing strange happened.
2143 */
2144 if (SOFT_ERROR(connect_errno) ||
2145 connect_errno == WSAEINPROGRESS) {
2146 sock->pending_connect = 1;
2147 CONSISTENT(sock);
2148 UNLOCK(&sock->lock);
2149 return;
2150 }
2151
2152 /*
2153 * Translate other errors into ISC_R_* flavors.
2154 */
2155 switch (connect_errno) {
2156 #define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2157 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2158 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2159 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2160 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2161 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2162 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2163 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2164 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2165 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2166 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2167 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2168 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2169 #undef ERROR_MATCH
2170 default:
2171 cdev->result = ISC_R_UNEXPECTED;
2172 isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2173 UNEXPECTED_ERROR(__FILE__, __LINE__,
2174 "internal_connect: connect() %s",
2175 strbuf);
2176 }
2177 } else {
2178 INSIST(setsockopt(sock->fd, SOL_SOCKET,
2179 SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2180 cdev->result = ISC_R_SUCCESS;
2181 sock->connected = 1;
2182 socket_log(__LINE__, sock, &sock->address, IOEVENT,
2183 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2184 "internal_connect: success");
2185 }
2186
2187 send_connectdone_event(sock, &cdev);
2188
2189 UNLOCK(&sock->lock);
2190 }
2191
2192 /*
2193 * Loop through the socket, returning ISC_R_EOF for each done event pending.
2194 */
2195 static void
send_recvdone_abort(isc_socket_t * sock,isc_result_t result)2196 send_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2197 isc_socketevent_t *dev;
2198
2199 while (!ISC_LIST_EMPTY(sock->recv_list)) {
2200 dev = ISC_LIST_HEAD(sock->recv_list);
2201 dev->result = result;
2202 send_recvdone_event(sock, &dev);
2203 }
2204 }
2205
2206 /*
2207 * Take the data we received in our private buffer, and if any recv() calls on
2208 * our list are satisfied, send the corresponding done event.
2209 *
2210 * If we need more data (there are still items on the recv_list after we consume all
2211 * our data) then arrange for another system recv() call to fill our buffers.
2212 */
2213 static void
internal_recv(isc_socket_t * sock,int nbytes)2214 internal_recv(isc_socket_t *sock, int nbytes)
2215 {
2216 INSIST(VALID_SOCKET(sock));
2217
2218 LOCK(&sock->lock);
2219 CONSISTENT(sock);
2220
2221 socket_log(__LINE__, sock, NULL, IOEVENT,
2222 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2223 "internal_recv: %d bytes received", nbytes);
2224
2225 /*
2226 * If we got here, the I/O operation succeeded. However, we might still have removed this
2227 * event from our notification list (or never placed it on it due to immediate completion.)
2228 * Handle the reference counting here, and handle the cancellation event just after.
2229 */
2230 INSIST(sock->pending_iocp > 0);
2231 sock->pending_iocp--;
2232 INSIST(sock->pending_recv > 0);
2233 sock->pending_recv--;
2234
2235 /*
2236 * The only way we could have gotten here is that our I/O has successfully completed.
2237 * Update our pointers, and move on. The only odd case here is that we might not
2238 * have received enough data on a TCP stream to satisfy the minimum requirements. If
2239 * this is the case, we will re-issue the recv() call for what we need.
2240 *
2241 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end
2242 * has closed.
2243 */
2244 if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2245 send_recvdone_abort(sock, ISC_R_EOF);
2246 maybe_free_socket(&sock, __LINE__);
2247 return;
2248 }
2249 sock->recvbuf.remaining = nbytes;
2250 sock->recvbuf.consume_position = sock->recvbuf.base;
2251 completeio_recv(sock);
2252
2253 /*
2254 * If there are more receivers waiting for data, queue another receive
2255 * here.
2256 */
2257 queue_receive_request(sock);
2258
2259 /*
2260 * Unlock and/or destroy if we are the last thing this socket has left to do.
2261 */
2262 maybe_free_socket(&sock, __LINE__);
2263 }
2264
2265 static void
internal_send(isc_socket_t * sock,isc_socketevent_t * dev,struct msghdr * messagehdr,int nbytes,int send_errno,IoCompletionInfo * lpo)2266 internal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2267 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2268 {
2269 buflist_t *buffer;
2270
2271 /*
2272 * Find out what socket this is and lock it.
2273 */
2274 INSIST(VALID_SOCKET(sock));
2275
2276 LOCK(&sock->lock);
2277 CONSISTENT(sock);
2278
2279 socket_log(__LINE__, sock, NULL, IOEVENT,
2280 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2281 "internal_send: task got socket event %p", dev);
2282
2283 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2284 while (buffer != NULL) {
2285 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2286
2287 socket_log(__LINE__, sock, NULL, TRACE,
2288 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2289 "free_buffer %p %p", buffer, buffer->buf);
2290
2291 HeapFree(hHeapHandle, 0, buffer->buf);
2292 HeapFree(hHeapHandle, 0, buffer);
2293 buffer = ISC_LIST_HEAD(lpo->bufferlist);
2294 }
2295
2296 INSIST(sock->pending_iocp > 0);
2297 sock->pending_iocp--;
2298 INSIST(sock->pending_send > 0);
2299 sock->pending_send--;
2300
2301 /* If the event is no longer in the list we can just return */
2302 if (!senddone_is_active(sock, dev))
2303 goto done;
2304
2305 /*
2306 * Set the error code and send things on its way.
2307 */
2308 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2309 case DOIO_SOFT:
2310 break;
2311 case DOIO_HARD:
2312 case DOIO_SUCCESS:
2313 send_senddone_event(sock, &dev);
2314 break;
2315 }
2316
2317 done:
2318 maybe_free_socket(&sock, __LINE__);
2319 }
2320
2321 /*
2322 * These return if the done event passed in is on the list (or for connect, is
2323 * the one we're waiting for. Using these ensures we will not double-send an
2324 * event.
2325 */
2326 static isc_boolean_t
senddone_is_active(isc_socket_t * sock,isc_socketevent_t * dev)2327 senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2328 {
2329 isc_socketevent_t *ldev;
2330
2331 ldev = ISC_LIST_HEAD(sock->send_list);
2332 while (ldev != NULL && ldev != dev)
2333 ldev = ISC_LIST_NEXT(ldev, ev_link);
2334
2335 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2336 }
2337
2338 static isc_boolean_t
acceptdone_is_active(isc_socket_t * sock,isc_socket_newconnev_t * dev)2339 acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2340 {
2341 isc_socket_newconnev_t *ldev;
2342
2343 ldev = ISC_LIST_HEAD(sock->accept_list);
2344 while (ldev != NULL && ldev != dev)
2345 ldev = ISC_LIST_NEXT(ldev, ev_link);
2346
2347 return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2348 }
2349
2350 static isc_boolean_t
connectdone_is_active(isc_socket_t * sock,isc_socket_connev_t * dev)2351 connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2352 {
2353 return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2354 }
2355
2356 //
2357 // The Windows network stack seems to have two very distinct paths depending
2358 // on what is installed. Specifically, if something is looking at network
2359 // connections (like an anti-virus or anti-malware application, such as
2360 // McAfee products) Windows may return additional error conditions which
2361 // were not previously returned.
2362 //
2363 // One specific one is when a TCP SYN scan is used. In this situation,
2364 // Windows responds with the SYN-ACK, but the scanner never responds with
2365 // the 3rd packet, the ACK. Windows consiers this a partially open connection.
2366 // Most Unix networking stacks, and Windows without McAfee installed, will
2367 // not return this to the caller. However, with this product installed,
2368 // Windows returns this as a failed status on the Accept() call. Here, we
2369 // will just re-issue the ISCAcceptEx() call as if nothing had happened.
2370 //
2371 // This code should only be called when the listening socket has received
2372 // such an error. Additionally, the "parent" socket must be locked.
2373 // Additionally, the lpo argument is re-used here, and must not be freed
2374 // by the caller.
2375 //
2376 static isc_result_t
restart_accept(isc_socket_t * parent,IoCompletionInfo * lpo)2377 restart_accept(isc_socket_t *parent, IoCompletionInfo *lpo)
2378 {
2379 isc_socket_t *nsock = lpo->adev->newsocket;
2380 SOCKET new_fd;
2381
2382 /*
2383 * AcceptEx() requires we pass in a socket. Note that we carefully
2384 * do not close the previous socket in case of an error message returned by
2385 * our new socket() call. If we return an error here, our caller will
2386 * clean up.
2387 */
2388 new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP);
2389 if (nsock->fd == INVALID_SOCKET) {
2390 return (ISC_R_FAILURE); // parent will ask windows for error message
2391 }
2392 closesocket(nsock->fd);
2393 nsock->fd = new_fd;
2394
2395 memset(&lpo->overlapped, 0, sizeof(lpo->overlapped));
2396
2397 ISCAcceptEx(parent->fd,
2398 nsock->fd, /* Accepted Socket */
2399 lpo->acceptbuffer, /* Buffer for initial Recv */
2400 0, /* Length of Buffer */
2401 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
2402 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */
2403 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
2404 (LPOVERLAPPED)lpo /* Overlapped structure */
2405 );
2406
2407 InterlockedDecrement(&nsock->manager->iocp_total);
2408 iocompletionport_update(nsock);
2409
2410 return (ISC_R_SUCCESS);
2411 }
2412
2413 /*
2414 * This is the I/O Completion Port Worker Function. It loops forever
2415 * waiting for I/O to complete and then forwards them for further
2416 * processing. There are a number of these in separate threads.
2417 */
2418 static isc_threadresult_t WINAPI
SocketIoThread(LPVOID ThreadContext)2419 SocketIoThread(LPVOID ThreadContext) {
2420 isc_socketmgr_t *manager = ThreadContext;
2421 BOOL bSuccess = FALSE;
2422 DWORD nbytes;
2423 IoCompletionInfo *lpo = NULL;
2424 isc_socket_t *sock = NULL;
2425 int request;
2426 struct msghdr *messagehdr = NULL;
2427 int errval;
2428 char strbuf[ISC_STRERRORSIZE];
2429 int errstatus;
2430
2431 REQUIRE(VALID_MANAGER(manager));
2432
2433 /*
2434 * Set the thread priority high enough so I/O will
2435 * preempt normal recv packet processing, but not
2436 * higher than the timer sync thread.
2437 */
2438 if (!SetThreadPriority(GetCurrentThread(),
2439 THREAD_PRIORITY_ABOVE_NORMAL)) {
2440 errval = GetLastError();
2441 isc__strerror(errval, strbuf, sizeof(strbuf));
2442 FATAL_ERROR(__FILE__, __LINE__,
2443 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2444 ISC_MSG_FAILED,
2445 "Can't set thread priority: %s"),
2446 strbuf);
2447 }
2448
2449 /*
2450 * Loop forever waiting on I/O Completions and then processing them
2451 */
2452 while (TRUE) {
2453 wait_again:
2454 bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2455 &nbytes, (LPDWORD)&sock,
2456 (LPWSAOVERLAPPED *)&lpo,
2457 INFINITE);
2458 if (lpo == NULL) /* Received request to exit */
2459 break;
2460
2461 REQUIRE(VALID_SOCKET(sock));
2462
2463 request = lpo->request_type;
2464
2465 errstatus = 0;
2466 if (!bSuccess) {
2467 isc_result_t isc_result;
2468
2469 /*
2470 * Did the I/O operation complete?
2471 */
2472 errstatus = GetLastError();
2473 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2474
2475 LOCK(&sock->lock);
2476 CONSISTENT(sock);
2477 switch (request) {
2478 case SOCKET_RECV:
2479 INSIST(sock->pending_iocp > 0);
2480 sock->pending_iocp--;
2481 INSIST(sock->pending_recv > 0);
2482 sock->pending_recv--;
2483 if (!sock->connected &&
2484 ((errstatus == ERROR_HOST_UNREACHABLE) ||
2485 (errstatus == WSAENETRESET) ||
2486 (errstatus == WSAECONNRESET))) {
2487 /* ignore soft errors */
2488 queue_receive_request(sock);
2489 break;
2490 }
2491 send_recvdone_abort(sock, isc_result);
2492 if (isc_result == ISC_R_UNEXPECTED) {
2493 UNEXPECTED_ERROR(__FILE__, __LINE__,
2494 "SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2495 errstatus, isc_result);
2496 }
2497 break;
2498
2499 case SOCKET_SEND:
2500 INSIST(sock->pending_iocp > 0);
2501 sock->pending_iocp--;
2502 INSIST(sock->pending_send > 0);
2503 sock->pending_send--;
2504 if (senddone_is_active(sock, lpo->dev)) {
2505 lpo->dev->result = isc_result;
2506 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2507 "canceled_send");
2508 send_senddone_event(sock, &lpo->dev);
2509 }
2510 break;
2511
2512 case SOCKET_ACCEPT:
2513 INSIST(sock->pending_iocp > 0);
2514 INSIST(sock->pending_accept > 0);
2515
2516 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2517 "Accept: errstatus=%d isc_result=%d", errstatus, isc_result);
2518
2519 if (acceptdone_is_active(sock, lpo->adev)) {
2520 if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
2521 UNLOCK(&sock->lock);
2522 goto wait_again;
2523 } else {
2524 errstatus = GetLastError();
2525 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2526 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2527 "restart_accept() failed: errstatus=%d isc_result=%d",
2528 errstatus, isc_result);
2529 }
2530 }
2531
2532 sock->pending_iocp--;
2533 sock->pending_accept--;
2534 if (acceptdone_is_active(sock, lpo->adev)) {
2535 closesocket(lpo->adev->newsocket->fd);
2536 lpo->adev->newsocket->fd = INVALID_SOCKET;
2537 lpo->adev->newsocket->references--;
2538 free_socket(&lpo->adev->newsocket, __LINE__);
2539 lpo->adev->result = isc_result;
2540 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2541 "canceled_accept");
2542 send_acceptdone_event(sock, &lpo->adev);
2543 }
2544 break;
2545
2546 case SOCKET_CONNECT:
2547 INSIST(sock->pending_iocp > 0);
2548 sock->pending_iocp--;
2549 INSIST(sock->pending_connect == 1);
2550 sock->pending_connect = 0;
2551 if (connectdone_is_active(sock, lpo->cdev)) {
2552 lpo->cdev->result = isc_result;
2553 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2554 "canceled_connect");
2555 send_connectdone_event(sock, &lpo->cdev);
2556 }
2557 break;
2558 }
2559 maybe_free_socket(&sock, __LINE__);
2560
2561 if (lpo != NULL)
2562 HeapFree(hHeapHandle, 0, lpo);
2563 continue;
2564 }
2565
2566 messagehdr = &lpo->messagehdr;
2567
2568 switch (request) {
2569 case SOCKET_RECV:
2570 internal_recv(sock, nbytes);
2571 break;
2572 case SOCKET_SEND:
2573 internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2574 break;
2575 case SOCKET_ACCEPT:
2576 internal_accept(sock, lpo, errstatus);
2577 break;
2578 case SOCKET_CONNECT:
2579 internal_connect(sock, lpo, errstatus);
2580 break;
2581 }
2582
2583 if (lpo != NULL)
2584 HeapFree(hHeapHandle, 0, lpo);
2585 }
2586
2587 /*
2588 * Exit Completion Port Thread
2589 */
2590 manager_log(manager, TRACE,
2591 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2592 ISC_MSG_EXITING, "SocketIoThread exiting"));
2593 return ((isc_threadresult_t)0);
2594 }
2595
2596 /*
2597 * Create a new socket manager.
2598 */
2599 isc_result_t
isc__socketmgr_create(isc_mem_t * mctx,isc_socketmgr_t ** managerp)2600 isc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2601 return (isc_socketmgr_create2(mctx, managerp, 0));
2602 }
2603
2604 isc_result_t
isc__socketmgr_create2(isc_mem_t * mctx,isc_socketmgr_t ** managerp,unsigned int maxsocks)2605 isc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2606 unsigned int maxsocks)
2607 {
2608 isc_socketmgr_t *manager;
2609 isc_result_t result;
2610
2611 REQUIRE(managerp != NULL && *managerp == NULL);
2612
2613 if (maxsocks != 0)
2614 return (ISC_R_NOTIMPLEMENTED);
2615
2616 manager = isc_mem_get(mctx, sizeof(*manager));
2617 if (manager == NULL)
2618 return (ISC_R_NOMEMORY);
2619
2620 InitSockets();
2621
2622 manager->magic = SOCKET_MANAGER_MAGIC;
2623 manager->mctx = NULL;
2624 manager->stats = NULL;
2625 ISC_LIST_INIT(manager->socklist);
2626 result = isc_mutex_init(&manager->lock);
2627 if (result != ISC_R_SUCCESS) {
2628 isc_mem_put(mctx, manager, sizeof(*manager));
2629 return (result);
2630 }
2631 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2632 DESTROYLOCK(&manager->lock);
2633 isc_mem_put(mctx, manager, sizeof(*manager));
2634 UNEXPECTED_ERROR(__FILE__, __LINE__,
2635 "isc_condition_init() %s",
2636 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2637 ISC_MSG_FAILED, "failed"));
2638 return (ISC_R_UNEXPECTED);
2639 }
2640
2641 isc_mem_attach(mctx, &manager->mctx);
2642
2643 iocompletionport_init(manager); /* Create the Completion Ports */
2644
2645 manager->bShutdown = ISC_FALSE;
2646 manager->totalSockets = 0;
2647 manager->iocp_total = 0;
2648
2649 *managerp = manager;
2650
2651 return (ISC_R_SUCCESS);
2652 }
2653
2654 isc_result_t
isc__socketmgr_getmaxsockets(isc_socketmgr_t * manager,unsigned int * nsockp)2655 isc__socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2656 REQUIRE(VALID_MANAGER(manager));
2657 REQUIRE(nsockp != NULL);
2658
2659 return (ISC_R_NOTIMPLEMENTED);
2660 }
2661
2662 void
isc__socketmgr_setstats(isc_socketmgr_t * manager,isc_stats_t * stats)2663 isc__socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2664 REQUIRE(VALID_MANAGER(manager));
2665 REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2666 REQUIRE(manager->stats == NULL);
2667 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2668
2669 isc_stats_attach(stats, &manager->stats);
2670 }
2671
2672 void
isc__socketmgr_destroy(isc_socketmgr_t ** managerp)2673 isc__socketmgr_destroy(isc_socketmgr_t **managerp) {
2674 isc_socketmgr_t *manager;
2675 int i;
2676 isc_mem_t *mctx;
2677
2678 /*
2679 * Destroy a socket manager.
2680 */
2681
2682 REQUIRE(managerp != NULL);
2683 manager = *managerp;
2684 REQUIRE(VALID_MANAGER(manager));
2685
2686 LOCK(&manager->lock);
2687
2688 /*
2689 * Wait for all sockets to be destroyed.
2690 */
2691 while (!ISC_LIST_EMPTY(manager->socklist)) {
2692 manager_log(manager, CREATION,
2693 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2694 ISC_MSG_SOCKETSREMAIN,
2695 "sockets exist"));
2696 WAIT(&manager->shutdown_ok, &manager->lock);
2697 }
2698
2699 UNLOCK(&manager->lock);
2700
2701 /*
2702 * Here, we need to had some wait code for the completion port
2703 * thread.
2704 */
2705 signal_iocompletionport_exit(manager);
2706 manager->bShutdown = ISC_TRUE;
2707
2708 /*
2709 * Wait for threads to exit.
2710 */
2711 for (i = 0; i < manager->maxIOCPThreads; i++) {
2712 if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2713 NULL) != ISC_R_SUCCESS)
2714 UNEXPECTED_ERROR(__FILE__, __LINE__,
2715 "isc_thread_join() for Completion Port %s",
2716 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2717 ISC_MSG_FAILED, "failed"));
2718 }
2719 /*
2720 * Clean up.
2721 */
2722
2723 CloseHandle(manager->hIoCompletionPort);
2724
2725 (void)isc_condition_destroy(&manager->shutdown_ok);
2726
2727 DESTROYLOCK(&manager->lock);
2728 if (manager->stats != NULL)
2729 isc_stats_detach(&manager->stats);
2730 manager->magic = 0;
2731 mctx= manager->mctx;
2732 isc_mem_put(mctx, manager, sizeof(*manager));
2733
2734 isc_mem_detach(&mctx);
2735
2736 *managerp = NULL;
2737 }
2738
2739 static void
queue_receive_event(isc_socket_t * sock,isc_task_t * task,isc_socketevent_t * dev)2740 queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2741 {
2742 isc_task_t *ntask = NULL;
2743
2744 isc_task_attach(task, &ntask);
2745 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2746
2747 /*
2748 * Enqueue the request.
2749 */
2750 INSIST(!ISC_LINK_LINKED(dev, ev_link));
2751 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2752
2753 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2754 "queue_receive_event: event %p -> task %p",
2755 dev, ntask);
2756 }
2757
2758 /*
2759 * Check the pending receive queue, and if we have data pending, give it to this
2760 * caller. If we have none, queue an I/O request. If this caller is not the first
2761 * on the list, then we will just queue this event and return.
2762 *
2763 * Caller must have the socket locked.
2764 */
2765 static isc_result_t
socket_recv(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,unsigned int flags)2766 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2767 unsigned int flags)
2768 {
2769 int cc = 0;
2770 isc_task_t *ntask = NULL;
2771 isc_result_t result = ISC_R_SUCCESS;
2772 int recv_errno = 0;
2773
2774 dev->ev_sender = task;
2775
2776 if (sock->fd == INVALID_SOCKET)
2777 return (ISC_R_EOF);
2778
2779 /*
2780 * Queue our event on the list of things to do. Call our function to
2781 * attempt to fill buffers as much as possible, and return done events.
2782 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2783 * here and tell our caller that we could not satisfy it immediately.
2784 */
2785 queue_receive_event(sock, task, dev);
2786 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2787 result = ISC_R_INPROGRESS;
2788
2789 completeio_recv(sock);
2790
2791 /*
2792 * If there are more receivers waiting for data, queue another receive
2793 * here. If the
2794 */
2795 queue_receive_request(sock);
2796
2797 return (result);
2798 }
2799
2800 isc_result_t
isc__socket_recvv(isc_socket_t * sock,isc_bufferlist_t * buflist,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,const void * arg)2801 isc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2802 unsigned int minimum, isc_task_t *task,
2803 isc_taskaction_t action, const void *arg)
2804 {
2805 isc_socketevent_t *dev;
2806 isc_socketmgr_t *manager;
2807 unsigned int iocount;
2808 isc_buffer_t *buffer;
2809 isc_result_t ret;
2810
2811 REQUIRE(VALID_SOCKET(sock));
2812 LOCK(&sock->lock);
2813 CONSISTENT(sock);
2814
2815 /*
2816 * Make sure that the socket is not closed. XXXMLG change error here?
2817 */
2818 if (sock->fd == INVALID_SOCKET) {
2819 UNLOCK(&sock->lock);
2820 return (ISC_R_CONNREFUSED);
2821 }
2822
2823 REQUIRE(buflist != NULL);
2824 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2825 REQUIRE(task != NULL);
2826 REQUIRE(action != NULL);
2827
2828 manager = sock->manager;
2829 REQUIRE(VALID_MANAGER(manager));
2830
2831 iocount = isc_bufferlist_availablecount(buflist);
2832 REQUIRE(iocount > 0);
2833
2834 INSIST(sock->bound);
2835
2836 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2837 if (dev == NULL) {
2838 UNLOCK(&sock->lock);
2839 return (ISC_R_NOMEMORY);
2840 }
2841
2842 /*
2843 * UDP sockets are always partial read
2844 */
2845 if (sock->type == isc_sockettype_udp)
2846 dev->minimum = 1;
2847 else {
2848 if (minimum == 0)
2849 dev->minimum = iocount;
2850 else
2851 dev->minimum = minimum;
2852 }
2853
2854 /*
2855 * Move each buffer from the passed in list to our internal one.
2856 */
2857 buffer = ISC_LIST_HEAD(*buflist);
2858 while (buffer != NULL) {
2859 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2860 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2861 buffer = ISC_LIST_HEAD(*buflist);
2862 }
2863
2864 ret = socket_recv(sock, dev, task, 0);
2865
2866 UNLOCK(&sock->lock);
2867 return (ret);
2868 }
2869
2870 isc_result_t
isc__socket_recv(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_taskaction_t action,const void * arg)2871 isc__socket_recv(isc_socket_t *sock, isc_region_t *region,
2872 unsigned int minimum, isc_task_t *task,
2873 isc_taskaction_t action, const void *arg)
2874 {
2875 isc_socketevent_t *dev;
2876 isc_socketmgr_t *manager;
2877 isc_result_t ret;
2878
2879 REQUIRE(VALID_SOCKET(sock));
2880 LOCK(&sock->lock);
2881 CONSISTENT(sock);
2882
2883 /*
2884 * make sure that the socket's not closed
2885 */
2886 if (sock->fd == INVALID_SOCKET) {
2887 UNLOCK(&sock->lock);
2888 return (ISC_R_CONNREFUSED);
2889 }
2890 REQUIRE(action != NULL);
2891
2892 manager = sock->manager;
2893 REQUIRE(VALID_MANAGER(manager));
2894
2895 INSIST(sock->bound);
2896
2897 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2898 if (dev == NULL) {
2899 UNLOCK(&sock->lock);
2900 return (ISC_R_NOMEMORY);
2901 }
2902
2903 ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2904 UNLOCK(&sock->lock);
2905 return (ret);
2906 }
2907
2908 isc_result_t
isc__socket_recv2(isc_socket_t * sock,isc_region_t * region,unsigned int minimum,isc_task_t * task,isc_socketevent_t * event,unsigned int flags)2909 isc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2910 unsigned int minimum, isc_task_t *task,
2911 isc_socketevent_t *event, unsigned int flags)
2912 {
2913 isc_result_t ret;
2914
2915 REQUIRE(VALID_SOCKET(sock));
2916 LOCK(&sock->lock);
2917 CONSISTENT(sock);
2918
2919 event->result = ISC_R_UNEXPECTED;
2920 event->ev_sender = sock;
2921 /*
2922 * make sure that the socket's not closed
2923 */
2924 if (sock->fd == INVALID_SOCKET) {
2925 UNLOCK(&sock->lock);
2926 return (ISC_R_CONNREFUSED);
2927 }
2928
2929 ISC_LIST_INIT(event->bufferlist);
2930 event->region = *region;
2931 event->n = 0;
2932 event->offset = 0;
2933 event->attributes = 0;
2934
2935 /*
2936 * UDP sockets are always partial read.
2937 */
2938 if (sock->type == isc_sockettype_udp)
2939 event->minimum = 1;
2940 else {
2941 if (minimum == 0)
2942 event->minimum = region->length;
2943 else
2944 event->minimum = minimum;
2945 }
2946
2947 ret = socket_recv(sock, event, task, flags);
2948 UNLOCK(&sock->lock);
2949 return (ret);
2950 }
2951
2952 /*
2953 * Caller must have the socket locked.
2954 */
2955 static isc_result_t
socket_send(isc_socket_t * sock,isc_socketevent_t * dev,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,unsigned int flags)2956 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2957 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2958 unsigned int flags)
2959 {
2960 int io_state;
2961 int send_errno = 0;
2962 int cc = 0;
2963 isc_task_t *ntask = NULL;
2964 isc_result_t result = ISC_R_SUCCESS;
2965
2966 dev->ev_sender = task;
2967
2968 set_dev_address(address, sock, dev);
2969 if (pktinfo != NULL) {
2970 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
2971 ISC_MSG_PKTINFOPROVIDED,
2972 "pktinfo structure provided, ifindex %u (set to 0)",
2973 pktinfo->ipi6_ifindex);
2974
2975 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2976 dev->pktinfo = *pktinfo;
2977 /*
2978 * Set the pktinfo index to 0 here, to let the kernel decide
2979 * what interface it should send on.
2980 */
2981 dev->pktinfo.ipi6_ifindex = 0;
2982 }
2983
2984 io_state = startio_send(sock, dev, &cc, &send_errno);
2985 switch (io_state) {
2986 case DOIO_PENDING: /* I/O started. Nothing more to do */
2987 case DOIO_SOFT:
2988 /*
2989 * We couldn't send all or part of the request right now, so
2990 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2991 */
2992 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2993 isc_task_attach(task, &ntask);
2994 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2995
2996 /*
2997 * Enqueue the request.
2998 */
2999 INSIST(!ISC_LINK_LINKED(dev, ev_link));
3000 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
3001
3002 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
3003 "socket_send: event %p -> task %p",
3004 dev, ntask);
3005
3006 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
3007 result = ISC_R_INPROGRESS;
3008 break;
3009 }
3010
3011 case DOIO_SUCCESS:
3012 break;
3013 }
3014
3015 return (result);
3016 }
3017
3018 isc_result_t
isc__socket_send(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,const void * arg)3019 isc__socket_send(isc_socket_t *sock, isc_region_t *region,
3020 isc_task_t *task, isc_taskaction_t action, const void *arg)
3021 {
3022 /*
3023 * REQUIRE() checking is performed in isc_socket_sendto().
3024 */
3025 return (isc_socket_sendto(sock, region, task, action, arg, NULL,
3026 NULL));
3027 }
3028
3029 isc_result_t
isc__socket_sendto(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_taskaction_t action,const void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3030 isc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
3031 isc_task_t *task, isc_taskaction_t action, const void *arg,
3032 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3033 {
3034 isc_socketevent_t *dev;
3035 isc_socketmgr_t *manager;
3036 isc_result_t ret;
3037
3038 REQUIRE(VALID_SOCKET(sock));
3039 REQUIRE(sock->type != isc_sockettype_fdwatch);
3040
3041 LOCK(&sock->lock);
3042 CONSISTENT(sock);
3043
3044 /*
3045 * make sure that the socket's not closed
3046 */
3047 if (sock->fd == INVALID_SOCKET) {
3048 UNLOCK(&sock->lock);
3049 return (ISC_R_CONNREFUSED);
3050 }
3051 REQUIRE(region != NULL);
3052 REQUIRE(task != NULL);
3053 REQUIRE(action != NULL);
3054
3055 manager = sock->manager;
3056 REQUIRE(VALID_MANAGER(manager));
3057
3058 INSIST(sock->bound);
3059
3060 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
3061 if (dev == NULL) {
3062 UNLOCK(&sock->lock);
3063 return (ISC_R_NOMEMORY);
3064 }
3065 dev->region = *region;
3066
3067 ret = socket_send(sock, dev, task, address, pktinfo, 0);
3068 UNLOCK(&sock->lock);
3069 return (ret);
3070 }
3071
3072 isc_result_t
isc__socket_sendv(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,const void * arg)3073 isc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3074 isc_task_t *task, isc_taskaction_t action, const void *arg)
3075 {
3076 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
3077 NULL));
3078 }
3079
3080 isc_result_t
isc__socket_sendtov(isc_socket_t * sock,isc_bufferlist_t * buflist,isc_task_t * task,isc_taskaction_t action,const void * arg,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo)3081 isc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3082 isc_task_t *task, isc_taskaction_t action, const void *arg,
3083 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3084 {
3085 isc_socketevent_t *dev;
3086 isc_socketmgr_t *manager;
3087 unsigned int iocount;
3088 isc_buffer_t *buffer;
3089 isc_result_t ret;
3090
3091 REQUIRE(VALID_SOCKET(sock));
3092
3093 LOCK(&sock->lock);
3094 CONSISTENT(sock);
3095
3096 /*
3097 * make sure that the socket's not closed
3098 */
3099 if (sock->fd == INVALID_SOCKET) {
3100 UNLOCK(&sock->lock);
3101 return (ISC_R_CONNREFUSED);
3102 }
3103 REQUIRE(buflist != NULL);
3104 REQUIRE(!ISC_LIST_EMPTY(*buflist));
3105 REQUIRE(task != NULL);
3106 REQUIRE(action != NULL);
3107
3108 manager = sock->manager;
3109 REQUIRE(VALID_MANAGER(manager));
3110
3111 iocount = isc_bufferlist_usedcount(buflist);
3112 REQUIRE(iocount > 0);
3113
3114 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
3115 if (dev == NULL) {
3116 UNLOCK(&sock->lock);
3117 return (ISC_R_NOMEMORY);
3118 }
3119
3120 /*
3121 * Move each buffer from the passed in list to our internal one.
3122 */
3123 buffer = ISC_LIST_HEAD(*buflist);
3124 while (buffer != NULL) {
3125 ISC_LIST_DEQUEUE(*buflist, buffer, link);
3126 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3127 buffer = ISC_LIST_HEAD(*buflist);
3128 }
3129
3130 ret = socket_send(sock, dev, task, address, pktinfo, 0);
3131 UNLOCK(&sock->lock);
3132 return (ret);
3133 }
3134
3135 isc_result_t
isc__socket_sendto2(isc_socket_t * sock,isc_region_t * region,isc_task_t * task,isc_sockaddr_t * address,struct in6_pktinfo * pktinfo,isc_socketevent_t * event,unsigned int flags)3136 isc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3137 isc_task_t *task,
3138 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3139 isc_socketevent_t *event, unsigned int flags)
3140 {
3141 isc_result_t ret;
3142
3143 REQUIRE(VALID_SOCKET(sock));
3144 LOCK(&sock->lock);
3145 CONSISTENT(sock);
3146
3147 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3148 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3149 REQUIRE(sock->type == isc_sockettype_udp);
3150 event->ev_sender = sock;
3151 event->result = ISC_R_UNEXPECTED;
3152 /*
3153 * make sure that the socket's not closed
3154 */
3155 if (sock->fd == INVALID_SOCKET) {
3156 UNLOCK(&sock->lock);
3157 return (ISC_R_CONNREFUSED);
3158 }
3159 ISC_LIST_INIT(event->bufferlist);
3160 event->region = *region;
3161 event->n = 0;
3162 event->offset = 0;
3163 event->attributes = 0;
3164
3165 ret = socket_send(sock, event, task, address, pktinfo, flags);
3166 UNLOCK(&sock->lock);
3167 return (ret);
3168 }
3169
3170 isc_result_t
isc__socket_bind(isc_socket_t * sock,isc_sockaddr_t * sockaddr,unsigned int options)3171 isc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3172 unsigned int options) {
3173 int bind_errno;
3174 char strbuf[ISC_STRERRORSIZE];
3175 int on = 1;
3176
3177 REQUIRE(VALID_SOCKET(sock));
3178 LOCK(&sock->lock);
3179 CONSISTENT(sock);
3180
3181 /*
3182 * make sure that the socket's not closed
3183 */
3184 if (sock->fd == INVALID_SOCKET) {
3185 UNLOCK(&sock->lock);
3186 return (ISC_R_CONNREFUSED);
3187 }
3188
3189 INSIST(!sock->bound);
3190 INSIST(!sock->dupped);
3191
3192 if (sock->pf != sockaddr->type.sa.sa_family) {
3193 UNLOCK(&sock->lock);
3194 return (ISC_R_FAMILYMISMATCH);
3195 }
3196 /*
3197 * Only set SO_REUSEADDR when we want a specific port.
3198 */
3199 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3200 isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3201 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3202 sizeof(on)) < 0) {
3203 UNEXPECTED_ERROR(__FILE__, __LINE__,
3204 "setsockopt(%d) %s", sock->fd,
3205 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3206 ISC_MSG_FAILED, "failed"));
3207 /* Press on... */
3208 }
3209 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3210 bind_errno = WSAGetLastError();
3211 UNLOCK(&sock->lock);
3212 switch (bind_errno) {
3213 case WSAEACCES:
3214 return (ISC_R_NOPERM);
3215 case WSAEADDRNOTAVAIL:
3216 return (ISC_R_ADDRNOTAVAIL);
3217 case WSAEADDRINUSE:
3218 return (ISC_R_ADDRINUSE);
3219 case WSAEINVAL:
3220 return (ISC_R_BOUND);
3221 default:
3222 isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3223 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3224 strbuf);
3225 return (ISC_R_UNEXPECTED);
3226 }
3227 }
3228
3229 socket_log(__LINE__, sock, sockaddr, TRACE,
3230 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3231 sock->bound = 1;
3232
3233 UNLOCK(&sock->lock);
3234 return (ISC_R_SUCCESS);
3235 }
3236
3237 isc_result_t
isc__socket_filter(isc_socket_t * sock,const char * filter)3238 isc__socket_filter(isc_socket_t *sock, const char *filter) {
3239 UNUSED(sock);
3240 UNUSED(filter);
3241
3242 REQUIRE(VALID_SOCKET(sock));
3243 return (ISC_R_NOTIMPLEMENTED);
3244 }
3245
3246 /*
3247 * Set up to listen on a given socket. We do this by creating an internal
3248 * event that will be dispatched when the socket has read activity. The
3249 * watcher will send the internal event to the task when there is a new
3250 * connection.
3251 *
3252 * Unlike in read, we don't preallocate a done event here. Every time there
3253 * is a new connection we'll have to allocate a new one anyway, so we might
3254 * as well keep things simple rather than having to track them.
3255 */
3256 isc_result_t
isc__socket_listen(isc_socket_t * sock,unsigned int backlog)3257 isc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3258 char strbuf[ISC_STRERRORSIZE];
3259
3260 REQUIRE(VALID_SOCKET(sock));
3261
3262 LOCK(&sock->lock);
3263 CONSISTENT(sock);
3264
3265 /*
3266 * make sure that the socket's not closed
3267 */
3268 if (sock->fd == INVALID_SOCKET) {
3269 UNLOCK(&sock->lock);
3270 return (ISC_R_CONNREFUSED);
3271 }
3272
3273 REQUIRE(!sock->listener);
3274 REQUIRE(sock->bound);
3275 REQUIRE(sock->type == isc_sockettype_tcp);
3276
3277 if (backlog == 0)
3278 backlog = SOMAXCONN;
3279
3280 if (listen(sock->fd, (int)backlog) < 0) {
3281 UNLOCK(&sock->lock);
3282 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3283
3284 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3285
3286 return (ISC_R_UNEXPECTED);
3287 }
3288
3289 socket_log(__LINE__, sock, NULL, TRACE,
3290 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3291 sock->listener = 1;
3292 _set_state(sock, SOCK_LISTEN);
3293
3294 UNLOCK(&sock->lock);
3295 return (ISC_R_SUCCESS);
3296 }
3297
3298 /*
3299 * This should try to do aggressive accept() XXXMLG
3300 */
3301 isc_result_t
isc__socket_accept(isc_socket_t * sock,isc_task_t * task,isc_taskaction_t action,const void * arg)3302 isc__socket_accept(isc_socket_t *sock,
3303 isc_task_t *task, isc_taskaction_t action, const void *arg)
3304 {
3305 isc_socket_newconnev_t *adev;
3306 isc_socketmgr_t *manager;
3307 isc_task_t *ntask = NULL;
3308 isc_socket_t *nsock;
3309 isc_result_t result;
3310 IoCompletionInfo *lpo;
3311
3312 REQUIRE(VALID_SOCKET(sock));
3313
3314 manager = sock->manager;
3315 REQUIRE(VALID_MANAGER(manager));
3316
3317 LOCK(&sock->lock);
3318 CONSISTENT(sock);
3319
3320 /*
3321 * make sure that the socket's not closed
3322 */
3323 if (sock->fd == INVALID_SOCKET) {
3324 UNLOCK(&sock->lock);
3325 return (ISC_R_CONNREFUSED);
3326 }
3327
3328 REQUIRE(sock->listener);
3329
3330 /*
3331 * Sender field is overloaded here with the task we will be sending
3332 * this event to. Just before the actual event is delivered the
3333 * actual ev_sender will be touched up to be the socket.
3334 */
3335 adev = (isc_socket_newconnev_t *)
3336 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3337 action, arg, sizeof(*adev));
3338 if (adev == NULL) {
3339 UNLOCK(&sock->lock);
3340 return (ISC_R_NOMEMORY);
3341 }
3342 ISC_LINK_INIT(adev, ev_link);
3343
3344 result = allocate_socket(manager, sock->type, &nsock);
3345 if (result != ISC_R_SUCCESS) {
3346 isc_event_free((isc_event_t **)&adev);
3347 UNLOCK(&sock->lock);
3348 return (result);
3349 }
3350
3351 /*
3352 * AcceptEx() requires we pass in a socket.
3353 */
3354 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3355 if (nsock->fd == INVALID_SOCKET) {
3356 free_socket(&nsock, __LINE__);
3357 isc_event_free((isc_event_t **)&adev);
3358 UNLOCK(&sock->lock);
3359 return (ISC_R_FAILURE); // XXXMLG need real error message
3360 }
3361
3362 /*
3363 * Attach to socket and to task.
3364 */
3365 isc_task_attach(task, &ntask);
3366 if (isc_task_exiting(ntask)) {
3367 free_socket(&nsock, __LINE__);
3368 isc_task_detach(&ntask);
3369 isc_event_free(ISC_EVENT_PTR(&adev));
3370 UNLOCK(&sock->lock);
3371 return (ISC_R_SHUTTINGDOWN);
3372 }
3373 nsock->references++;
3374
3375 adev->ev_sender = ntask;
3376 adev->newsocket = nsock;
3377 _set_state(nsock, SOCK_ACCEPT);
3378
3379 /*
3380 * Queue io completion for an accept().
3381 */
3382 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3383 HEAP_ZERO_MEMORY,
3384 sizeof(IoCompletionInfo));
3385 RUNTIME_CHECK(lpo != NULL);
3386 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3387 (sizeof(SOCKADDR_STORAGE) + 16) * 2);
3388 RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3389
3390 lpo->adev = adev;
3391 lpo->request_type = SOCKET_ACCEPT;
3392
3393 ISCAcceptEx(sock->fd,
3394 nsock->fd, /* Accepted Socket */
3395 lpo->acceptbuffer, /* Buffer for initial Recv */
3396 0, /* Length of Buffer */
3397 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */
3398 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */
3399 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */
3400 (LPOVERLAPPED)lpo /* Overlapped structure */
3401 );
3402 iocompletionport_update(nsock);
3403
3404 socket_log(__LINE__, sock, NULL, TRACE,
3405 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3406 "accepting for nsock %p fd %d", nsock, nsock->fd);
3407
3408 /*
3409 * Enqueue the event
3410 */
3411 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3412 sock->pending_accept++;
3413 sock->pending_iocp++;
3414
3415 UNLOCK(&sock->lock);
3416 return (ISC_R_SUCCESS);
3417 }
3418
3419 isc_result_t
isc__socket_connect(isc_socket_t * sock,isc_sockaddr_t * addr,isc_task_t * task,isc_taskaction_t action,const void * arg)3420 isc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3421 isc_task_t *task, isc_taskaction_t action, const void *arg)
3422 {
3423 char strbuf[ISC_STRERRORSIZE];
3424 isc_socket_connev_t *cdev;
3425 isc_task_t *ntask = NULL;
3426 isc_socketmgr_t *manager;
3427 IoCompletionInfo *lpo;
3428 int bind_errno;
3429
3430 REQUIRE(VALID_SOCKET(sock));
3431 REQUIRE(addr != NULL);
3432 REQUIRE(task != NULL);
3433 REQUIRE(action != NULL);
3434
3435 manager = sock->manager;
3436 REQUIRE(VALID_MANAGER(manager));
3437 REQUIRE(addr != NULL);
3438
3439 if (isc_sockaddr_ismulticast(addr))
3440 return (ISC_R_MULTICAST);
3441
3442 LOCK(&sock->lock);
3443 CONSISTENT(sock);
3444
3445 /*
3446 * make sure that the socket's not closed
3447 */
3448 if (sock->fd == INVALID_SOCKET) {
3449 UNLOCK(&sock->lock);
3450 return (ISC_R_CONNREFUSED);
3451 }
3452
3453 /*
3454 * Windows sockets won't connect unless the socket is bound.
3455 */
3456 if (!sock->bound) {
3457 isc_sockaddr_t any;
3458
3459 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3460 if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3461 bind_errno = WSAGetLastError();
3462 UNLOCK(&sock->lock);
3463 switch (bind_errno) {
3464 case WSAEACCES:
3465 return (ISC_R_NOPERM);
3466 case WSAEADDRNOTAVAIL:
3467 return (ISC_R_ADDRNOTAVAIL);
3468 case WSAEADDRINUSE:
3469 return (ISC_R_ADDRINUSE);
3470 case WSAEINVAL:
3471 return (ISC_R_BOUND);
3472 default:
3473 isc__strerror(bind_errno, strbuf,
3474 sizeof(strbuf));
3475 UNEXPECTED_ERROR(__FILE__, __LINE__,
3476 "bind: %s", strbuf);
3477 return (ISC_R_UNEXPECTED);
3478 }
3479 }
3480 sock->bound = 1;
3481 }
3482
3483 REQUIRE(!sock->pending_connect);
3484
3485 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3486 ISC_SOCKEVENT_CONNECT,
3487 action, arg,
3488 sizeof(*cdev));
3489 if (cdev == NULL) {
3490 UNLOCK(&sock->lock);
3491 return (ISC_R_NOMEMORY);
3492 }
3493 ISC_LINK_INIT(cdev, ev_link);
3494
3495 if (sock->type == isc_sockettype_tcp) {
3496 /*
3497 * Queue io completion for an accept().
3498 */
3499 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3500 HEAP_ZERO_MEMORY,
3501 sizeof(IoCompletionInfo));
3502 lpo->cdev = cdev;
3503 lpo->request_type = SOCKET_CONNECT;
3504
3505 sock->address = *addr;
3506 ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3507 NULL, 0, NULL, (LPOVERLAPPED)lpo);
3508
3509 /*
3510 * Attach to task.
3511 */
3512 isc_task_attach(task, &ntask);
3513 cdev->ev_sender = ntask;
3514
3515 sock->pending_connect = 1;
3516 _set_state(sock, SOCK_CONNECT);
3517
3518 /*
3519 * Enqueue the request.
3520 */
3521 sock->connect_ev = cdev;
3522 sock->pending_iocp++;
3523 } else {
3524 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3525 cdev->result = ISC_R_SUCCESS;
3526 isc_task_send(task, (isc_event_t **)&cdev);
3527 }
3528 CONSISTENT(sock);
3529 UNLOCK(&sock->lock);
3530
3531 return (ISC_R_SUCCESS);
3532 }
3533
3534 isc_result_t
isc__socket_getpeername(isc_socket_t * sock,isc_sockaddr_t * addressp)3535 isc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3536 isc_result_t result;
3537
3538 REQUIRE(VALID_SOCKET(sock));
3539 REQUIRE(addressp != NULL);
3540
3541 LOCK(&sock->lock);
3542 CONSISTENT(sock);
3543
3544 /*
3545 * make sure that the socket's not closed
3546 */
3547 if (sock->fd == INVALID_SOCKET) {
3548 UNLOCK(&sock->lock);
3549 return (ISC_R_CONNREFUSED);
3550 }
3551
3552 if (sock->connected) {
3553 *addressp = sock->address;
3554 result = ISC_R_SUCCESS;
3555 } else {
3556 result = ISC_R_NOTCONNECTED;
3557 }
3558
3559 UNLOCK(&sock->lock);
3560
3561 return (result);
3562 }
3563
3564 isc_result_t
isc__socket_getsockname(isc_socket_t * sock,isc_sockaddr_t * addressp)3565 isc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3566 ISC_SOCKADDR_LEN_T len;
3567 isc_result_t result;
3568 char strbuf[ISC_STRERRORSIZE];
3569
3570 REQUIRE(VALID_SOCKET(sock));
3571 REQUIRE(addressp != NULL);
3572
3573 LOCK(&sock->lock);
3574 CONSISTENT(sock);
3575
3576 /*
3577 * make sure that the socket's not closed
3578 */
3579 if (sock->fd == INVALID_SOCKET) {
3580 UNLOCK(&sock->lock);
3581 return (ISC_R_CONNREFUSED);
3582 }
3583
3584 if (!sock->bound) {
3585 result = ISC_R_NOTBOUND;
3586 goto out;
3587 }
3588
3589 result = ISC_R_SUCCESS;
3590
3591 len = sizeof(addressp->type);
3592 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3593 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3594 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3595 strbuf);
3596 result = ISC_R_UNEXPECTED;
3597 goto out;
3598 }
3599 addressp->length = (unsigned int)len;
3600
3601 out:
3602 UNLOCK(&sock->lock);
3603
3604 return (result);
3605 }
3606
3607 /*
3608 * Run through the list of events on this socket, and cancel the ones
3609 * queued for task "task" of type "how". "how" is a bitmask.
3610 */
3611 void
isc__socket_cancel(isc_socket_t * sock,isc_task_t * task,unsigned int how)3612 isc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3613
3614 REQUIRE(VALID_SOCKET(sock));
3615
3616 /*
3617 * Quick exit if there is nothing to do. Don't even bother locking
3618 * in this case.
3619 */
3620 if (how == 0)
3621 return;
3622
3623 LOCK(&sock->lock);
3624 CONSISTENT(sock);
3625
3626 /*
3627 * make sure that the socket's not closed
3628 */
3629 if (sock->fd == INVALID_SOCKET) {
3630 UNLOCK(&sock->lock);
3631 return;
3632 }
3633
3634 /*
3635 * All of these do the same thing, more or less.
3636 * Each will:
3637 * o If the internal event is marked as "posted" try to
3638 * remove it from the task's queue. If this fails, mark it
3639 * as canceled instead, and let the task clean it up later.
3640 * o For each I/O request for that task of that type, post
3641 * its done event with status of "ISC_R_CANCELED".
3642 * o Reset any state needed.
3643 */
3644
3645 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3646 isc_socketevent_t *dev;
3647 isc_socketevent_t *next;
3648 isc_task_t *current_task;
3649
3650 dev = ISC_LIST_HEAD(sock->recv_list);
3651 while (dev != NULL) {
3652 current_task = dev->ev_sender;
3653 next = ISC_LIST_NEXT(dev, ev_link);
3654 if ((task == NULL) || (task == current_task)) {
3655 dev->result = ISC_R_CANCELED;
3656 send_recvdone_event(sock, &dev);
3657 }
3658 dev = next;
3659 }
3660 }
3661 how &= ~ISC_SOCKCANCEL_RECV;
3662
3663 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3664 isc_socketevent_t *dev;
3665 isc_socketevent_t *next;
3666 isc_task_t *current_task;
3667
3668 dev = ISC_LIST_HEAD(sock->send_list);
3669
3670 while (dev != NULL) {
3671 current_task = dev->ev_sender;
3672 next = ISC_LIST_NEXT(dev, ev_link);
3673 if ((task == NULL) || (task == current_task)) {
3674 dev->result = ISC_R_CANCELED;
3675 send_senddone_event(sock, &dev);
3676 }
3677 dev = next;
3678 }
3679 }
3680 how &= ~ISC_SOCKCANCEL_SEND;
3681
3682 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3683 && !ISC_LIST_EMPTY(sock->accept_list)) {
3684 isc_socket_newconnev_t *dev;
3685 isc_socket_newconnev_t *next;
3686 isc_task_t *current_task;
3687
3688 dev = ISC_LIST_HEAD(sock->accept_list);
3689 while (dev != NULL) {
3690 current_task = dev->ev_sender;
3691 next = ISC_LIST_NEXT(dev, ev_link);
3692
3693 if ((task == NULL) || (task == current_task)) {
3694
3695 dev->newsocket->references--;
3696 closesocket(dev->newsocket->fd);
3697 dev->newsocket->fd = INVALID_SOCKET;
3698 free_socket(&dev->newsocket, __LINE__);
3699
3700 dev->result = ISC_R_CANCELED;
3701 send_acceptdone_event(sock, &dev);
3702 }
3703
3704 dev = next;
3705 }
3706 }
3707 how &= ~ISC_SOCKCANCEL_ACCEPT;
3708
3709 /*
3710 * Connecting is not a list.
3711 */
3712 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3713 && sock->connect_ev != NULL) {
3714 isc_socket_connev_t *dev;
3715 isc_task_t *current_task;
3716
3717 INSIST(sock->pending_connect);
3718
3719 dev = sock->connect_ev;
3720 current_task = dev->ev_sender;
3721
3722 if ((task == NULL) || (task == current_task)) {
3723 closesocket(sock->fd);
3724 sock->fd = INVALID_SOCKET;
3725 _set_state(sock, SOCK_CLOSED);
3726
3727 sock->connect_ev = NULL;
3728 dev->result = ISC_R_CANCELED;
3729 send_connectdone_event(sock, &dev);
3730 }
3731 }
3732 how &= ~ISC_SOCKCANCEL_CONNECT;
3733
3734 maybe_free_socket(&sock, __LINE__);
3735 }
3736
3737 isc_sockettype_t
isc__socket_gettype(isc_socket_t * sock)3738 isc__socket_gettype(isc_socket_t *sock) {
3739 isc_sockettype_t type;
3740
3741 REQUIRE(VALID_SOCKET(sock));
3742
3743 LOCK(&sock->lock);
3744
3745 /*
3746 * make sure that the socket's not closed
3747 */
3748 if (sock->fd == INVALID_SOCKET) {
3749 UNLOCK(&sock->lock);
3750 return (ISC_R_CONNREFUSED);
3751 }
3752
3753 type = sock->type;
3754 UNLOCK(&sock->lock);
3755 return (type);
3756 }
3757
3758 isc_boolean_t
isc__socket_isbound(isc_socket_t * sock)3759 isc__socket_isbound(isc_socket_t *sock) {
3760 isc_boolean_t val;
3761
3762 REQUIRE(VALID_SOCKET(sock));
3763
3764 LOCK(&sock->lock);
3765 CONSISTENT(sock);
3766
3767 /*
3768 * make sure that the socket's not closed
3769 */
3770 if (sock->fd == INVALID_SOCKET) {
3771 UNLOCK(&sock->lock);
3772 return (ISC_FALSE);
3773 }
3774
3775 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3776 UNLOCK(&sock->lock);
3777
3778 return (val);
3779 }
3780
3781 void
isc__socket_ipv6only(isc_socket_t * sock,isc_boolean_t yes)3782 isc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3783 #if defined(IPV6_V6ONLY)
3784 int onoff = yes ? 1 : 0;
3785 #else
3786 UNUSED(yes);
3787 #endif
3788
3789 REQUIRE(VALID_SOCKET(sock));
3790
3791 #ifdef IPV6_V6ONLY
3792 if (sock->pf == AF_INET6) {
3793 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3794 (char *)&onoff, sizeof(onoff));
3795 }
3796 #endif
3797 }
3798
3799 void
isc__socket_cleanunix(isc_sockaddr_t * addr,isc_boolean_t active)3800 isc__socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3801 UNUSED(addr);
3802 UNUSED(active);
3803 }
3804
3805 isc_result_t
isc__socket_permunix(isc_sockaddr_t * addr,isc_uint32_t perm,isc_uint32_t owner,isc_uint32_t group)3806 isc__socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3807 isc_uint32_t owner, isc_uint32_t group)
3808 {
3809 UNUSED(addr);
3810 UNUSED(perm);
3811 UNUSED(owner);
3812 UNUSED(group);
3813 return (ISC_R_NOTIMPLEMENTED);
3814 }
3815
3816 void
isc__socket_setname(isc_socket_t * socket,const char * name,void * tag)3817 isc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3818
3819 /*
3820 * Name 'socket'.
3821 */
3822
3823 REQUIRE(VALID_SOCKET(socket));
3824
3825 LOCK(&socket->lock);
3826 memset(socket->name, 0, sizeof(socket->name));
3827 strncpy(socket->name, name, sizeof(socket->name) - 1);
3828 socket->tag = tag;
3829 UNLOCK(&socket->lock);
3830 }
3831
3832 const char *
isc__socket_getname(isc_socket_t * socket)3833 isc__socket_getname(isc_socket_t *socket) {
3834 return (socket->name);
3835 }
3836
3837 void *
isc__socket_gettag(isc_socket_t * socket)3838 isc__socket_gettag(isc_socket_t *socket) {
3839 return (socket->tag);
3840 }
3841
3842 int
isc__socket_getfd(isc_socket_t * socket)3843 isc__socket_getfd(isc_socket_t *socket) {
3844 return ((short) socket->fd);
3845 }
3846
3847 void
isc__socketmgr_setreserved(isc_socketmgr_t * manager,isc_uint32_t reserved)3848 isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3849 UNUSED(manager);
3850 UNUSED(reserved);
3851 }
3852
3853 void
isc___socketmgr_maxudp(isc_socketmgr_t * manager,int maxudp)3854 isc___socketmgr_maxudp(isc_socketmgr_t *manager, int maxudp) {
3855
3856 UNUSED(manager);
3857 UNUSED(maxudp);
3858 }
3859
3860 #ifdef HAVE_LIBXML2
3861
3862 static const char *
_socktype(isc_sockettype_t type)3863 _socktype(isc_sockettype_t type)
3864 {
3865 if (type == isc_sockettype_udp)
3866 return ("udp");
3867 else if (type == isc_sockettype_tcp)
3868 return ("tcp");
3869 else if (type == isc_sockettype_unix)
3870 return ("unix");
3871 else if (type == isc_sockettype_fdwatch)
3872 return ("fdwatch");
3873 else
3874 return ("not-initialized");
3875 }
3876
3877 void
isc_socketmgr_renderxml(isc_socketmgr_t * mgr,xmlTextWriterPtr writer)3878 isc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
3879 {
3880 isc_socket_t *sock;
3881 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3882 isc_sockaddr_t addr;
3883 ISC_SOCKADDR_LEN_T len;
3884
3885 LOCK(&mgr->lock);
3886
3887 #ifndef ISC_PLATFORM_USETHREADS
3888 xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3889 xmlTextWriterWriteFormatString(writer, "%d", mgr->refs);
3890 xmlTextWriterEndElement(writer);
3891 #endif
3892
3893 xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets");
3894 sock = ISC_LIST_HEAD(mgr->socklist);
3895 while (sock != NULL) {
3896 LOCK(&sock->lock);
3897 xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket");
3898
3899 xmlTextWriterStartElement(writer, ISC_XMLCHAR "id");
3900 xmlTextWriterWriteFormatString(writer, "%p", sock);
3901 xmlTextWriterEndElement(writer);
3902
3903 if (sock->name[0] != 0) {
3904 xmlTextWriterStartElement(writer, ISC_XMLCHAR "name");
3905 xmlTextWriterWriteFormatString(writer, "%s",
3906 sock->name);
3907 xmlTextWriterEndElement(writer); /* name */
3908 }
3909
3910 xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3911 xmlTextWriterWriteFormatString(writer, "%d", sock->references);
3912 xmlTextWriterEndElement(writer);
3913
3914 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
3915 ISC_XMLCHAR _socktype(sock->type));
3916
3917 if (sock->connected) {
3918 isc_sockaddr_format(&sock->address, peerbuf,
3919 sizeof(peerbuf));
3920 xmlTextWriterWriteElement(writer,
3921 ISC_XMLCHAR "peer-address",
3922 ISC_XMLCHAR peerbuf);
3923 }
3924
3925 len = sizeof(addr);
3926 if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
3927 isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
3928 xmlTextWriterWriteElement(writer,
3929 ISC_XMLCHAR "local-address",
3930 ISC_XMLCHAR peerbuf);
3931 }
3932
3933 xmlTextWriterStartElement(writer, ISC_XMLCHAR "states");
3934 if (sock->pending_recv)
3935 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3936 ISC_XMLCHAR "pending-receive");
3937 if (sock->pending_send)
3938 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3939 ISC_XMLCHAR "pending-send");
3940 if (sock->pending_accept)
3941 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3942 ISC_XMLCHAR "pending_accept");
3943 if (sock->listener)
3944 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3945 ISC_XMLCHAR "listener");
3946 if (sock->connected)
3947 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3948 ISC_XMLCHAR "connected");
3949 if (sock->pending_connect)
3950 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3951 ISC_XMLCHAR "connecting");
3952 if (sock->bound)
3953 xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3954 ISC_XMLCHAR "bound");
3955
3956 xmlTextWriterEndElement(writer); /* states */
3957
3958 xmlTextWriterEndElement(writer); /* socket */
3959
3960 UNLOCK(&sock->lock);
3961 sock = ISC_LIST_NEXT(sock, link);
3962 }
3963 xmlTextWriterEndElement(writer); /* sockets */
3964
3965 UNLOCK(&mgr->lock);
3966 }
3967 #endif /* HAVE_LIBXML2 */
3968