1 /*-------------------------------------------------------------------------
2 *
3 * socket.c
4 * Microsoft Windows Win32 Socket Functions
5 *
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/port/win32/socket.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14 #include "postgres.h"
15
/*
 * Indicate if pgwin32_recv() and pgwin32_send() should operate
 * in non-blocking mode.
 *
 * Since the socket emulation layer always sets the actual socket to
 * non-blocking mode in order to be able to deliver signals, we must
 * specify this in a separate flag if we actually need non-blocking
 * operation.
 *
 * This flag changes the behaviour *globally* for all socket operations,
 * so it should only be set for very short periods of time.
 *
 * When the flag is nonzero, pgwin32_recv() and pgwin32_send() return -1
 * with errno set to EWOULDBLOCK instead of waiting for the socket whenever
 * the operation cannot be completed immediately.
 */
int pgwin32_noblock = 0;
29
30 /* Undef the macros defined in win32.h, so we can access system functions */
31 #undef socket
32 #undef bind
33 #undef listen
34 #undef accept
35 #undef connect
36 #undef select
37 #undef recv
38 #undef send
39
40 /*
41 * Blocking socket functions implemented so they listen on both
42 * the socket and the signal event, required for signal handling.
43 */
44
/*
 * Set errno from the most recent Winsock error (WSAGetLastError()).
 *
 * Note: where there is a direct correspondence between a WSAxxx error code
 * and a Berkeley error symbol, this mapping is actually a no-op, because
 * in win32.h we redefine the network-related Berkeley error symbols to have
 * the values of their WSAxxx counterparts.  The point of the table is
 * mostly to translate near-miss error codes into something that's sensible
 * in the Berkeley universe.
 *
 * A code that is not listed in the table is reported with a NOTICE and
 * mapped to EINVAL.
 */
static void
TranslateSocketError(void)
{
	/* Winsock error code and the errno value reported for it */
	static const struct
	{
		int			wsa_code;
		int			berkeley_errno;
	}			errmap[] =
	{
		{WSAEINVAL, EINVAL},
		{WSANOTINITIALISED, EINVAL},
		{WSAEINVALIDPROVIDER, EINVAL},
		{WSAEINVALIDPROCTABLE, EINVAL},
		{WSAEDESTADDRREQ, EINVAL},
		{WSAEINPROGRESS, EINPROGRESS},
		{WSAEFAULT, EFAULT},
		{WSAEISCONN, EISCONN},
		{WSAEMSGSIZE, EMSGSIZE},
		{WSAEAFNOSUPPORT, EAFNOSUPPORT},
		{WSAEMFILE, EMFILE},
		{WSAENOBUFS, ENOBUFS},
		{WSAEPROTONOSUPPORT, EPROTONOSUPPORT},
		{WSAEPROTOTYPE, EPROTONOSUPPORT},
		{WSAESOCKTNOSUPPORT, EPROTONOSUPPORT},
		{WSAECONNABORTED, ECONNABORTED},
		{WSAECONNREFUSED, ECONNREFUSED},
		{WSAECONNRESET, ECONNRESET},
		{WSAEINTR, EINTR},
		{WSAENOTSOCK, ENOTSOCK},
		{WSAEOPNOTSUPP, EOPNOTSUPP},
		{WSAEWOULDBLOCK, EWOULDBLOCK},
		{WSAEACCES, EACCES},
		{WSAEADDRINUSE, EADDRINUSE},
		{WSAEADDRNOTAVAIL, EADDRNOTAVAIL},
		{WSAEHOSTUNREACH, EHOSTUNREACH},
		{WSAEHOSTDOWN, EHOSTUNREACH},
		{WSAHOST_NOT_FOUND, EHOSTUNREACH},
		{WSAENETDOWN, EHOSTUNREACH},
		{WSAENETUNREACH, EHOSTUNREACH},
		{WSAENETRESET, EHOSTUNREACH},
		{WSAENOTCONN, ENOTCONN},
		{WSAESHUTDOWN, ENOTCONN},
		{WSAEDISCON, ENOTCONN}
	};
	int			wsaerr = WSAGetLastError();
	int			idx;

	for (idx = 0; idx < (int) (sizeof(errmap) / sizeof(errmap[0])); idx++)
	{
		if (errmap[idx].wsa_code == wsaerr)
		{
			errno = errmap[idx].berkeley_errno;
			return;
		}
	}

	/* Not a code we know how to translate */
	ereport(NOTICE,
			(errmsg_internal("unrecognized win32 socket error code: %d", WSAGetLastError())));
	errno = EINVAL;
}
142
/*
 * Check the signal queue and dispatch anything that is pending.
 *
 * Returns 0 if UNBLOCKED_SIGNAL_QUEUE() reports nothing to do.  Otherwise
 * runs pgwin32_dispatch_queued_signals(), sets errno to EINTR and returns 1,
 * so that callers can bail out the same way an interrupted call would.
 */
static int
pgwin32_poll_signals(void)
{
	if (!UNBLOCKED_SIGNAL_QUEUE())
		return 0;

	pgwin32_dispatch_queued_signals();
	errno = EINTR;
	return 1;
}
154
155 static int
isDataGram(SOCKET s)156 isDataGram(SOCKET s)
157 {
158 int type;
159 int typelen = sizeof(type);
160
161 if (getsockopt(s, SOL_SOCKET, SO_TYPE, (char *) &type, &typelen))
162 return 1;
163
164 return (type == SOCK_DGRAM) ? 1 : 0;
165 }
166
167 int
pgwin32_waitforsinglesocket(SOCKET s,int what,int timeout)168 pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout)
169 {
170 static HANDLE waitevent = INVALID_HANDLE_VALUE;
171 static SOCKET current_socket = INVALID_SOCKET;
172 static int isUDP = 0;
173 HANDLE events[2];
174 int r;
175
176 /* Create an event object just once and use it on all future calls */
177 if (waitevent == INVALID_HANDLE_VALUE)
178 {
179 waitevent = CreateEvent(NULL, TRUE, FALSE, NULL);
180
181 if (waitevent == INVALID_HANDLE_VALUE)
182 ereport(ERROR,
183 (errmsg_internal("could not create socket waiting event: error code %lu", GetLastError())));
184 }
185 else if (!ResetEvent(waitevent))
186 ereport(ERROR,
187 (errmsg_internal("could not reset socket waiting event: error code %lu", GetLastError())));
188
189 /*
190 * Track whether socket is UDP or not. (NB: most likely, this is both
191 * useless and wrong; there is no reason to think that the behavior of
192 * WSAEventSelect is different for TCP and UDP.)
193 */
194 if (current_socket != s)
195 isUDP = isDataGram(s);
196 current_socket = s;
197
198 /*
199 * Attach event to socket. NOTE: we must detach it again before
200 * returning, since other bits of code may try to attach other events to
201 * the socket.
202 */
203 if (WSAEventSelect(s, waitevent, what) != 0)
204 {
205 TranslateSocketError();
206 return 0;
207 }
208
209 events[0] = pgwin32_signal_event;
210 events[1] = waitevent;
211
212 /*
213 * Just a workaround of unknown locking problem with writing in UDP socket
214 * under high load: Client's pgsql backend sleeps infinitely in
215 * WaitForMultipleObjectsEx, pgstat process sleeps in pgwin32_select().
216 * So, we will wait with small timeout(0.1 sec) and if socket is still
217 * blocked, try WSASend (see comments in pgwin32_select) and wait again.
218 */
219 if ((what & FD_WRITE) && isUDP)
220 {
221 for (;;)
222 {
223 r = WaitForMultipleObjectsEx(2, events, FALSE, 100, TRUE);
224
225 if (r == WAIT_TIMEOUT)
226 {
227 char c;
228 WSABUF buf;
229 DWORD sent;
230
231 buf.buf = &c;
232 buf.len = 0;
233
234 r = WSASend(s, &buf, 1, &sent, 0, NULL, NULL);
235 if (r == 0) /* Completed - means things are fine! */
236 {
237 WSAEventSelect(s, NULL, 0);
238 return 1;
239 }
240 else if (WSAGetLastError() != WSAEWOULDBLOCK)
241 {
242 TranslateSocketError();
243 WSAEventSelect(s, NULL, 0);
244 return 0;
245 }
246 }
247 else
248 break;
249 }
250 }
251 else
252 r = WaitForMultipleObjectsEx(2, events, FALSE, timeout, TRUE);
253
254 WSAEventSelect(s, NULL, 0);
255
256 if (r == WAIT_OBJECT_0 || r == WAIT_IO_COMPLETION)
257 {
258 pgwin32_dispatch_queued_signals();
259 errno = EINTR;
260 return 0;
261 }
262 if (r == WAIT_OBJECT_0 + 1)
263 return 1;
264 if (r == WAIT_TIMEOUT)
265 {
266 errno = EWOULDBLOCK;
267 return 0;
268 }
269 ereport(ERROR,
270 (errmsg_internal("unrecognized return value from WaitForMultipleObjects: %d (error code %lu)", r, GetLastError())));
271 return 0;
272 }
273
274 /*
275 * Create a socket, setting it to overlapped and non-blocking
276 */
277 SOCKET
pgwin32_socket(int af,int type,int protocol)278 pgwin32_socket(int af, int type, int protocol)
279 {
280 SOCKET s;
281 unsigned long on = 1;
282
283 s = WSASocket(af, type, protocol, NULL, 0, WSA_FLAG_OVERLAPPED);
284 if (s == INVALID_SOCKET)
285 {
286 TranslateSocketError();
287 return INVALID_SOCKET;
288 }
289
290 if (ioctlsocket(s, FIONBIO, &on))
291 {
292 TranslateSocketError();
293 return INVALID_SOCKET;
294 }
295 errno = 0;
296
297 return s;
298 }
299
300 int
pgwin32_bind(SOCKET s,struct sockaddr * addr,int addrlen)301 pgwin32_bind(SOCKET s, struct sockaddr * addr, int addrlen)
302 {
303 int res;
304
305 res = bind(s, addr, addrlen);
306 if (res < 0)
307 TranslateSocketError();
308 return res;
309 }
310
311 int
pgwin32_listen(SOCKET s,int backlog)312 pgwin32_listen(SOCKET s, int backlog)
313 {
314 int res;
315
316 res = listen(s, backlog);
317 if (res < 0)
318 TranslateSocketError();
319 return res;
320 }
321
322 SOCKET
pgwin32_accept(SOCKET s,struct sockaddr * addr,int * addrlen)323 pgwin32_accept(SOCKET s, struct sockaddr * addr, int *addrlen)
324 {
325 SOCKET rs;
326
327 /*
328 * Poll for signals, but don't return with EINTR, since we don't handle
329 * that in pqcomm.c
330 */
331 pgwin32_poll_signals();
332
333 rs = WSAAccept(s, addr, addrlen, NULL, 0);
334 if (rs == INVALID_SOCKET)
335 {
336 TranslateSocketError();
337 return INVALID_SOCKET;
338 }
339 return rs;
340 }
341
342
343 /* No signal delivery during connect. */
344 int
pgwin32_connect(SOCKET s,const struct sockaddr * addr,int addrlen)345 pgwin32_connect(SOCKET s, const struct sockaddr * addr, int addrlen)
346 {
347 int r;
348
349 r = WSAConnect(s, addr, addrlen, NULL, NULL, NULL, NULL);
350 if (r == 0)
351 return 0;
352
353 if (WSAGetLastError() != WSAEWOULDBLOCK)
354 {
355 TranslateSocketError();
356 return -1;
357 }
358
359 while (pgwin32_waitforsinglesocket(s, FD_CONNECT, INFINITE) == 0)
360 {
361 /* Loop endlessly as long as we are just delivering signals */
362 }
363
364 return 0;
365 }
366
367 int
pgwin32_recv(SOCKET s,char * buf,int len,int f)368 pgwin32_recv(SOCKET s, char *buf, int len, int f)
369 {
370 WSABUF wbuf;
371 int r;
372 DWORD b;
373 DWORD flags = f;
374 int n;
375
376 if (pgwin32_poll_signals())
377 return -1;
378
379 wbuf.len = len;
380 wbuf.buf = buf;
381
382 r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
383 if (r != SOCKET_ERROR)
384 return b; /* success */
385
386 if (WSAGetLastError() != WSAEWOULDBLOCK)
387 {
388 TranslateSocketError();
389 return -1;
390 }
391
392 if (pgwin32_noblock)
393 {
394 /*
395 * No data received, and we are in "emulated non-blocking mode", so
396 * return indicating that we'd block if we were to continue.
397 */
398 errno = EWOULDBLOCK;
399 return -1;
400 }
401
402 /* We're in blocking mode, so wait for data */
403
404 for (n = 0; n < 5; n++)
405 {
406 if (pgwin32_waitforsinglesocket(s, FD_READ | FD_CLOSE | FD_ACCEPT,
407 INFINITE) == 0)
408 return -1; /* errno already set */
409
410 r = WSARecv(s, &wbuf, 1, &b, &flags, NULL, NULL);
411 if (r != SOCKET_ERROR)
412 return b; /* success */
413 if (WSAGetLastError() != WSAEWOULDBLOCK)
414 {
415 TranslateSocketError();
416 return -1;
417 }
418
419 /*
420 * There seem to be cases on win2k (at least) where WSARecv can return
421 * WSAEWOULDBLOCK even when pgwin32_waitforsinglesocket claims the
422 * socket is readable. In this case, just sleep for a moment and try
423 * again. We try up to 5 times - if it fails more than that it's not
424 * likely to ever come back.
425 */
426 pg_usleep(10000);
427 }
428 ereport(NOTICE,
429 (errmsg_internal("could not read from ready socket (after retries)")));
430 errno = EWOULDBLOCK;
431 return -1;
432 }
433
434 /*
435 * The second argument to send() is defined by SUS to be a "const void *"
436 * and so we use the same signature here to keep compilers happy when
437 * handling callers.
438 *
439 * But the buf member of a WSABUF struct is defined as "char *", so we cast
440 * the second argument to that here when assigning it, also to keep compilers
441 * happy.
442 */
443
444 int
pgwin32_send(SOCKET s,const void * buf,int len,int flags)445 pgwin32_send(SOCKET s, const void *buf, int len, int flags)
446 {
447 WSABUF wbuf;
448 int r;
449 DWORD b;
450
451 if (pgwin32_poll_signals())
452 return -1;
453
454 wbuf.len = len;
455 wbuf.buf = (char *) buf;
456
457 /*
458 * Readiness of socket to send data to UDP socket may be not true: socket
459 * can become busy again! So loop until send or error occurs.
460 */
461 for (;;)
462 {
463 r = WSASend(s, &wbuf, 1, &b, flags, NULL, NULL);
464 if (r != SOCKET_ERROR && b > 0)
465 /* Write succeeded right away */
466 return b;
467
468 if (r == SOCKET_ERROR &&
469 WSAGetLastError() != WSAEWOULDBLOCK)
470 {
471 TranslateSocketError();
472 return -1;
473 }
474
475 if (pgwin32_noblock)
476 {
477 /*
478 * No data sent, and we are in "emulated non-blocking mode", so
479 * return indicating that we'd block if we were to continue.
480 */
481 errno = EWOULDBLOCK;
482 return -1;
483 }
484
485 /* No error, zero bytes (win2000+) or error+WSAEWOULDBLOCK (<=nt4) */
486
487 if (pgwin32_waitforsinglesocket(s, FD_WRITE | FD_CLOSE, INFINITE) == 0)
488 return -1;
489 }
490
491 return -1;
492 }
493
494
495 /*
496 * Wait for activity on one or more sockets.
497 * While waiting, allow signals to run
498 *
499 * NOTE! Currently does not implement exceptfds check,
500 * since it is not used in postgresql!
501 */
502 int
pgwin32_select(int nfds,fd_set * readfds,fd_set * writefds,fd_set * exceptfds,const struct timeval * timeout)503 pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval * timeout)
504 {
505 WSAEVENT events[FD_SETSIZE * 2]; /* worst case is readfds totally
506 * different from writefds, so
507 * 2*FD_SETSIZE sockets */
508 SOCKET sockets[FD_SETSIZE * 2];
509 int numevents = 0;
510 int i;
511 int r;
512 DWORD timeoutval = WSA_INFINITE;
513 FD_SET outreadfds;
514 FD_SET outwritefds;
515 int nummatches = 0;
516
517 Assert(exceptfds == NULL);
518
519 if (pgwin32_poll_signals())
520 return -1;
521
522 FD_ZERO(&outreadfds);
523 FD_ZERO(&outwritefds);
524
525 /*
526 * Windows does not guarantee to log an FD_WRITE network event indicating
527 * that more data can be sent unless the previous send() failed with
528 * WSAEWOULDBLOCK. While our caller might well have made such a call, we
529 * cannot assume that here. Therefore, if waiting for write-ready, force
530 * the issue by doing a dummy send(). If the dummy send() succeeds,
531 * assume that the socket is in fact write-ready, and return immediately.
532 * Also, if it fails with something other than WSAEWOULDBLOCK, return a
533 * write-ready indication to let our caller deal with the error condition.
534 */
535 if (writefds != NULL)
536 {
537 for (i = 0; i < writefds->fd_count; i++)
538 {
539 char c;
540 WSABUF buf;
541 DWORD sent;
542
543 buf.buf = &c;
544 buf.len = 0;
545
546 r = WSASend(writefds->fd_array[i], &buf, 1, &sent, 0, NULL, NULL);
547 if (r == 0 || WSAGetLastError() != WSAEWOULDBLOCK)
548 FD_SET(writefds->fd_array[i], &outwritefds);
549 }
550
551 /* If we found any write-ready sockets, just return them immediately */
552 if (outwritefds.fd_count > 0)
553 {
554 memcpy(writefds, &outwritefds, sizeof(fd_set));
555 if (readfds)
556 FD_ZERO(readfds);
557 return outwritefds.fd_count;
558 }
559 }
560
561
562 /* Now set up for an actual select */
563
564 if (timeout != NULL)
565 {
566 /* timeoutval is in milliseconds */
567 timeoutval = timeout->tv_sec * 1000 + timeout->tv_usec / 1000;
568 }
569
570 if (readfds != NULL)
571 {
572 for (i = 0; i < readfds->fd_count; i++)
573 {
574 events[numevents] = WSACreateEvent();
575 sockets[numevents] = readfds->fd_array[i];
576 numevents++;
577 }
578 }
579 if (writefds != NULL)
580 {
581 for (i = 0; i < writefds->fd_count; i++)
582 {
583 if (!readfds ||
584 !FD_ISSET(writefds->fd_array[i], readfds))
585 {
586 /* If the socket is not in the read list */
587 events[numevents] = WSACreateEvent();
588 sockets[numevents] = writefds->fd_array[i];
589 numevents++;
590 }
591 }
592 }
593
594 for (i = 0; i < numevents; i++)
595 {
596 int flags = 0;
597
598 if (readfds && FD_ISSET(sockets[i], readfds))
599 flags |= FD_READ | FD_ACCEPT | FD_CLOSE;
600
601 if (writefds && FD_ISSET(sockets[i], writefds))
602 flags |= FD_WRITE | FD_CLOSE;
603
604 if (WSAEventSelect(sockets[i], events[i], flags) != 0)
605 {
606 TranslateSocketError();
607 /* release already-assigned event objects */
608 while (--i >= 0)
609 WSAEventSelect(sockets[i], NULL, 0);
610 for (i = 0; i < numevents; i++)
611 WSACloseEvent(events[i]);
612 return -1;
613 }
614 }
615
616 events[numevents] = pgwin32_signal_event;
617 r = WaitForMultipleObjectsEx(numevents + 1, events, FALSE, timeoutval, TRUE);
618 if (r != WAIT_TIMEOUT && r != WAIT_IO_COMPLETION && r != (WAIT_OBJECT_0 + numevents))
619 {
620 /*
621 * We scan all events, even those not signalled, in case more than one
622 * event has been tagged but Wait.. can only return one.
623 */
624 WSANETWORKEVENTS resEvents;
625
626 for (i = 0; i < numevents; i++)
627 {
628 ZeroMemory(&resEvents, sizeof(resEvents));
629 if (WSAEnumNetworkEvents(sockets[i], events[i], &resEvents) != 0)
630 elog(ERROR, "failed to enumerate network events: error code %u",
631 WSAGetLastError());
632 /* Read activity? */
633 if (readfds && FD_ISSET(sockets[i], readfds))
634 {
635 if ((resEvents.lNetworkEvents & FD_READ) ||
636 (resEvents.lNetworkEvents & FD_ACCEPT) ||
637 (resEvents.lNetworkEvents & FD_CLOSE))
638 {
639 FD_SET(sockets[i], &outreadfds);
640
641 nummatches++;
642 }
643 }
644 /* Write activity? */
645 if (writefds && FD_ISSET(sockets[i], writefds))
646 {
647 if ((resEvents.lNetworkEvents & FD_WRITE) ||
648 (resEvents.lNetworkEvents & FD_CLOSE))
649 {
650 FD_SET(sockets[i], &outwritefds);
651
652 nummatches++;
653 }
654 }
655 }
656 }
657
658 /* Clean up all the event objects */
659 for (i = 0; i < numevents; i++)
660 {
661 WSAEventSelect(sockets[i], NULL, 0);
662 WSACloseEvent(events[i]);
663 }
664
665 if (r == WSA_WAIT_TIMEOUT)
666 {
667 if (readfds)
668 FD_ZERO(readfds);
669 if (writefds)
670 FD_ZERO(writefds);
671 return 0;
672 }
673
674 /* Signal-like events. */
675 if (r == WAIT_OBJECT_0 + numevents || r == WAIT_IO_COMPLETION)
676 {
677 pgwin32_dispatch_queued_signals();
678 errno = EINTR;
679 if (readfds)
680 FD_ZERO(readfds);
681 if (writefds)
682 FD_ZERO(writefds);
683 return -1;
684 }
685
686 /* Overwrite socket sets with our resulting values */
687 if (readfds)
688 memcpy(readfds, &outreadfds, sizeof(fd_set));
689 if (writefds)
690 memcpy(writefds, &outwritefds, sizeof(fd_set));
691 return nummatches;
692 }
693
694
695 /*
696 * Return win32 error string, since strerror can't
697 * handle winsock codes
698 */
699 static char wserrbuf[256];
700 const char *
pgwin32_socket_strerror(int err)701 pgwin32_socket_strerror(int err)
702 {
703 static HANDLE handleDLL = INVALID_HANDLE_VALUE;
704
705 if (handleDLL == INVALID_HANDLE_VALUE)
706 {
707 handleDLL = LoadLibraryEx("netmsg.dll", NULL, DONT_RESOLVE_DLL_REFERENCES | LOAD_LIBRARY_AS_DATAFILE);
708 if (handleDLL == NULL)
709 ereport(FATAL,
710 (errmsg_internal("could not load netmsg.dll: error code %lu", GetLastError())));
711 }
712
713 ZeroMemory(&wserrbuf, sizeof(wserrbuf));
714 if (FormatMessage(FORMAT_MESSAGE_IGNORE_INSERTS |
715 FORMAT_MESSAGE_FROM_SYSTEM |
716 FORMAT_MESSAGE_FROM_HMODULE,
717 handleDLL,
718 err,
719 MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT),
720 wserrbuf,
721 sizeof(wserrbuf) - 1,
722 NULL) == 0)
723 {
724 /* Failed to get id */
725 sprintf(wserrbuf, "unrecognized winsock error %d", err);
726 }
727 return wserrbuf;
728 }
729