1 /*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2015 Varnish Software AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * SPDX-License-Identifier: BSD-2-Clause
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include "config.h"
33
34 #include <sys/types.h>
35 #include <sys/socket.h>
36 #include <sys/time.h> // for NetBSD
37 #include <sys/ioctl.h>
38 #ifdef HAVE_SYS_FILIO_H
39 # include <sys/filio.h>
40 #endif
41
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44
45 #include <math.h>
46 #include <netdb.h>
47 #include <poll.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <stdlib.h>
52
53 #include "vdef.h"
54 #include "miniobj.h"
55 #include "vas.h"
56 #include "vsa.h"
57 #include "vss.h"
58 #include "vtcp.h"
59 #include "vtim.h"
60
61 /*--------------------------------------------------------------------*/
62 static void
vtcp_sa_to_ascii(const void * sa,socklen_t l,char * abuf,unsigned alen,char * pbuf,unsigned plen)63 vtcp_sa_to_ascii(const void *sa, socklen_t l, char *abuf, unsigned alen,
64 char *pbuf, unsigned plen)
65 {
66 int i;
67
68 assert(abuf == NULL || alen > 0);
69 assert(pbuf == NULL || plen > 0);
70 i = getnameinfo(sa, l, abuf, alen, pbuf, plen,
71 NI_NUMERICHOST | NI_NUMERICSERV);
72 if (i) {
73 /*
74 * XXX this printf is shitty, but we may not have space
75 * for the gai_strerror in the bufffer :-(
76 */
77 fprintf(stderr, "getnameinfo = %d %s\n", i, gai_strerror(i));
78 if (i == EAI_SYSTEM)
79 fprintf(stderr, "errno = %d %s\n", errno, vstrerror(errno));
80 if (abuf != NULL)
81 (void)snprintf(abuf, alen, "Conversion");
82 if (pbuf != NULL)
83 (void)snprintf(pbuf, plen, "Failed");
84 return;
85 }
86 /* XXX dirty hack for v4-to-v6 mapped addresses */
87 if (abuf != NULL && strncmp(abuf, "::ffff:", 7) == 0) {
88 for (i = 0; abuf[i + 7]; ++i)
89 abuf[i] = abuf[i + 7];
90 abuf[i] = '\0';
91 }
92 }
93
94 /*--------------------------------------------------------------------*/
95
/*
 * Format the address held in a struct suckaddr as numeric text.
 *
 * The sockaddr is extracted with VSA_Get_Sockaddr() (which must not
 * return NULL) and formatted by vtcp_sa_to_ascii(); the buffer
 * arguments are passed through unchanged.
 */
void
VTCP_name(const struct suckaddr *addr, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
	socklen_t salen;
	const struct sockaddr *raw = VSA_Get_Sockaddr(addr, &salen);

	AN(raw);
	vtcp_sa_to_ascii(raw, salen, abuf, alen, pbuf, plen);
}
107
108 /*--------------------------------------------------------------------*/
109
110 struct suckaddr *
VTCP_my_suckaddr(int sock)111 VTCP_my_suckaddr(int sock)
112 {
113 struct suckaddr *r;
114
115 r = malloc(vsa_suckaddr_len);
116 AN(VSA_getsockname(sock, r, vsa_suckaddr_len));
117 return (r);
118 }
119
120 /*--------------------------------------------------------------------*/
121
122 void
VTCP_myname(int sock,char * abuf,unsigned alen,char * pbuf,unsigned plen)123 VTCP_myname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
124 {
125 char buf[vsa_suckaddr_len];
126
127 VTCP_name(VSA_getsockname(sock, buf, sizeof buf),
128 abuf, alen, pbuf, plen);
129 }
130
131 /*--------------------------------------------------------------------*/
132
133 void
VTCP_hisname(int sock,char * abuf,unsigned alen,char * pbuf,unsigned plen)134 VTCP_hisname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
135 {
136 char buf[vsa_suckaddr_len];
137 struct suckaddr *sua;
138
139 sua = VSA_getpeername(sock, buf, sizeof buf);
140 if (sua != NULL)
141 VTCP_name(sua, abuf, alen, pbuf, plen);
142 else {
143 (void)snprintf(abuf, alen, "<none>");
144 (void)snprintf(pbuf, plen, "<none>");
145 }
146 }
147
148 /*--------------------------------------------------------------------*/
149
150 #ifdef HAVE_ACCEPT_FILTERS
151
152 int
VTCP_filter_http(int sock)153 VTCP_filter_http(int sock)
154 {
155 int retval;
156 struct accept_filter_arg afa;
157
158 memset(&afa, 0, sizeof afa);
159 bprintf(afa.af_name, "%s", "httpready");
160 retval = setsockopt(sock, SOL_SOCKET, SO_ACCEPTFILTER,
161 &afa, sizeof afa);
162 return (retval);
163 }
164
165 #elif defined(__linux)
166
167 int
VTCP_filter_http(int sock)168 VTCP_filter_http(int sock)
169 {
170 int retval;
171 int defer = 1;
172
173 retval = setsockopt(sock, SOL_TCP, TCP_DEFER_ACCEPT,
174 &defer, sizeof defer);
175 return (retval);
176 }
177
178 #else
179
180 int
VTCP_filter_http(int sock)181 VTCP_filter_http(int sock)
182 {
183 errno = EOPNOTSUPP;
184 (void)sock;
185 return (-1);
186 }
187
188 #endif
189
190 /*--------------------------------------------------------------------*/
191
#ifdef HAVE_TCP_FASTOPEN

/*
 * Set the TCP_FASTOPEN socket option on 'sock' to 'depth'.
 * Returns the setsockopt(2) result.
 */
int
VTCP_fastopen(int sock, int depth)
{
	int rc;

	rc = setsockopt(sock, SOL_TCP, TCP_FASTOPEN, &depth, sizeof depth);
	return (rc);
}

#else

/*
 * Built without HAVE_TCP_FASTOPEN: always fails with errno set to
 * EOPNOTSUPP.
 */
int
VTCP_fastopen(int sock, int depth)
{
	(void)sock;
	(void)depth;
	errno = EOPNOTSUPP;
	return (-1);
}

#endif
212
213 /*--------------------------------------------------------------------
214 * Functions for controlling NONBLOCK mode.
215 *
216 * We use FIONBIO because it is cheaper than fcntl(2), which requires
217 * us to do two syscalls, one to get and one to set, the latter of
218 * which mucks about a bit before it ends up calling ioctl(FIONBIO),
219 * at least on FreeBSD.
220 * On Solaris ioctl(FIONBIO) can fail with connection related errnos,
221 * but as long as that is how they fail, we're fine.
222 */
223
/*
 * Switch 'sock' to blocking mode by clearing FIONBIO.
 * The ioctl(2) result is checked with VTCP_Assert().
 */
void
VTCP_blocking(int sock)
{
	int off = 0;
	int rc = ioctl(sock, FIONBIO, &off);

	VTCP_Assert(rc);
}
233
/*
 * Switch 'sock' to non-blocking mode by setting FIONBIO.
 * The ioctl(2) result is checked with VTCP_Assert().
 */
void
VTCP_nonblocking(int sock)
{
	int on = 1;
	int rc = ioctl(sock, FIONBIO, &on);

	VTCP_Assert(rc);
}
243
244 /*--------------------------------------------------------------------
245 * On TCP a connect(2) can block for a looong time, and we don't want that.
246 * Unfortunately, the SocketWizards back in those days were happy to wait
247 * any amount of time for a connection, so the connect(2) syscall does not
248 * take an argument for patience.
249 *
250 * There is a little used work-around, and we employ it at our peril.
251 *
252 */
253
/*
 * Finish a connect(2) that was started on socket 's'.
 *
 * Reads the pending SO_ERROR value.  If it is non-zero the socket is
 * closed and -1 is returned with errno set to that value.  Otherwise
 * errno is set to 0, the socket is switched to blocking mode and 's'
 * is returned.
 */
int
VTCP_connected(int s)
{
	int k;
	socklen_t l;

	/* Find out if we got a connection */
	l = sizeof k;
	AZ(getsockopt(s, SOL_SOCKET, SO_ERROR, &k, &l));

	/* An error means no connection established */
	if (k != 0) {
		closefd(&s);
		/* Assign last, so that closing cannot disturb the value */
		errno = k;
		return (-1);
	}

	errno = 0;
	VTCP_blocking(s);
	return (s);
}
274
275 int
VTCP_connect(const struct suckaddr * name,int msec)276 VTCP_connect(const struct suckaddr *name, int msec)
277 {
278 int s, i;
279 struct pollfd fds[1];
280 const struct sockaddr *sa;
281 socklen_t sl;
282 int val;
283
284 if (name == NULL)
285 return (-1);
286 /* Attempt the connect */
287 AN(VSA_Sane(name));
288 sa = VSA_Get_Sockaddr(name, &sl);
289 AN(sa);
290 AN(sl);
291
292 s = socket(sa->sa_family, SOCK_STREAM, 0);
293 if (s < 0)
294 return (s);
295
296 /* Set the socket non-blocking */
297 if (msec != 0)
298 VTCP_nonblocking(s);
299
300 val = 1;
301 AZ(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val));
302
303 i = connect(s, sa, sl);
304 if (i == 0)
305 return (s);
306 if (errno != EINPROGRESS) {
307 closefd(&s);
308 return (-1);
309 }
310
311 if (msec < 0) {
312 /*
313 * Caller is responsible for waiting and
314 * calling VTCP_connected
315 */
316 return (s);
317 }
318
319 assert(msec > 0);
320 /* Exercise our patience, polling for write */
321 fds[0].fd = s;
322 fds[0].events = POLLWRNORM;
323 fds[0].revents = 0;
324 i = poll(fds, 1, msec);
325
326 if (i == 0) {
327 /* Timeout, close and give up */
328 closefd(&s);
329 errno = ETIMEDOUT;
330 return (-1);
331 }
332
333 return (VTCP_connected(s));
334 }
335
336 /*--------------------------------------------------------------------
337 * When closing a TCP connection, a couple of errno's are legit, we
338 * can't be held responsible for the other end wanting to talk to us.
339 */
340
/*
 * Close the socket '*s' and overwrite it with -1.
 * The close(2) result is checked with VTCP_Assert().
 */
void
VTCP_close(int *s)
{
	int rc = close(*s);

	VTCP_Assert(rc);
	*s = -1;
}
351
352 void
VTCP_set_read_timeout(int s,vtim_dur seconds)353 VTCP_set_read_timeout(int s, vtim_dur seconds)
354 {
355 #ifdef SO_RCVTIMEO_WORKS
356 struct timeval timeout = VTIM_timeval(seconds);
357 /*
358 * Solaris bug (present at least in snv_151 and older): If this fails
359 * with EINVAL, the socket is half-closed (SS_CANTSENDMORE) and the
360 * timeout does not get set. Needs to be fixed in Solaris, there is
361 * nothing we can do about this.
362 */
363 VTCP_Assert(setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
364 &timeout, sizeof timeout));
365 #else
366 (void)s;
367 (void)seconds;
368 #endif
369 }
370
371 /*--------------------------------------------------------------------
372 */
373
/*
 * State shared between VTCP_open() and vtcp_open_callback().
 */
struct vto_priv {
	/* Object magic, checked with VTO_PRIV_MAGIC in the callback */
	unsigned magic;
#define VTO_PRIV_MAGIC 0xca70b0e7
	/* errno of the most recent failed VTCP_connect(), 0 on success */
	int latest_errno;
	/* Connected socket; VTCP_open() starts it out as -1 */
	int fd;
	/* Connect timeout; multiplied by 1e3 to get milliseconds */
	double timeout;
};
381
v_matchproto_(vss_resolved_f)382 static int v_matchproto_(vss_resolved_f)
383 vtcp_open_callback(void *priv, const struct suckaddr *sa)
384 {
385 struct vto_priv *vto;
386 int fd;
387
388 CAST_OBJ_NOTNULL(vto, priv, VTO_PRIV_MAGIC);
389
390 errno = 0;
391 fd = VTCP_connect(sa, (int)floor(vto->timeout * 1e3));
392 if (fd >= 0) {
393 vto->fd = fd;
394 vto->latest_errno = 0;
395 return (1);
396 }
397 vto->latest_errno = errno;
398 return (0);
399 }
400
401 int
VTCP_open(const char * addr,const char * def_port,vtim_dur timeout,const char ** errp)402 VTCP_open(const char *addr, const char *def_port, vtim_dur timeout,
403 const char **errp)
404 {
405 struct vto_priv vto[1];
406
407 AN(errp);
408 assert(timeout >= 0);
409 INIT_OBJ(vto, VTO_PRIV_MAGIC);
410 vto->fd = -1;
411 vto->timeout = timeout;
412
413 if (VSS_resolver(addr, def_port, vtcp_open_callback, vto, errp) < 0)
414 return (-1);
415 if (vto->fd < 0)
416 *errp = strerror(vto->latest_errno);
417 return (vto->fd);
418 }
419
420 /*--------------------------------------------------------------------
421 * Given a struct suckaddr, open a socket of the appropriate type, and bind
422 * it to the requested address.
423 *
424 * If the address is an IPv6 address, the IPV6_V6ONLY option is set to
425 * avoid conflicts between INADDR_ANY and IN6ADDR_ANY.
426 */
427
428 int
VTCP_bind(const struct suckaddr * sa,const char ** errp)429 VTCP_bind(const struct suckaddr *sa, const char **errp)
430 {
431 int sd, val, e;
432 socklen_t sl;
433 const struct sockaddr *so;
434 int proto;
435
436 if (errp != NULL)
437 *errp = NULL;
438
439 proto = VSA_Get_Proto(sa);
440 sd = socket(proto, SOCK_STREAM, 0);
441 if (sd < 0) {
442 if (errp != NULL)
443 *errp = "socket(2)";
444 return (-1);
445 }
446 val = 1;
447 if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val) != 0) {
448 if (errp != NULL)
449 *errp = "setsockopt(SO_REUSEADDR, 1)";
450 e = errno;
451 closefd(&sd);
452 errno = e;
453 return (-1);
454 }
455 #ifdef IPV6_V6ONLY
456 /* forcibly use separate sockets for IPv4 and IPv6 */
457 val = 1;
458 if (proto == AF_INET6 &&
459 setsockopt(sd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val) != 0) {
460 if (errp != NULL)
461 *errp = "setsockopt(IPV6_V6ONLY, 1)";
462 e = errno;
463 closefd(&sd);
464 errno = e;
465 return (-1);
466 }
467 #endif
468 so = VSA_Get_Sockaddr(sa, &sl);
469 if (bind(sd, so, sl) != 0) {
470 if (errp != NULL)
471 *errp = "bind(2)";
472 e = errno;
473 closefd(&sd);
474 errno = e;
475 return (-1);
476 }
477 return (sd);
478 }
479
480 /*--------------------------------------------------------------------
481 * Given a struct suckaddr, open a socket of the appropriate type, bind it
482 * to the requested address, and start listening.
483 */
484
485 int
VTCP_listen(const struct suckaddr * sa,int depth,const char ** errp)486 VTCP_listen(const struct suckaddr *sa, int depth, const char **errp)
487 {
488 int sd;
489 int e;
490
491 if (errp != NULL)
492 *errp = NULL;
493 sd = VTCP_bind(sa, errp);
494 if (sd >= 0) {
495 if (listen(sd, depth) != 0) {
496 e = errno;
497 closefd(&sd);
498 errno = e;
499 if (errp != NULL)
500 *errp = "listen(2)";
501 return (-1);
502 }
503 }
504 return (sd);
505 }
506
507 /*--------------------------------------------------------------------*/
508
/*
 * Arguments VTCP_listen_on() hands to vtcp_lo_cb() through the
 * resolver's private pointer.
 */
struct helper {
	/* Backlog depth passed on to VTCP_listen() */
	int depth;
	/* Caller's error string pointer; VTCP_listen_on() asserts non-NULL */
	const char **errp;
};
513
v_matchproto_(vss_resolved_f)514 static int v_matchproto_(vss_resolved_f)
515 vtcp_lo_cb(void *priv, const struct suckaddr *sa)
516 {
517 int sock;
518 struct helper *hp = priv;
519
520 sock = VTCP_listen(sa, hp->depth, hp->errp);
521 if (sock > 0) {
522 *hp->errp = NULL;
523 return (sock);
524 }
525 AN(*hp->errp);
526 return (0);
527 }
528
529 int
VTCP_listen_on(const char * addr,const char * def_port,int depth,const char ** errp)530 VTCP_listen_on(const char *addr, const char *def_port, int depth,
531 const char **errp)
532 {
533 struct helper h;
534 int sock;
535
536 AN(errp);
537 h.depth = depth;
538 h.errp = errp;
539
540 sock = VSS_resolver(addr, def_port, vtcp_lo_cb, &h, errp);
541 if (*errp != NULL)
542 return (-1);
543 return (sock);
544 }
545
546 /*--------------------------------------------------------------------
547 * Set or reset SO_LINGER flag
548 */
549
/*
 * sock		socket to modify.
 * linger	stored in l_onoff; all other fields of the struct
 *		linger, including l_linger, are zero.
 *
 * The setsockopt(2) result is checked with VTCP_Assert() and then
 * returned.
 */
int
VTCP_linger(int sock, int linger)
{
	struct linger opt;
	int rc;

	memset(&opt, 0, sizeof opt);
	opt.l_onoff = linger;

	rc = setsockopt(sock, SOL_SOCKET, SO_LINGER, &opt, sizeof opt);
	VTCP_Assert(rc);
	return (rc);
}
562
563 /*--------------------------------------------------------------------
564 * Do a poll to check for remote HUP
565 */
566
567 int
VTCP_check_hup(int sock)568 VTCP_check_hup(int sock)
569 {
570 struct pollfd pfd;
571
572 assert(sock > 0);
573 pfd.fd = sock;
574 pfd.events = POLLOUT;
575 pfd.revents = 0;
576
577 if (poll(&pfd, 1, 0) == 1 && pfd.revents & POLLHUP)
578 return (1);
579 return (0);
580 }
581
582 /*--------------------------------------------------------------------
583 * Check if a TCP syscall return value is fatal
584 */
585
/*
 * a	return value of a TCP related syscall.
 *
 * Returns 1 if 'a' is >= 0, or if it is negative and errno holds one
 * of the values listed below.  Returns 0 for any other errno.
 */
int
VTCP_Check(ssize_t a)
{
	static const int tolerated[] = {
		/* The other end is gone */
		ECONNRESET, ENOTCONN, EPIPE,
		/*
		 * EAGAIN (and EWOULDBLOCK in case they are not the same).
		 * Even though our sockets are all non-blocking, when a
		 * SO_{SND|RCV}TIMEO expires, read() or write() on the
		 * socket will return (-1) and errno set to EAGAIN.
		 * (This is not documented in the read(2) and write(2)
		 * manpages, but is described in the socket(7) manpage.)
		 */
		EAGAIN, EWOULDBLOCK,
		/*
		 * tcp(7): The other end didn't acknowledge retransmitted
		 * data after some time.
		 */
		ETIMEDOUT,
		/* #3539 various errnos documented on linux as POSIX.1 */
		ENETDOWN, ENETUNREACH, ENETRESET,
		ECONNABORTED, EHOSTUNREACH, EHOSTDOWN,
#if (defined (__SVR4) && defined (__sun))
		ECONNREFUSED,	// in r02702.vtc
		EPROTO,
#endif
#if (defined (__SVR4) && defined (__sun)) || \
    defined (__NetBSD__) || \
    defined (__APPLE__)
		/*
		 * Solaris and MacOS return EINVAL if the other end
		 * unexpectedly reset the connection.
		 *
		 * On NetBSD it is documented behaviour.
		 */
		EINVAL,
#endif
	};
	const size_t count = sizeof tolerated / sizeof tolerated[0];
	size_t k;
	int err;

	if (a >= 0)
		return (1);

	err = errno;
	for (k = 0; k < count; k++) {
		if (tolerated[k] == err)
			return (1);
	}
	return (0);
}
634
635 /*--------------------------------------------------------------------
636 *
637 */
638
639 int
VTCP_read(int fd,void * ptr,size_t len,vtim_dur tmo)640 VTCP_read(int fd, void *ptr, size_t len, vtim_dur tmo)
641 {
642 struct pollfd pfd[1];
643 int i, j;
644
645 if (tmo > 0.0) {
646 pfd[0].fd = fd;
647 pfd[0].events = POLLIN;
648 pfd[0].revents = 0;
649 j = (int)floor(tmo * 1e3);
650 if (j == 0)
651 j++;
652 j = poll(pfd, 1, j);
653 if (j == 0)
654 return (-2);
655 }
656 i = read(fd, ptr, len);
657 VTCP_Assert(i);
658 return (i < 0 ? -1 : i);
659 }
660