1 /*-
2  * Copyright (c) 2006 Verdens Gang AS
3  * Copyright (c) 2006-2015 Varnish Software AS
4  * All rights reserved.
5  *
6  * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7  *
8  * SPDX-License-Identifier: BSD-2-Clause
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "config.h"
33 
34 #include <sys/types.h>
35 #include <sys/socket.h>
36 #include <sys/time.h>		// for NetBSD
37 #include <sys/ioctl.h>
38 #ifdef HAVE_SYS_FILIO_H
39 #  include <sys/filio.h>
40 #endif
41 
42 #include <netinet/in.h>
43 #include <netinet/tcp.h>
44 
45 #include <math.h>
46 #include <netdb.h>
47 #include <poll.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <stdlib.h>
52 
53 #include "vdef.h"
54 #include "miniobj.h"
55 #include "vas.h"
56 #include "vsa.h"
57 #include "vss.h"
58 #include "vtcp.h"
59 #include "vtim.h"
60 
61 /*--------------------------------------------------------------------*/
62 static void
vtcp_sa_to_ascii(const void * sa,socklen_t l,char * abuf,unsigned alen,char * pbuf,unsigned plen)63 vtcp_sa_to_ascii(const void *sa, socklen_t l, char *abuf, unsigned alen,
64     char *pbuf, unsigned plen)
65 {
66 	int i;
67 
68 	assert(abuf == NULL || alen > 0);
69 	assert(pbuf == NULL || plen > 0);
70 	i = getnameinfo(sa, l, abuf, alen, pbuf, plen,
71 	   NI_NUMERICHOST | NI_NUMERICSERV);
72 	if (i) {
73 		/*
74 		 * XXX this printf is shitty, but we may not have space
75 		 * for the gai_strerror in the bufffer :-(
76 		 */
77 		fprintf(stderr, "getnameinfo = %d %s\n", i, gai_strerror(i));
78 		if (i == EAI_SYSTEM)
79 			fprintf(stderr, "errno = %d %s\n", errno, vstrerror(errno));
80 		if (abuf != NULL)
81 			(void)snprintf(abuf, alen, "Conversion");
82 		if (pbuf != NULL)
83 			(void)snprintf(pbuf, plen, "Failed");
84 		return;
85 	}
86 	/* XXX dirty hack for v4-to-v6 mapped addresses */
87 	if (abuf != NULL && strncmp(abuf, "::ffff:", 7) == 0) {
88 		for (i = 0; abuf[i + 7]; ++i)
89 			abuf[i] = abuf[i + 7];
90 		abuf[i] = '\0';
91 	}
92 }
93 
94 /*--------------------------------------------------------------------*/
95 
/*
 * Format a suckaddr as numeric address and port strings.
 *
 * The address must yield a sockaddr (asserted); the buffer arguments
 * are forwarded unchanged to vtcp_sa_to_ascii().
 */
void
VTCP_name(const struct suckaddr *addr, char *abuf, unsigned alen,
    char *pbuf, unsigned plen)
{
	socklen_t sl;
	const struct sockaddr *sa = VSA_Get_Sockaddr(addr, &sl);

	AN(sa);
	vtcp_sa_to_ascii(sa, sl, abuf, alen, pbuf, plen);
}
107 
108 /*--------------------------------------------------------------------*/
109 
110 struct suckaddr *
VTCP_my_suckaddr(int sock)111 VTCP_my_suckaddr(int sock)
112 {
113 	struct suckaddr *r;
114 
115 	r = malloc(vsa_suckaddr_len);
116 	AN(VSA_getsockname(sock, r, vsa_suckaddr_len));
117 	return (r);
118 }
119 
120 /*--------------------------------------------------------------------*/
121 
122 void
VTCP_myname(int sock,char * abuf,unsigned alen,char * pbuf,unsigned plen)123 VTCP_myname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
124 {
125 	char buf[vsa_suckaddr_len];
126 
127 	VTCP_name(VSA_getsockname(sock, buf, sizeof buf),
128 		  abuf, alen, pbuf, plen);
129 }
130 
131 /*--------------------------------------------------------------------*/
132 
133 void
VTCP_hisname(int sock,char * abuf,unsigned alen,char * pbuf,unsigned plen)134 VTCP_hisname(int sock, char *abuf, unsigned alen, char *pbuf, unsigned plen)
135 {
136 	char buf[vsa_suckaddr_len];
137 	struct suckaddr *sua;
138 
139 	sua = VSA_getpeername(sock, buf, sizeof buf);
140 	if (sua != NULL)
141 		VTCP_name(sua, abuf, alen, pbuf, plen);
142 	else {
143 		(void)snprintf(abuf, alen, "<none>");
144 		(void)snprintf(pbuf, plen, "<none>");
145 	}
146 }
147 
148 /*--------------------------------------------------------------------*/
149 
150 #ifdef HAVE_ACCEPT_FILTERS
151 
152 int
VTCP_filter_http(int sock)153 VTCP_filter_http(int sock)
154 {
155 	int retval;
156 	struct accept_filter_arg afa;
157 
158 	memset(&afa, 0, sizeof afa);
159 	bprintf(afa.af_name, "%s", "httpready");
160 	retval = setsockopt(sock, SOL_SOCKET, SO_ACCEPTFILTER,
161 	    &afa, sizeof afa);
162 	return (retval);
163 }
164 
165 #elif defined(__linux)
166 
167 int
VTCP_filter_http(int sock)168 VTCP_filter_http(int sock)
169 {
170 	int retval;
171 	int defer = 1;
172 
173 	retval = setsockopt(sock, SOL_TCP, TCP_DEFER_ACCEPT,
174 	    &defer, sizeof defer);
175 	return (retval);
176 }
177 
178 #else
179 
/*
 * Fallback for platforms with no accept filtering:
 * always fails with errno set to EOPNOTSUPP.
 */
int
VTCP_filter_http(int sock)
{
	(void)sock;
	errno = EOPNOTSUPP;
	return (-1);
}
187 
188 #endif
189 
190 /*--------------------------------------------------------------------*/
191 
192 #ifdef HAVE_TCP_FASTOPEN
193 
194 int
VTCP_fastopen(int sock,int depth)195 VTCP_fastopen(int sock, int depth)
196 {
197 	return (setsockopt(sock, SOL_TCP, TCP_FASTOPEN, &depth, sizeof depth));
198 }
199 
200 #else
201 
/*
 * Fallback for platforms without TCP_FASTOPEN:
 * always fails with errno set to EOPNOTSUPP.
 */
int
VTCP_fastopen(int sock, int depth)
{
	(void)depth;
	(void)sock;
	errno = EOPNOTSUPP;
	return (-1);
}
210 
211 #endif
212 
213 /*--------------------------------------------------------------------
214  * Functions for controlling NONBLOCK mode.
215  *
216  * We use FIONBIO because it is cheaper than fcntl(2), which requires
217  * us to do two syscalls, one to get and one to set, the latter of
218  * which mucks about a bit before it ends up calling ioctl(FIONBIO),
219  * at least on FreeBSD.
220  * On Solaris ioctl(FIONBIO) can fail with connection related errnos,
221  * but as long as that is how they fail, we're fine.
222  */
223 
/*
 * Put a socket into blocking mode with ioctl(FIONBIO).
 * The ioctl result is checked with VTCP_Assert().
 */
void
VTCP_blocking(int sock)
{
	int nonblock = 0;
	int j;

	j = ioctl(sock, FIONBIO, &nonblock);
	VTCP_Assert(j);
}
233 
/*
 * Put a socket into non-blocking mode with ioctl(FIONBIO).
 * The ioctl result is checked with VTCP_Assert().
 */
void
VTCP_nonblocking(int sock)
{
	int nonblock = 1;
	int j;

	j = ioctl(sock, FIONBIO, &nonblock);
	VTCP_Assert(j);
}
243 
244 /*--------------------------------------------------------------------
245  * On TCP a connect(2) can block for a looong time, and we don't want that.
246  * Unfortunately, the SocketWizards back in those days were happy to wait
247  * any amount of time for a connection, so the connect(2) syscall does not
248  * take an argument for patience.
249  *
250  * There is a little used work-around, and we employ it at our peril.
251  *
252  */
253 
/*
 * Finish a non-blocking connect: read the pending socket error.
 *
 * On success the socket is switched back to blocking mode and returned.
 * On failure the socket is closed, errno is set to the pending socket
 * error, and -1 is returned.
 */
int
VTCP_connected(int s)
{
	int k;
	socklen_t l;

	/* Find out if we got a connection */
	l = sizeof k;
	AZ(getsockopt(s, SOL_SOCKET, SO_ERROR, &k, &l));

	/* An error means no connection established */
	if (k != 0) {
		closefd(&s);
		/* Set errno after close(2), which may change it */
		errno = k;
		return (-1);
	}

	errno = 0;
	VTCP_blocking(s);
	return (s);
}
274 
275 int
VTCP_connect(const struct suckaddr * name,int msec)276 VTCP_connect(const struct suckaddr *name, int msec)
277 {
278 	int s, i;
279 	struct pollfd fds[1];
280 	const struct sockaddr *sa;
281 	socklen_t sl;
282 	int val;
283 
284 	if (name == NULL)
285 		return (-1);
286 	/* Attempt the connect */
287 	AN(VSA_Sane(name));
288 	sa = VSA_Get_Sockaddr(name, &sl);
289 	AN(sa);
290 	AN(sl);
291 
292 	s = socket(sa->sa_family, SOCK_STREAM, 0);
293 	if (s < 0)
294 		return (s);
295 
296 	/* Set the socket non-blocking */
297 	if (msec != 0)
298 		VTCP_nonblocking(s);
299 
300 	val = 1;
301 	AZ(setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val));
302 
303 	i = connect(s, sa, sl);
304 	if (i == 0)
305 		return (s);
306 	if (errno != EINPROGRESS) {
307 		closefd(&s);
308 		return (-1);
309 	}
310 
311 	if (msec < 0) {
312 		/*
313 		 * Caller is responsible for waiting and
314 		 * calling VTCP_connected
315 		 */
316 		return (s);
317 	}
318 
319 	assert(msec > 0);
320 	/* Exercise our patience, polling for write */
321 	fds[0].fd = s;
322 	fds[0].events = POLLWRNORM;
323 	fds[0].revents = 0;
324 	i = poll(fds, 1, msec);
325 
326 	if (i == 0) {
327 		/* Timeout, close and give up */
328 		closefd(&s);
329 		errno = ETIMEDOUT;
330 		return (-1);
331 	}
332 
333 	return (VTCP_connected(s));
334 }
335 
336 /*--------------------------------------------------------------------
337  * When closing a TCP connection, a couple of errno's are legit, we
338  * can't be held responsible for the other end wanting to talk to us.
339  */
340 
/*
 * Close the socket *s and reset *s to -1.
 *
 * The result of close(2) is checked with VTCP_Assert().
 */
void
VTCP_close(int *s)
{
	int i = close(*s);

	VTCP_Assert(i);
	*s = -1;
}
351 
352 void
VTCP_set_read_timeout(int s,vtim_dur seconds)353 VTCP_set_read_timeout(int s, vtim_dur seconds)
354 {
355 #ifdef SO_RCVTIMEO_WORKS
356 	struct timeval timeout = VTIM_timeval(seconds);
357 	/*
358 	 * Solaris bug (present at least in snv_151 and older): If this fails
359 	 * with EINVAL, the socket is half-closed (SS_CANTSENDMORE) and the
360 	 * timeout does not get set. Needs to be fixed in Solaris, there is
361 	 * nothing we can do about this.
362 	 */
363 	VTCP_Assert(setsockopt(s, SOL_SOCKET, SO_RCVTIMEO,
364 	    &timeout, sizeof timeout));
365 #else
366 	(void)s;
367 	(void)seconds;
368 #endif
369 }
370 
371 /*--------------------------------------------------------------------
372  */
373 
/*
 * State shared between VTCP_open() and its resolver callback
 * vtcp_open_callback().
 */
struct vto_priv {
	unsigned		magic;		/* object type check */
#define VTO_PRIV_MAGIC		0xca70b0e7
	int			latest_errno;	/* errno of last failed connect */
	int			fd;		/* connected socket, -1 if none */
	double			timeout;	/* seconds, scaled to msec for connect */
};
381 
v_matchproto_(vss_resolved_f)382 static int v_matchproto_(vss_resolved_f)
383 vtcp_open_callback(void *priv, const struct suckaddr *sa)
384 {
385 	struct vto_priv *vto;
386 	int fd;
387 
388 	CAST_OBJ_NOTNULL(vto, priv, VTO_PRIV_MAGIC);
389 
390 	errno = 0;
391 	fd = VTCP_connect(sa, (int)floor(vto->timeout * 1e3));
392 	if (fd >= 0) {
393 		vto->fd = fd;
394 		vto->latest_errno = 0;
395 		return (1);
396 	}
397 	vto->latest_errno = errno;
398 	return (0);
399 }
400 
401 int
VTCP_open(const char * addr,const char * def_port,vtim_dur timeout,const char ** errp)402 VTCP_open(const char *addr, const char *def_port, vtim_dur timeout,
403     const char **errp)
404 {
405 	struct vto_priv vto[1];
406 
407 	AN(errp);
408 	assert(timeout >= 0);
409 	INIT_OBJ(vto, VTO_PRIV_MAGIC);
410 	vto->fd = -1;
411 	vto->timeout = timeout;
412 
413 	if (VSS_resolver(addr, def_port, vtcp_open_callback, vto, errp) < 0)
414 		return (-1);
415 	if (vto->fd < 0)
416 		*errp = strerror(vto->latest_errno);
417 	return (vto->fd);
418 }
419 
420 /*--------------------------------------------------------------------
421  * Given a struct suckaddr, open a socket of the appropriate type, and bind
422  * it to the requested address.
423  *
424  * If the address is an IPv6 address, the IPV6_V6ONLY option is set to
425  * avoid conflicts between INADDR_ANY and IN6ADDR_ANY.
426  */
427 
/*
 * Create a stream socket for the address family of sa, set
 * SO_REUSEADDR (and IPV6_V6ONLY for AF_INET6 where available) and
 * bind it to sa.
 *
 * Returns the socket, or -1 on failure.  If errp is not NULL it is set
 * to NULL on entry and to the name of the failing step on error.
 * errno from the failing call is preserved across closing the socket.
 */
int
VTCP_bind(const struct suckaddr *sa, const char **errp)
{
	const struct sockaddr *so;
	const char *step;
	socklen_t sl;
	int sd, val, e, proto;

	if (errp != NULL)
		*errp = NULL;

	proto = VSA_Get_Proto(sa);
	sd = socket(proto, SOCK_STREAM, 0);
	if (sd < 0) {
		/* Nothing to close yet */
		if (errp != NULL)
			*errp = "socket(2)";
		return (-1);
	}

	val = 1;
	if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val) != 0) {
		step = "setsockopt(SO_REUSEADDR, 1)";
		goto fail;
	}
#ifdef IPV6_V6ONLY
	/* forcibly use separate sockets for IPv4 and IPv6 */
	val = 1;
	if (proto == AF_INET6 &&
	    setsockopt(sd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val) != 0) {
		step = "setsockopt(IPV6_V6ONLY, 1)";
		goto fail;
	}
#endif
	so = VSA_Get_Sockaddr(sa, &sl);
	if (bind(sd, so, sl) != 0) {
		step = "bind(2)";
		goto fail;
	}
	return (sd);

fail:
	/* Common failure path: report the step, close, keep errno */
	e = errno;
	if (errp != NULL)
		*errp = step;
	closefd(&sd);
	errno = e;
	return (-1);
}
479 
480 /*--------------------------------------------------------------------
481  * Given a struct suckaddr, open a socket of the appropriate type, bind it
482  * to the requested address, and start listening.
483  */
484 
/*
 * Bind a socket to sa with VTCP_bind() and start listening on it with
 * the given backlog depth.
 *
 * Returns the listening socket, or a negative value on failure.  If
 * errp is not NULL it names the failing step.  When listen(2) fails
 * the socket is closed and errno is preserved.
 */
int
VTCP_listen(const struct suckaddr *sa, int depth, const char **errp)
{
	int sd;
	int e;

	if (errp != NULL)
		*errp = NULL;

	sd = VTCP_bind(sa, errp);
	if (sd < 0)
		return (sd);
	if (listen(sd, depth) == 0)
		return (sd);

	e = errno;
	closefd(&sd);
	errno = e;
	if (errp != NULL)
		*errp = "listen(2)";
	return (-1);
}
506 
507 /*--------------------------------------------------------------------*/
508 
/*
 * Arguments passed from VTCP_listen_on() to its resolver callback
 * vtcp_lo_cb().
 */
struct helper {
	int		depth;		/* listen(2) backlog */
	const char	**errp;		/* where to report the failing step */
};
513 
v_matchproto_(vss_resolved_f)514 static int v_matchproto_(vss_resolved_f)
515 vtcp_lo_cb(void *priv, const struct suckaddr *sa)
516 {
517 	int sock;
518 	struct helper *hp = priv;
519 
520 	sock = VTCP_listen(sa, hp->depth, hp->errp);
521 	if (sock > 0) {
522 		*hp->errp = NULL;
523 		return (sock);
524 	}
525 	AN(*hp->errp);
526 	return (0);
527 }
528 
529 int
VTCP_listen_on(const char * addr,const char * def_port,int depth,const char ** errp)530 VTCP_listen_on(const char *addr, const char *def_port, int depth,
531     const char **errp)
532 {
533 	struct helper h;
534 	int sock;
535 
536 	AN(errp);
537 	h.depth = depth;
538 	h.errp = errp;
539 
540 	sock = VSS_resolver(addr, def_port, vtcp_lo_cb, &h, errp);
541 	if (*errp != NULL)
542 		return (-1);
543 	return (sock);
544 }
545 
546 /*--------------------------------------------------------------------
547  * Set or reset SO_LINGER flag
548  */
549 
/*
 * Set SO_LINGER on a socket: l_onoff is set to `linger`, every other
 * field of struct linger is zero.
 *
 * The setsockopt(2) result is checked with VTCP_Assert() and returned.
 */
int
VTCP_linger(int sock, int linger)
{
	struct linger lin;
	int i;

	memset(&lin, 0, sizeof lin);
	lin.l_onoff = linger;

	i = setsockopt(sock, SOL_SOCKET, SO_LINGER, &lin, sizeof lin);
	VTCP_Assert(i);
	return (i);
}
562 
563 /*--------------------------------------------------------------------
564  * Do a poll to check for remote HUP
565  */
566 
567 int
VTCP_check_hup(int sock)568 VTCP_check_hup(int sock)
569 {
570 	struct pollfd pfd;
571 
572 	assert(sock > 0);
573 	pfd.fd = sock;
574 	pfd.events = POLLOUT;
575 	pfd.revents = 0;
576 
577 	if (poll(&pfd, 1, 0) == 1 && pfd.revents & POLLHUP)
578 		return (1);
579 	return (0);
580 }
581 
582 /*--------------------------------------------------------------------
583  * Check if a TCP syscall return value is fatal
584  */
585 
/*
 * Decide whether the return value of a TCP syscall is acceptable.
 *
 * Returns 1 if a is zero or positive, or if a is negative and errno is
 * one of the values listed below.  Returns 0 for any other errno.
 */
int
VTCP_Check(ssize_t a)
{
	static const int tolerated[] = {
		/* The other end went away */
		ECONNRESET, ENOTCONN, EPIPE,
		/*
		 * Accept EAGAIN (and EWOULDBLOCK in case they are not the
		 * same).  Even though our sockets are all non-blocking,
		 * when a SO_{SND|RCV}TIMEO expires, read() or write() on
		 * the socket will return (-1) and errno set to EAGAIN.
		 * (This is not documented in the read(2) and write(2)
		 * manpages, but is described in the socket(7) manpage.)
		 */
		EAGAIN, EWOULDBLOCK,
		/*
		 * tcp(7): The other end didn't acknowledge retransmitted
		 * data after some time.
		 */
		ETIMEDOUT,
		/* #3539 various errnos documented on linux as POSIX.1 */
		ENETDOWN, ENETUNREACH, ENETRESET,
		ECONNABORTED, EHOSTUNREACH, EHOSTDOWN,
#if (defined (__SVR4) && defined (__sun))
		ECONNREFUSED,	// in r02702.vtc
		EPROTO,
#endif
#if (defined (__SVR4) && defined (__sun)) ||		\
    defined (__NetBSD__) ||				\
    defined (__APPLE__)
		/*
		 * Solaris and MacOS returns EINVAL if the other end
		 * unexpectedly reset the connection.
		 *
		 * On NetBSD it is documented behaviour.
		 */
		EINVAL,
#endif
	};
	const size_t count = sizeof tolerated / sizeof tolerated[0];
	size_t u;
	int e;

	if (a >= 0)
		return (1);

	e = errno;
	for (u = 0; u < count; u++) {
		if (tolerated[u] == e)
			return (1);
	}
	return (0);
}
634 
635 /*--------------------------------------------------------------------
636  *
637  */
638 
639 int
VTCP_read(int fd,void * ptr,size_t len,vtim_dur tmo)640 VTCP_read(int fd, void *ptr, size_t len, vtim_dur tmo)
641 {
642 	struct pollfd pfd[1];
643 	int i, j;
644 
645 	if (tmo > 0.0) {
646 		pfd[0].fd = fd;
647 		pfd[0].events = POLLIN;
648 		pfd[0].revents = 0;
649 		j = (int)floor(tmo * 1e3);
650 		if (j == 0)
651 			j++;
652 		j = poll(pfd, 1, j);
653 		if (j == 0)
654 			return (-2);
655 	}
656 	i = read(fd, ptr, len);
657 	VTCP_Assert(i);
658 	return (i < 0 ? -1 : i);
659 }
660