1 /*
2  * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3  *
4  * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 /* this is to have tcp_info defined on systems using musl
14  * library, such as Alpine Linux
15  */
16 #define _GNU_SOURCE
17 
18 #include <ctype.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 
30 #include <netinet/tcp.h>
31 #include <netinet/in.h>
32 
33 #include <haproxy/action-t.h>
34 #include <haproxy/api.h>
35 #include <haproxy/arg.h>
36 #include <haproxy/channel.h>
37 #include <haproxy/connection.h>
38 #include <haproxy/errors.h>
39 #include <haproxy/fd.h>
40 #include <haproxy/global.h>
41 #include <haproxy/http_rules.h>
42 #include <haproxy/list.h>
43 #include <haproxy/listener.h>
44 #include <haproxy/log.h>
45 #include <haproxy/namespace.h>
46 #include <haproxy/port_range.h>
47 #include <haproxy/proto_tcp.h>
48 #include <haproxy/protocol.h>
49 #include <haproxy/proxy-t.h>
50 #include <haproxy/sample.h>
51 #include <haproxy/server.h>
52 #include <haproxy/stream-t.h>
53 #include <haproxy/tcp_rules.h>
54 #include <haproxy/tools.h>
55 
56 
57 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
58 static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
59 static void tcpv4_add_listener(struct listener *listener, int port);
60 static void tcpv6_add_listener(struct listener *listener, int port);
61 
62 /* Note: must not be declared <const> as its list will be overwritten */
63 static struct protocol proto_tcpv4 = {
64 	.name = "tcpv4",
65 	.sock_domain = AF_INET,
66 	.sock_type = SOCK_STREAM,
67 	.sock_prot = IPPROTO_TCP,
68 	.sock_family = AF_INET,
69 	.sock_addrlen = sizeof(struct sockaddr_in),
70 	.l3_addrlen = 32/8,
71 	.accept = &listener_accept,
72 	.connect = tcp_connect_server,
73 	.bind = tcp_bind_listener,
74 	.bind_all = tcp_bind_listeners,
75 	.unbind_all = unbind_all_listeners,
76 	.enable_all = enable_all_listeners,
77 	.get_src = tcp_get_src,
78 	.get_dst = tcp_get_dst,
79 	.pause = tcp_pause_listener,
80 	.add = tcpv4_add_listener,
81 	.listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
82 	.nb_listeners = 0,
83 };
84 
85 INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv4);
86 
87 /* Note: must not be declared <const> as its list will be overwritten */
88 static struct protocol proto_tcpv6 = {
89 	.name = "tcpv6",
90 	.sock_domain = AF_INET6,
91 	.sock_type = SOCK_STREAM,
92 	.sock_prot = IPPROTO_TCP,
93 	.sock_family = AF_INET6,
94 	.sock_addrlen = sizeof(struct sockaddr_in6),
95 	.l3_addrlen = 128/8,
96 	.accept = &listener_accept,
97 	.connect = tcp_connect_server,
98 	.bind = tcp_bind_listener,
99 	.bind_all = tcp_bind_listeners,
100 	.unbind_all = unbind_all_listeners,
101 	.enable_all = enable_all_listeners,
102 	.get_src = tcp_get_src,
103 	.get_dst = tcp_get_dst,
104 	.pause = tcp_pause_listener,
105 	.add = tcpv6_add_listener,
106 	.listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
107 	.nb_listeners = 0,
108 };
109 
110 INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6);
111 
112 /* Default TCP parameters, got by opening a temporary TCP socket. */
113 #ifdef TCP_MAXSEG
114 static THREAD_LOCAL int default_tcp_maxseg = -1;
115 static THREAD_LOCAL int default_tcp6_maxseg = -1;
116 #endif
117 
118 /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
119  * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
120  *  - 0 : ignore remote address (may even be a NULL pointer)
121  *  - 1 : use provided address
122  *  - 2 : use provided port
123  *  - 3 : use both
124  *
125  * The function supports multiple foreign binding methods :
126  *   - linux_tproxy: we directly bind to the foreign address
127  * The second one can be used as a fallback for the first one.
128  * This function returns 0 when everything's OK, 1 if it could not bind, to the
129  * local address, 2 if it could not bind to the foreign address.
130  */
tcp_bind_socket(int fd,int flags,struct sockaddr_storage * local,struct sockaddr_storage * remote)131 int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
132 {
133 	struct sockaddr_storage bind_addr;
134 	int foreign_ok = 0;
135 	int ret;
136 	static THREAD_LOCAL int ip_transp_working = 1;
137 	static THREAD_LOCAL int ip6_transp_working = 1;
138 
139 	switch (local->ss_family) {
140 	case AF_INET:
141 		if (flags && ip_transp_working) {
142 			/* This deserves some explanation. Some platforms will support
143 			 * multiple combinations of certain methods, so we try the
144 			 * supported ones until one succeeds.
145 			 */
146 			if (0
147 #if defined(IP_TRANSPARENT)
148 			    || (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0)
149 #endif
150 #if defined(IP_FREEBIND)
151 			    || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
152 #endif
153 #if defined(IP_BINDANY)
154 			    || (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0)
155 #endif
156 #if defined(SO_BINDANY)
157 			    || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
158 #endif
159 			    )
160 				foreign_ok = 1;
161 			else
162 				ip_transp_working = 0;
163 		}
164 		break;
165 	case AF_INET6:
166 		if (flags && ip6_transp_working) {
167 			if (0
168 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
169 			    || (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0)
170 #endif
171 #if defined(IP_FREEBIND)
172 			    || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
173 #endif
174 #if defined(IPV6_BINDANY)
175 			    || (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0)
176 #endif
177 #if defined(SO_BINDANY)
178 			    || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
179 #endif
180 			    )
181 				foreign_ok = 1;
182 			else
183 				ip6_transp_working = 0;
184 		}
185 		break;
186 	}
187 
188 	if (flags) {
189 		memset(&bind_addr, 0, sizeof(bind_addr));
190 		bind_addr.ss_family = remote->ss_family;
191 		switch (remote->ss_family) {
192 		case AF_INET:
193 			if (flags & 1)
194 				((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
195 			if (flags & 2)
196 				((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
197 			break;
198 		case AF_INET6:
199 			if (flags & 1)
200 				((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
201 			if (flags & 2)
202 				((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
203 			break;
204 		default:
205 			/* we don't want to try to bind to an unknown address family */
206 			foreign_ok = 0;
207 		}
208 	}
209 
210 	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
211 	if (foreign_ok) {
212 		if (is_inet_addr(&bind_addr)) {
213 			ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
214 			if (ret < 0)
215 				return 2;
216 		}
217 	}
218 	else {
219 		if (is_inet_addr(local)) {
220 			ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
221 			if (ret < 0)
222 				return 1;
223 		}
224 	}
225 
226 	if (!flags)
227 		return 0;
228 
229 	if (!foreign_ok)
230 		/* we could not bind to a foreign address */
231 		return 2;
232 
233 	return 0;
234 }
235 
236 /* conn->dst MUST be valid */
create_server_socket(struct connection * conn)237 static int create_server_socket(struct connection *conn)
238 {
239 	const struct netns_entry *ns = NULL;
240 
241 #ifdef USE_NS
242 	if (objt_server(conn->target)) {
243 		if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
244 			ns = conn->proxy_netns;
245 		else
246 			ns = __objt_server(conn->target)->netns;
247 	}
248 #endif
249 	return my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, IPPROTO_TCP);
250 }
251 
252 /*
253  * This function initiates a TCP connection establishment to the target assigned
254  * to connection <conn> using (si->{target,dst}). A source address may be
255  * pointed to by conn->src in case of transparent proxying. Normal source
256  * bind addresses are still determined locally (due to the possible need of a
257  * source port). conn->target may point either to a valid server or to a backend,
258  * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
259  * supported. The <data> parameter is a boolean indicating whether there are data
260  * waiting for being sent or not, in order to adjust data write polling and on
261  * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
262  * allows the caller to force using a delayed ACK when establishing the connection
263  *   - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
264  *   - CONNECT_DELACK_SMART_CONNECT = delayed ACK if backend has tcp-smart-connect, regardless of data
265  *   - CONNECT_DELACK_ALWAYS = delayed ACK regardless of backend options
266  *
267  * Note that a pending send_proxy message accounts for data.
268  *
269  * It can return one of :
270  *  - SF_ERR_NONE if everything's OK
271  *  - SF_ERR_SRVTO if there are no more servers
272  *  - SF_ERR_SRVCL if the connection was refused by the server
273  *  - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
274  *  - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
275  *  - SF_ERR_INTERNAL for any other purely internal errors
276  * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
277  *
278  * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
279  * it's invalid and the caller has nothing to do.
280  */
281 
tcp_connect_server(struct connection * conn,int flags)282 int tcp_connect_server(struct connection *conn, int flags)
283 {
284 	int fd;
285 	struct server *srv;
286 	struct proxy *be;
287 	struct conn_src *src;
288 	int use_fastopen = 0;
289 	struct sockaddr_storage *addr;
290 
291 	conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
292 
293 	switch (obj_type(conn->target)) {
294 	case OBJ_TYPE_PROXY:
295 		be = objt_proxy(conn->target);
296 		srv = NULL;
297 		break;
298 	case OBJ_TYPE_SERVER:
299 		srv = objt_server(conn->target);
300 		be = srv->proxy;
301 		/* Make sure we check that we have data before activating
302 		 * TFO, or we could trigger a kernel issue whereby after
303 		 * a successful connect() == 0, any subsequent connect()
304 		 * will return EINPROGRESS instead of EISCONN.
305 		 */
306 		use_fastopen = (srv->flags & SRV_F_FASTOPEN) &&
307 		               ((flags & (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA)) ==
308 				(CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA));
309 		break;
310 	default:
311 		conn->flags |= CO_FL_ERROR;
312 		return SF_ERR_INTERNAL;
313 	}
314 
315 	if (!conn->dst) {
316 		conn->flags |= CO_FL_ERROR;
317 		return SF_ERR_INTERNAL;
318 	}
319 
320 	fd = conn->handle.fd = create_server_socket(conn);
321 
322 	if (fd == -1) {
323 		qfprintf(stderr, "Cannot get a server socket.\n");
324 
325 		if (errno == ENFILE) {
326 			conn->err_code = CO_ER_SYS_FDLIM;
327 			send_log(be, LOG_EMERG,
328 				 "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
329 				 be->id, global.maxsock);
330 		}
331 		else if (errno == EMFILE) {
332 			conn->err_code = CO_ER_PROC_FDLIM;
333 			send_log(be, LOG_EMERG,
334 				 "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
335 				 be->id, global.maxsock);
336 		}
337 		else if (errno == ENOBUFS || errno == ENOMEM) {
338 			conn->err_code = CO_ER_SYS_MEMLIM;
339 			send_log(be, LOG_EMERG,
340 				 "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
341 				 be->id, global.maxsock);
342 		}
343 		else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
344 			conn->err_code = CO_ER_NOPROTO;
345 		}
346 		else
347 			conn->err_code = CO_ER_SOCK_ERR;
348 
349 		/* this is a resource error */
350 		conn->flags |= CO_FL_ERROR;
351 		return SF_ERR_RESOURCE;
352 	}
353 
354 	if (fd >= global.maxsock) {
355 		/* do not log anything there, it's a normal condition when this option
356 		 * is used to serialize connections to a server !
357 		 */
358 		ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
359 		close(fd);
360 		conn->err_code = CO_ER_CONF_FDLIM;
361 		conn->flags |= CO_FL_ERROR;
362 		return SF_ERR_PRXCOND; /* it is a configuration limit */
363 	}
364 
365 	if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
366 	    (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
367 		qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
368 		close(fd);
369 		conn->err_code = CO_ER_SOCK_ERR;
370 		conn->flags |= CO_FL_ERROR;
371 		return SF_ERR_INTERNAL;
372 	}
373 
374 	if (master == 1 && (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)) {
375 		ha_alert("Cannot set CLOEXEC on client socket.\n");
376 		close(fd);
377 		conn->err_code = CO_ER_SOCK_ERR;
378 		conn->flags |= CO_FL_ERROR;
379 		return SF_ERR_INTERNAL;
380 	}
381 
382 	if (be->options & PR_O_TCP_SRV_KA)
383 		setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
384 
385 	/* allow specific binding :
386 	 * - server-specific at first
387 	 * - proxy-specific next
388 	 */
389 	if (srv && srv->conn_src.opts & CO_SRC_BIND)
390 		src = &srv->conn_src;
391 	else if (be->conn_src.opts & CO_SRC_BIND)
392 		src = &be->conn_src;
393 	else
394 		src = NULL;
395 
396 	if (src) {
397 		int ret, flags = 0;
398 
399 		if (conn->src && is_inet_addr(conn->src)) {
400 			switch (src->opts & CO_SRC_TPROXY_MASK) {
401 			case CO_SRC_TPROXY_CLI:
402 				conn->flags |= CO_FL_PRIVATE;
403 				/* fall through */
404 			case CO_SRC_TPROXY_ADDR:
405 				flags = 3;
406 				break;
407 			case CO_SRC_TPROXY_CIP:
408 			case CO_SRC_TPROXY_DYN:
409 				conn->flags |= CO_FL_PRIVATE;
410 				flags = 1;
411 				break;
412 			}
413 		}
414 
415 #ifdef SO_BINDTODEVICE
416 		/* Note: this might fail if not CAP_NET_RAW */
417 		if (src->iface_name)
418 			setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
419 #endif
420 
421 		if (src->sport_range) {
422 			int attempts = 10; /* should be more than enough to find a spare port */
423 			struct sockaddr_storage sa;
424 
425 			ret = 1;
426 			memcpy(&sa, &src->source_addr, sizeof(sa));
427 
428 			do {
429 				/* note: in case of retry, we may have to release a previously
430 				 * allocated port, hence this loop's construct.
431 				 */
432 				port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
433 				fdinfo[fd].port_range = NULL;
434 
435 				if (!attempts)
436 					break;
437 				attempts--;
438 
439 				fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
440 				if (!fdinfo[fd].local_port) {
441 					conn->err_code = CO_ER_PORT_RANGE;
442 					break;
443 				}
444 
445 				fdinfo[fd].port_range = src->sport_range;
446 				set_host_port(&sa, fdinfo[fd].local_port);
447 
448 				ret = tcp_bind_socket(fd, flags, &sa, conn->src);
449 				if (ret != 0)
450 					conn->err_code = CO_ER_CANT_BIND;
451 			} while (ret != 0); /* binding NOK */
452 		}
453 		else {
454 #ifdef IP_BIND_ADDRESS_NO_PORT
455 			static THREAD_LOCAL int bind_address_no_port = 1;
456 			setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
457 #endif
458 			ret = tcp_bind_socket(fd, flags, &src->source_addr, conn->src);
459 			if (ret != 0)
460 				conn->err_code = CO_ER_CANT_BIND;
461 		}
462 
463 		if (unlikely(ret != 0)) {
464 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
465 			fdinfo[fd].port_range = NULL;
466 			close(fd);
467 
468 			if (ret == 1) {
469 				ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
470 					 be->id);
471 				send_log(be, LOG_EMERG,
472 					 "Cannot bind to source address before connect() for backend %s.\n",
473 					 be->id);
474 			} else {
475 				ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
476 					 be->id);
477 				send_log(be, LOG_EMERG,
478 					 "Cannot bind to tproxy source address before connect() for backend %s.\n",
479 					 be->id);
480 			}
481 			conn->flags |= CO_FL_ERROR;
482 			return SF_ERR_RESOURCE;
483 		}
484 	}
485 
486 #if defined(TCP_QUICKACK)
487 	/* disabling tcp quick ack now allows the first request to leave the
488 	 * machine with the first ACK. We only do this if there are pending
489 	 * data in the buffer.
490 	 */
491 	if (flags & (CONNECT_DELACK_ALWAYS) ||
492 	    ((flags & CONNECT_DELACK_SMART_CONNECT ||
493 	      (flags & CONNECT_HAS_DATA) || conn->send_proxy_ofs) &&
494 	     (be->options2 & PR_O2_SMARTCON)))
495                 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
496 #endif
497 
498 #ifdef TCP_USER_TIMEOUT
499 	/* there is not much more we can do here when it fails, it's still minor */
500 	if (srv && srv->tcp_ut)
501 		setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
502 #endif
503 
504 	if (use_fastopen) {
505 #if defined(TCP_FASTOPEN_CONNECT)
506                 setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
507 #endif
508 	}
509 	if (global.tune.server_sndbuf)
510                 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
511 
512 	if (global.tune.server_rcvbuf)
513                 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
514 
515 	addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
516 	if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
517 		if (errno == EINPROGRESS || errno == EALREADY) {
518 			/* common case, let's wait for connect status */
519 			conn->flags |= CO_FL_WAIT_L4_CONN;
520 		}
521 		else if (errno == EISCONN) {
522 			/* should normally not happen but if so, indicates that it's OK */
523 			conn->flags &= ~CO_FL_WAIT_L4_CONN;
524 		}
525 		else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
526 			char *msg;
527 			if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
528 				msg = "no free ports";
529 				conn->err_code = CO_ER_FREE_PORTS;
530 			}
531 			else {
532 				msg = "local address already in use";
533 				conn->err_code = CO_ER_ADDR_INUSE;
534 			}
535 
536 			qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
537 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
538 			fdinfo[fd].port_range = NULL;
539 			close(fd);
540 			send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
541 			conn->flags |= CO_FL_ERROR;
542 			return SF_ERR_RESOURCE;
543 		} else if (errno == ETIMEDOUT) {
544 			//qfprintf(stderr,"Connect(): ETIMEDOUT");
545 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
546 			fdinfo[fd].port_range = NULL;
547 			close(fd);
548 			conn->err_code = CO_ER_SOCK_ERR;
549 			conn->flags |= CO_FL_ERROR;
550 			return SF_ERR_SRVTO;
551 		} else {
552 			// (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
553 			//qfprintf(stderr,"Connect(): %d", errno);
554 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
555 			fdinfo[fd].port_range = NULL;
556 			close(fd);
557 			conn->err_code = CO_ER_SOCK_ERR;
558 			conn->flags |= CO_FL_ERROR;
559 			return SF_ERR_SRVCL;
560 		}
561 	}
562 	else {
563 		/* connect() == 0, this is great! */
564 		conn->flags &= ~CO_FL_WAIT_L4_CONN;
565 	}
566 
567 	conn->flags |= CO_FL_ADDR_TO_SET;
568 
569 	conn_ctrl_init(conn);       /* registers the FD */
570 	fdtab[fd].linger_risk = 1;  /* close hard if needed */
571 
572 	if (conn->flags & CO_FL_WAIT_L4_CONN) {
573 		fd_want_send(fd);
574 		fd_cant_send(fd);
575 	}
576 
577 	if (conn_xprt_init(conn) < 0) {
578 		conn_full_close(conn);
579 		conn->flags |= CO_FL_ERROR;
580 		return SF_ERR_RESOURCE;
581 	}
582 
583 	return SF_ERR_NONE;  /* connection is OK */
584 }
585 
586 
/*
 * Retrieves the source address of the connection on socket <fd> into <sa>,
 * which can hold <salen> bytes. <dir> tells which side we are: zero for a
 * listener, in which case the source is the peer's address, non-zero for an
 * initiator, in which case the source is the socket's own address. Returns 0
 * in case of success, -1 in case of error.
 */
int tcp_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	return dir ? getsockname(fd, sa, &salen)
	           : getpeername(fd, sa, &salen);
}
600 
601 
/*
 * Retrieves the destination address of the connection on socket <fd> into
 * <sa>, which can hold <salen> bytes. <dir> tells which side we are: zero for
 * a listener, non-zero for an initiator. For an initiator the destination is
 * the peer's address. For a listener it is the socket's own address, except
 * that when built with USE_TPROXY and SO_ORIGINAL_DST is available, an IPv4
 * address is replaced with the original one if the system can report it.
 * Returns 0 in case of success, -1 in case of error.
 */
int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int ret;

	if (dir)
		return getpeername(fd, sa, &salen);

	ret = getsockname(fd, sa, &salen);
	if (ret < 0)
		return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4. A failure here is not an error, the address
	 * found above remains in <sa>.
	 */
	if (((struct sockaddr_storage *)sa)->ss_family == AF_INET &&
	    getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return ret;
}
632 
/* Compares the IPv4/IPv6 addresses <a> and <b>. Returns zero when both have
 * the same family, the same port and the same address. Returns -1 when the
 * families or ports differ or when the family is neither AF_INET nor
 * AF_INET6, otherwise the non-zero result of memcmp() on the addresses.
 * XXX: Should probably be elsewhere.
 */
static int compare_sockaddr(struct sockaddr_storage *a, struct sockaddr_storage *b)
{
	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family == AF_INET) {
		const struct sockaddr_in *lhs = (const void *)a;
		const struct sockaddr_in *rhs = (const void *)b;

		if (lhs->sin_port != rhs->sin_port)
			return -1;
		return memcmp(&lhs->sin_addr, &rhs->sin_addr, sizeof(lhs->sin_addr));
	}

	if (a->ss_family == AF_INET6) {
		const struct sockaddr_in6 *lhs = (const void *)a;
		const struct sockaddr_in6 *rhs = (const void *)b;

		if (lhs->sin6_port != rhs->sin6_port)
			return -1;
		return memcmp(&lhs->sin6_addr, &rhs->sin6_addr, sizeof(lhs->sin6_addr));
	}

	/* unsupported family */
	return -1;
}
661 
662 #define LI_MANDATORY_FLAGS	(LI_O_FOREIGN | LI_O_V6ONLY | LI_O_V4V6)
663 /* When binding the listeners, check if a socket has been sent to us by the
664  * previous process that we could reuse, instead of creating a new one.
665  */
tcp_find_compatible_fd(struct listener * l)666 static int tcp_find_compatible_fd(struct listener *l)
667 {
668 	struct xfer_sock_list *xfer_sock = xfer_sock_list;
669 	int ret = -1;
670 
671 	while (xfer_sock) {
672 		if (!compare_sockaddr(&xfer_sock->addr, &l->addr)) {
673 			if ((l->interface == NULL && xfer_sock->iface == NULL) ||
674 			    (l->interface != NULL && xfer_sock->iface != NULL &&
675 			     !strcmp(l->interface, xfer_sock->iface))) {
676 				if ((l->options & LI_MANDATORY_FLAGS) ==
677 				    (xfer_sock->options & LI_MANDATORY_FLAGS)) {
678 					if ((xfer_sock->namespace == NULL &&
679 					    l->netns == NULL)
680 #ifdef USE_NS
681 					    || (xfer_sock->namespace != NULL &&
682 					    l->netns != NULL &&
683 					    !strcmp(xfer_sock->namespace,
684 					    l->netns->node.key))
685 #endif
686 					   ) {
687 						break;
688 					}
689 
690 				}
691 			}
692 		}
693 		xfer_sock = xfer_sock->next;
694 	}
695 	if (xfer_sock != NULL) {
696 		ret = xfer_sock->fd;
697 		if (xfer_sock == xfer_sock_list)
698 			xfer_sock_list = xfer_sock->next;
699 		if (xfer_sock->prev)
700 			xfer_sock->prev->next = xfer_sock->next;
701 		if (xfer_sock->next)
702 			xfer_sock->next->prev = xfer_sock->prev;
703 		free(xfer_sock->iface);
704 		free(xfer_sock->namespace);
705 		free(xfer_sock);
706 	}
707 	return ret;
708 }
709 #undef L1_MANDATORY_FLAGS
710 
711 /* This function tries to bind a TCPv4/v6 listener. It may return a warning or
712  * an error message in <errmsg> if the message is at most <errlen> bytes long
713  * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
714  * The return value is composed from ERR_ABORT, ERR_WARN,
715  * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
716  * was alright and that no message was returned. ERR_RETRYABLE means that an
717  * error occurred but that it may vanish after a retry (eg: port in use), and
718  * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
719  * the meaning of the error, but just indicate that a message is present which
720  * should be displayed with the respective level. Last, ERR_ABORT indicates
721  * that it's pointless to try to start other listeners. No error message is
722  * returned if errlen is NULL.
723  */
tcp_bind_listener(struct listener * listener,char * errmsg,int errlen)724 int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
725 {
726 	__label__ tcp_return, tcp_close_return;
727 	int fd, err;
728 	int ext, ready;
729 	socklen_t ready_len;
730 	const char *msg = NULL;
731 #ifdef TCP_MAXSEG
732 
733 	/* Create a temporary TCP socket to get default parameters we can't
734 	 * guess.
735 	 * */
736 	ready_len = sizeof(default_tcp_maxseg);
737 	if (default_tcp_maxseg == -1) {
738 		default_tcp_maxseg = -2;
739 		fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
740 		if (fd < 0)
741 			ha_warning("Failed to create a temporary socket!\n");
742 		else {
743 			if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp_maxseg,
744 			    &ready_len) == -1)
745 				ha_warning("Failed to get the default value of TCP_MAXSEG\n");
746 			close(fd);
747 		}
748 	}
749 	if (default_tcp6_maxseg == -1) {
750 		default_tcp6_maxseg = -2;
751 		fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
752 		if (fd >= 0) {
753 			if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp6_maxseg,
754 			    &ready_len) == -1)
755 				ha_warning("Failed ot get the default value of TCP_MAXSEG for IPv6\n");
756 			close(fd);
757 		}
758 	}
759 #endif
760 
761 
762 	/* ensure we never return garbage */
763 	if (errlen)
764 		*errmsg = 0;
765 
766 	if (listener->state != LI_ASSIGNED)
767 		return ERR_NONE; /* already bound */
768 
769 	err = ERR_NONE;
770 
771 	if (listener->fd == -1)
772 		listener->fd = tcp_find_compatible_fd(listener);
773 
774 	/* if the listener already has an fd assigned, then we were offered the
775 	 * fd by an external process (most likely the parent), and we don't want
776 	 * to create a new socket. However we still want to set a few flags on
777 	 * the socket.
778 	 */
779 	fd = listener->fd;
780 	ext = (fd >= 0);
781 
782 	if (!ext) {
783 		fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);
784 
785 		if (fd == -1) {
786 			err |= ERR_RETRYABLE | ERR_ALERT;
787 			msg = "cannot create listening socket";
788 			goto tcp_return;
789 		}
790 	}
791 
792 	if (fd >= global.maxsock) {
793 		err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
794 		msg = "not enough free sockets (raise '-n' parameter)";
795 		goto tcp_close_return;
796 	}
797 
798 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
799 		err |= ERR_FATAL | ERR_ALERT;
800 		msg = "cannot make socket non-blocking";
801 		goto tcp_close_return;
802 	}
803 
804 	if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
805 		/* not fatal but should be reported */
806 		msg = "cannot do so_reuseaddr";
807 		err |= ERR_ALERT;
808 	}
809 
810 	if (listener->options & LI_O_NOLINGER)
811 		setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
812 	else {
813 		struct linger tmplinger;
814 		socklen_t len = sizeof(tmplinger);
815 		if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
816 		    (tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
817 			tmplinger.l_onoff = 0;
818 			tmplinger.l_linger = 0;
819 			setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
820 			    sizeof(tmplinger));
821 		}
822 	}
823 
824 #ifdef SO_REUSEPORT
825 	/* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
826 	 * Linux, it might return an error that we will silently ignore.
827 	 */
828 	if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
829 		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
830 #endif
831 
832 	if (!ext && (listener->options & LI_O_FOREIGN)) {
833 		switch (listener->addr.ss_family) {
834 		case AF_INET:
835 			if (1
836 #if defined(IP_TRANSPARENT)
837 			    && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
838 #endif
839 #if defined(IP_FREEBIND)
840 			    && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
841 #endif
842 #if defined(IP_BINDANY)
843 			    && (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == -1)
844 #endif
845 #if defined(SO_BINDANY)
846 			    && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
847 #endif
848 			    ) {
849 				msg = "cannot make listening socket transparent";
850 				err |= ERR_ALERT;
851 			}
852 		break;
853 		case AF_INET6:
854 			if (1
855 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
856 			    && (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == -1)
857 #endif
858 #if defined(IP_FREEBIND)
859 			    && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
860 #endif
861 #if defined(IPV6_BINDANY)
862 			    && (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == -1)
863 #endif
864 #if defined(SO_BINDANY)
865 			    && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
866 #endif
867 			    ) {
868 				msg = "cannot make listening socket transparent";
869 				err |= ERR_ALERT;
870 			}
871 		break;
872 		}
873 	}
874 
875 #ifdef SO_BINDTODEVICE
876 	/* Note: this might fail if not CAP_NET_RAW */
877 	if (!ext && listener->interface) {
878 		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
879 			       listener->interface, strlen(listener->interface) + 1) == -1) {
880 			msg = "cannot bind listener to device";
881 			err |= ERR_WARN;
882 		}
883 	}
884 #endif
885 #if defined(TCP_MAXSEG)
886 	if (listener->maxseg > 0) {
887 		if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
888 			       &listener->maxseg, sizeof(listener->maxseg)) == -1) {
889 			msg = "cannot set MSS";
890 			err |= ERR_WARN;
891 		}
892 	} else if (ext) {
893 		int tmpmaxseg = -1;
894 		int defaultmss;
895 		socklen_t len = sizeof(tmpmaxseg);
896 
897 		if (listener->addr.ss_family == AF_INET)
898 			defaultmss = default_tcp_maxseg;
899 		else
900 			defaultmss = default_tcp6_maxseg;
901 
902 		getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
903 		if (defaultmss > 0 &&
904 		    tmpmaxseg != defaultmss &&
905 		    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &defaultmss, sizeof(defaultmss)) == -1) {
906 			msg = "cannot set MSS";
907 			err |= ERR_WARN;
908 		}
909 	}
910 #endif
911 #if defined(TCP_USER_TIMEOUT)
912 	if (listener->tcp_ut) {
913 		if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
914 			       &listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
915 			msg = "cannot set TCP User Timeout";
916 			err |= ERR_WARN;
917 		}
918 	} else
919 		setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
920 		    sizeof(zero));
921 #endif
922 #if defined(TCP_DEFER_ACCEPT)
923 	if (listener->options & LI_O_DEF_ACCEPT) {
924 		/* defer accept by up to one second */
925 		int accept_delay = 1;
926 		if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
927 			msg = "cannot enable DEFER_ACCEPT";
928 			err |= ERR_WARN;
929 		}
930 	} else
931 		setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
932 		    sizeof(zero));
933 #endif
934 #if defined(TCP_FASTOPEN)
935 	if (listener->options & LI_O_TCP_FO) {
936 		/* TFO needs a queue length, let's use the configured backlog */
937 		int qlen = listener_backlog(listener);
938 		if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
939 			msg = "cannot enable TCP_FASTOPEN";
940 			err |= ERR_WARN;
941 		}
942 	} else {
943 		socklen_t len;
944 		int qlen;
945 		len = sizeof(qlen);
946 		/* Only disable fast open if it was enabled, we don't want
947 		 * the kernel to create a fast open queue if there's none.
948 		 */
949 		if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
950 		    qlen != 0) {
951 			if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
952 			    sizeof(zero)) == -1) {
953 				msg = "cannot disable TCP_FASTOPEN";
954 				err |= ERR_WARN;
955 			}
956 		}
957 	}
958 #endif
959 #if defined(IPV6_V6ONLY)
960 	if (listener->options & LI_O_V6ONLY)
961                 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
962 	else if (listener->options & LI_O_V4V6)
963                 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
964 #endif
965 
966 	if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
967 		err |= ERR_RETRYABLE | ERR_ALERT;
968 		msg = "cannot bind socket";
969 		goto tcp_close_return;
970 	}
971 
972 	ready = 0;
973 	ready_len = sizeof(ready);
974 	if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
975 		ready = 0;
976 
977 	if (!(ext && ready) && /* only listen if not already done by external process */
978 	    listen(fd, listener_backlog(listener)) == -1) {
979 		err |= ERR_RETRYABLE | ERR_ALERT;
980 		msg = "cannot listen to socket";
981 		goto tcp_close_return;
982 	}
983 
984 #if defined(TCP_QUICKACK)
985 	if (listener->options & LI_O_NOQUICKACK)
986 		setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
987 	else
988 		setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
989 #endif
990 
991 	/* the socket is ready */
992 	listener->fd = fd;
993 	listener->state = LI_LISTEN;
994 
995 	fd_insert(fd, listener, listener->proto->accept,
996 	          thread_mask(listener->bind_conf->bind_thread) & all_threads_mask);
997 
998  tcp_return:
999 	if (msg && errlen) {
1000 		char pn[INET6_ADDRSTRLEN];
1001 
1002 		addr_to_str(&listener->addr, pn, sizeof(pn));
1003 		snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->addr));
1004 	}
1005 	return err;
1006 
1007  tcp_close_return:
1008 	close(fd);
1009 	goto tcp_return;
1010 }
1011 
1012 /* This function creates all TCP sockets bound to the protocol entry <proto>.
1013  * It is intended to be used as the protocol's bind_all() function.
1014  * The sockets will be registered but not added to any fd_set, in order not to
1015  * loose them across the fork(). A call to enable_all_listeners() is needed
1016  * to complete initialization. The return value is composed from ERR_*.
1017  *
1018  * Must be called with proto_lock held.
1019  *
1020  */
tcp_bind_listeners(struct protocol * proto,char * errmsg,int errlen)1021 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen)
1022 {
1023 	struct listener *listener;
1024 	int err = ERR_NONE;
1025 
1026 	list_for_each_entry(listener, &proto->listeners, proto_list) {
1027 		err |= tcp_bind_listener(listener, errmsg, errlen);
1028 		if (err & ERR_ABORT)
1029 			break;
1030 	}
1031 
1032 	return err;
1033 }
1034 
1035 /* Add <listener> to the list of tcpv4 listeners, on port <port>. The
1036  * listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
1037  * The number of listeners for the protocol is updated.
1038  *
1039  * Must be called with proto_lock held.
1040  *
1041  */
tcpv4_add_listener(struct listener * listener,int port)1042 static void tcpv4_add_listener(struct listener *listener, int port)
1043 {
1044 	if (listener->state != LI_INIT)
1045 		return;
1046 	listener->state = LI_ASSIGNED;
1047 	listener->proto = &proto_tcpv4;
1048 	((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
1049 	LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
1050 	proto_tcpv4.nb_listeners++;
1051 }
1052 
1053 /* Add <listener> to the list of tcpv6 listeners, on port <port>. The
1054  * listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
1055  * The number of listeners for the protocol is updated.
1056  *
1057  * Must be called with proto_lock held.
1058  *
1059  */
tcpv6_add_listener(struct listener * listener,int port)1060 static void tcpv6_add_listener(struct listener *listener, int port)
1061 {
1062 	if (listener->state != LI_INIT)
1063 		return;
1064 	listener->state = LI_ASSIGNED;
1065 	listener->proto = &proto_tcpv6;
1066 	((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
1067 	LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
1068 	proto_tcpv6.nb_listeners++;
1069 }
1070 
1071 /* Pause a listener. Returns < 0 in case of failure, 0 if the listener
1072  * was totally stopped, or > 0 if correctly paused.
1073  */
tcp_pause_listener(struct listener * l)1074 int tcp_pause_listener(struct listener *l)
1075 {
1076 	if (shutdown(l->fd, SHUT_WR) != 0)
1077 		return -1; /* Solaris dies here */
1078 
1079 	if (listen(l->fd, listener_backlog(l)) != 0)
1080 		return -1; /* OpenBSD dies here */
1081 
1082 	if (shutdown(l->fd, SHUT_RD) != 0)
1083 		return -1; /* should always be OK */
1084 	return 1;
1085 }
1086 
1087 /*
1088  * Execute the "set-src" action. May be called from {tcp,http}request.
1089  * It only changes the address and tries to preserve the original port. If the
1090  * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1091  */
tcp_action_req_set_src(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1092 enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
1093                                               struct session *sess, struct stream *s, int flags)
1094 {
1095 	struct connection *cli_conn;
1096 
1097 	if ((cli_conn = objt_conn(sess->origin)) && conn_get_src(cli_conn)) {
1098 		struct sample *smp;
1099 
1100 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1101 		if (smp) {
1102 			int port = get_net_port(cli_conn->src);
1103 
1104 			if (smp->data.type == SMP_T_IPV4) {
1105 				((struct sockaddr_in *)cli_conn->src)->sin_family = AF_INET;
1106 				((struct sockaddr_in *)cli_conn->src)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1107 				((struct sockaddr_in *)cli_conn->src)->sin_port = port;
1108 			} else if (smp->data.type == SMP_T_IPV6) {
1109 				((struct sockaddr_in6 *)cli_conn->src)->sin6_family = AF_INET6;
1110 				memcpy(&((struct sockaddr_in6 *)cli_conn->src)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1111 				((struct sockaddr_in6 *)cli_conn->src)->sin6_port = port;
1112 			}
1113 		}
1114 		cli_conn->flags |= CO_FL_ADDR_FROM_SET;
1115 	}
1116 	return ACT_RET_CONT;
1117 }
1118 
1119 /*
1120  * Execute the "set-dst" action. May be called from {tcp,http}request.
1121  * It only changes the address and tries to preserve the original port. If the
1122  * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1123  */
tcp_action_req_set_dst(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1124 enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
1125                                               struct session *sess, struct stream *s, int flags)
1126 {
1127 	struct connection *cli_conn;
1128 
1129 	if ((cli_conn = objt_conn(sess->origin)) && conn_get_dst(cli_conn)) {
1130 		struct sample *smp;
1131 
1132 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1133 		if (smp) {
1134 			int port = get_net_port(cli_conn->dst);
1135 
1136 			if (smp->data.type == SMP_T_IPV4) {
1137 				((struct sockaddr_in *)cli_conn->dst)->sin_family = AF_INET;
1138 				((struct sockaddr_in *)cli_conn->dst)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1139 				((struct sockaddr_in *)cli_conn->dst)->sin_port = port;
1140 			} else if (smp->data.type == SMP_T_IPV6) {
1141 				((struct sockaddr_in6 *)cli_conn->dst)->sin6_family = AF_INET6;
1142 				memcpy(&((struct sockaddr_in6 *)cli_conn->dst)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1143 				((struct sockaddr_in6 *)cli_conn->dst)->sin6_port = port;
1144 			}
1145 			cli_conn->flags |= CO_FL_ADDR_TO_SET;
1146 		}
1147 	}
1148 	return ACT_RET_CONT;
1149 }
1150 
1151 /*
1152  * Execute the "set-src-port" action. May be called from {tcp,http}request.
1153  * We must test the sin_family before setting the port. If the address family
1154  * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1155  * and the port is assigned.
1156  */
tcp_action_req_set_src_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1157 enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
1158                                               struct session *sess, struct stream *s, int flags)
1159 {
1160 	struct connection *cli_conn;
1161 
1162 	if ((cli_conn = objt_conn(sess->origin)) && conn_get_src(cli_conn)) {
1163 		struct sample *smp;
1164 
1165 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1166 		if (smp) {
1167 			if (cli_conn->src->ss_family == AF_INET6) {
1168 				((struct sockaddr_in6 *)cli_conn->src)->sin6_port = htons(smp->data.u.sint);
1169 			} else {
1170 				if (cli_conn->src->ss_family != AF_INET) {
1171 					cli_conn->src->ss_family = AF_INET;
1172 					((struct sockaddr_in *)cli_conn->src)->sin_addr.s_addr = 0;
1173 				}
1174 				((struct sockaddr_in *)cli_conn->src)->sin_port = htons(smp->data.u.sint);
1175 			}
1176 		}
1177 	}
1178 	return ACT_RET_CONT;
1179 }
1180 
1181 /*
1182  * Execute the "set-dst-port" action. May be called from {tcp,http}request.
1183  * We must test the sin_family before setting the port. If the address family
1184  * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1185  * and the port is assigned.
1186  */
tcp_action_req_set_dst_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1187 enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
1188                                               struct session *sess, struct stream *s, int flags)
1189 {
1190 	struct connection *cli_conn;
1191 
1192 	if ((cli_conn = objt_conn(sess->origin)) && conn_get_dst(cli_conn)) {
1193 		struct sample *smp;
1194 
1195 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1196 		if (smp) {
1197 			if (cli_conn->dst->ss_family == AF_INET6) {
1198 				((struct sockaddr_in6 *)cli_conn->dst)->sin6_port = htons(smp->data.u.sint);
1199 			} else {
1200 				if (cli_conn->dst->ss_family != AF_INET) {
1201 					cli_conn->dst->ss_family = AF_INET;
1202 					((struct sockaddr_in *)cli_conn->dst)->sin_addr.s_addr = 0;
1203 				}
1204 				((struct sockaddr_in *)cli_conn->dst)->sin_port = htons(smp->data.u.sint);
1205 			}
1206 		}
1207 	}
1208 	return ACT_RET_CONT;
1209 }
1210 
1211 /* Executes the "silent-drop" action. May be called from {tcp,http}{request,response} */
tcp_exec_action_silent_drop(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * strm,int flags)1212 static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *strm, int flags)
1213 {
1214 	struct connection *conn = objt_conn(sess->origin);
1215 
1216 	if (!conn)
1217 		goto out;
1218 
1219 	if (!conn_ctrl_ready(conn))
1220 		goto out;
1221 
1222 #ifdef TCP_QUICKACK
1223 	/* drain is needed only to send the quick ACK */
1224 	conn_sock_drain(conn);
1225 
1226 	/* re-enable quickack if it was disabled to ack all data and avoid
1227 	 * retransmits from the client that might trigger a real reset.
1228 	 */
1229 	setsockopt(conn->handle.fd, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
1230 #endif
1231 	/* lingering must absolutely be disabled so that we don't send a
1232 	 * shutdown(), this is critical to the TCP_REPAIR trick. When no stream
1233 	 * is present, returning with ERR will cause lingering to be disabled.
1234 	 */
1235 	if (strm)
1236 		strm->si[0].flags |= SI_FL_NOLINGER;
1237 
1238 	/* We're on the client-facing side, we must force to disable lingering to
1239 	 * ensure we will use an RST exclusively and kill any pending data.
1240 	 */
1241 	fdtab[conn->handle.fd].linger_risk = 1;
1242 
1243 #ifdef TCP_REPAIR
1244 	if (setsockopt(conn->handle.fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
1245 		/* socket will be quiet now */
1246 		goto out;
1247 	}
1248 #endif
1249 	/* either TCP_REPAIR is not defined or it failed (eg: permissions).
1250 	 * Let's fall back on the TTL trick, though it only works for routed
1251 	 * network and has no effect on local net.
1252 	 */
1253 #ifdef IP_TTL
1254 	if (conn->src && conn->src->ss_family == AF_INET)
1255 		setsockopt(conn->handle.fd, SOL_IP, IP_TTL, &one, sizeof(one));
1256 #endif
1257 #ifdef IPV6_UNICAST_HOPS
1258 #if defined(SOL_IPV6)
1259 	if (conn->src && conn->src->ss_family == AF_INET6)
1260 		setsockopt(conn->handle.fd, SOL_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1261 #elif defined(IPPROTO_IPV6)
1262 	if (conn->src && conn->src->ss_family == AF_INET6)
1263 		setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1264 #endif
1265 #endif
1266  out:
1267 	/* kill the stream if any */
1268 	if (strm) {
1269 		channel_abort(&strm->req);
1270 		channel_abort(&strm->res);
1271 		strm->req.analysers &= AN_REQ_FLT_END;
1272 		strm->res.analysers &= AN_RES_FLT_END;
1273 		if (strm->flags & SF_BE_ASSIGNED)
1274 			_HA_ATOMIC_ADD(&strm->be->be_counters.denied_req, 1);
1275 		if (!(strm->flags & SF_ERR_MASK))
1276 			strm->flags |= SF_ERR_PRXCOND;
1277 		if (!(strm->flags & SF_FINST_MASK))
1278 			strm->flags |= SF_FINST_R;
1279 	}
1280 
1281 	_HA_ATOMIC_ADD(&sess->fe->fe_counters.denied_req, 1);
1282 	if (sess->listener && sess->listener->counters)
1283 		_HA_ATOMIC_ADD(&sess->listener->counters->denied_req, 1);
1284 
1285 	return ACT_RET_ABRT;
1286 }
1287 
1288 /* parse "set-{src,dst}[-port]" action */
tcp_parse_set_src_dst(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1289 enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
1290 {
1291 	int cur_arg;
1292 	struct sample_expr *expr;
1293 	unsigned int where;
1294 
1295 	cur_arg = *orig_arg;
1296 	expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
1297 	if (!expr)
1298 		return ACT_RET_PRS_ERR;
1299 
1300 	where = 0;
1301 	if (px->cap & PR_CAP_FE)
1302 		where |= SMP_VAL_FE_HRQ_HDR;
1303 	if (px->cap & PR_CAP_BE)
1304 		where |= SMP_VAL_BE_HRQ_HDR;
1305 
1306 	if (!(expr->fetch->val & where)) {
1307 		memprintf(err,
1308 			  "fetch method '%s' extracts information from '%s', none of which is available here",
1309 			  args[cur_arg-1], sample_src_names(expr->fetch->use));
1310 		free(expr);
1311 		return ACT_RET_PRS_ERR;
1312 	}
1313 	rule->arg.expr = expr;
1314 	rule->action = ACT_CUSTOM;
1315 
1316 	if (!strcmp(args[*orig_arg-1], "set-src")) {
1317 		rule->action_ptr = tcp_action_req_set_src;
1318 	} else if (!strcmp(args[*orig_arg-1], "set-src-port")) {
1319 		rule->action_ptr = tcp_action_req_set_src_port;
1320 	} else if (!strcmp(args[*orig_arg-1], "set-dst")) {
1321 		rule->action_ptr = tcp_action_req_set_dst;
1322 	} else if (!strcmp(args[*orig_arg-1], "set-dst-port")) {
1323 		rule->action_ptr = tcp_action_req_set_dst_port;
1324 	} else {
1325 		return ACT_RET_PRS_ERR;
1326 	}
1327 
1328 	(*orig_arg)++;
1329 
1330 	return ACT_RET_PRS_OK;
1331 }
1332 
1333 
1334 /* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
1335  * success, ACT_RET_PRS_ERR on error.
1336  */
tcp_parse_silent_drop(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1337 static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
1338                                                 struct act_rule *rule, char **err)
1339 {
1340 	rule->action     = ACT_CUSTOM;
1341 	rule->action_ptr = tcp_exec_action_silent_drop;
1342 	return ACT_RET_PRS_OK;
1343 }
1344 
1345 
1346 /************************************************************************/
1347 /*       All supported sample fetch functions must be declared here     */
1348 /************************************************************************/
1349 
1350 /* fetch the connection's source IPv4/IPv6 address */
smp_fetch_src(const struct arg * args,struct sample * smp,const char * kw,void * private)1351 int smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
1352 {
1353 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1354 
1355 	if (!cli_conn)
1356 		return 0;
1357 
1358 	if (!conn_get_src(cli_conn))
1359 		return 0;
1360 
1361 	switch (cli_conn->src->ss_family) {
1362 	case AF_INET:
1363 		smp->data.u.ipv4 = ((struct sockaddr_in *)cli_conn->src)->sin_addr;
1364 		smp->data.type = SMP_T_IPV4;
1365 		break;
1366 	case AF_INET6:
1367 		smp->data.u.ipv6 = ((struct sockaddr_in6 *)cli_conn->src)->sin6_addr;
1368 		smp->data.type = SMP_T_IPV6;
1369 		break;
1370 	default:
1371 		return 0;
1372 	}
1373 
1374 	smp->flags = 0;
1375 	return 1;
1376 }
1377 
1378 /* set temp integer to the connection's source port */
1379 static int
smp_fetch_sport(const struct arg * args,struct sample * smp,const char * k,void * private)1380 smp_fetch_sport(const struct arg *args, struct sample *smp, const char *k, void *private)
1381 {
1382 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1383 
1384 	if (!cli_conn)
1385 		return 0;
1386 
1387 	if (!conn_get_src(cli_conn))
1388 		return 0;
1389 
1390 	smp->data.type = SMP_T_SINT;
1391 	if (!(smp->data.u.sint = get_host_port(cli_conn->src)))
1392 		return 0;
1393 
1394 	smp->flags = 0;
1395 	return 1;
1396 }
1397 
1398 /* fetch the connection's destination IPv4/IPv6 address */
1399 static int
smp_fetch_dst(const struct arg * args,struct sample * smp,const char * kw,void * private)1400 smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
1401 {
1402 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1403 
1404 	if (!cli_conn)
1405 		return 0;
1406 
1407 	if (!conn_get_dst(cli_conn))
1408 		return 0;
1409 
1410 	switch (cli_conn->dst->ss_family) {
1411 	case AF_INET:
1412 		smp->data.u.ipv4 = ((struct sockaddr_in *)cli_conn->dst)->sin_addr;
1413 		smp->data.type = SMP_T_IPV4;
1414 		break;
1415 	case AF_INET6:
1416 		smp->data.u.ipv6 = ((struct sockaddr_in6 *)cli_conn->dst)->sin6_addr;
1417 		smp->data.type = SMP_T_IPV6;
1418 		break;
1419 	default:
1420 		return 0;
1421 	}
1422 
1423 	smp->flags = 0;
1424 	return 1;
1425 }
1426 
1427 /* check if the destination address of the front connection is local to the
1428  * system or if it was intercepted.
1429  */
smp_fetch_dst_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1430 int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1431 {
1432 	struct connection *conn = objt_conn(smp->sess->origin);
1433 	struct listener *li = smp->sess->listener;
1434 
1435 	if (!conn)
1436 		return 0;
1437 
1438 	if (!conn_get_dst(conn))
1439 		return 0;
1440 
1441 	smp->data.type = SMP_T_BOOL;
1442 	smp->flags = 0;
1443 	smp->data.u.sint = addr_is_local(li->netns, conn->dst);
1444 	return smp->data.u.sint >= 0;
1445 }
1446 
1447 /* check if the source address of the front connection is local to the system
1448  * or not.
1449  */
smp_fetch_src_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1450 int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1451 {
1452 	struct connection *conn = objt_conn(smp->sess->origin);
1453 	struct listener *li = smp->sess->listener;
1454 
1455 	if (!conn)
1456 		return 0;
1457 
1458 	if (!conn_get_src(conn))
1459 		return 0;
1460 
1461 	smp->data.type = SMP_T_BOOL;
1462 	smp->flags = 0;
1463 	smp->data.u.sint = addr_is_local(li->netns, conn->src);
1464 	return smp->data.u.sint >= 0;
1465 }
1466 
1467 /* set temp integer to the frontend connexion's destination port */
1468 static int
smp_fetch_dport(const struct arg * args,struct sample * smp,const char * kw,void * private)1469 smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
1470 {
1471 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1472 
1473 	if (!cli_conn)
1474 		return 0;
1475 
1476 	if (!conn_get_dst(cli_conn))
1477 		return 0;
1478 
1479 	smp->data.type = SMP_T_SINT;
1480 	if (!(smp->data.u.sint = get_host_port(cli_conn->dst)))
1481 		return 0;
1482 
1483 	smp->flags = 0;
1484 	return 1;
1485 }
1486 
1487 #ifdef TCP_INFO
1488 
1489 
1490 /* Validates the arguments passed to "fc_*" fetch keywords returning a time
1491  * value. These keywords support an optional string representing the unit of the
1492  * result: "us" for microseconds and "ms" for milliseconds". Returns 0 on error
1493  * and non-zero if OK.
1494  */
val_fc_time_value(struct arg * args,char ** err)1495 static int val_fc_time_value(struct arg *args, char **err)
1496 {
1497 	if (args[0].type == ARGT_STR) {
1498 		if (strcmp(args[0].data.str.area, "us") == 0) {
1499 			chunk_destroy(&args[0].data.str);
1500 			args[0].type = ARGT_SINT;
1501 			args[0].data.sint = TIME_UNIT_US;
1502 		}
1503 		else if (strcmp(args[0].data.str.area, "ms") == 0) {
1504 			chunk_destroy(&args[0].data.str);
1505 			args[0].type = ARGT_SINT;
1506 			args[0].data.sint = TIME_UNIT_MS;
1507 		}
1508 		else {
1509 			memprintf(err, "expects 'us' or 'ms', got '%s'",
1510 				  args[0].data.str.area);
1511 			return 0;
1512 		}
1513 	}
1514 	else {
1515 		memprintf(err, "Unexpected arg type");
1516 		return 0;
1517 	}
1518 
1519 	return 1;
1520 }
1521 
1522 /* Validates the arguments passed to "fc_*" fetch keywords returning a
1523  * counter. These keywords should be used without any keyword, but because of a
1524  * bug in previous versions, an optional string argument may be passed. In such
1525  * case, the argument is ignored and a warning is emitted. Returns 0 on error
1526  * and non-zero if OK.
1527  */
var_fc_counter(struct arg * args,char ** err)1528 static int var_fc_counter(struct arg *args, char **err)
1529 {
1530 	if (args[0].type != ARGT_STOP) {
1531 		ha_warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
1532 		if (args[0].type == ARGT_STR)
1533 			chunk_destroy(&args[0].data.str);
1534 		args[0].type = ARGT_STOP;
1535 	}
1536 
1537 	return 1;
1538 }
1539 
1540 /* Returns some tcp_info data if it's available. "dir" must be set to 0 if
1541  * the client connection is required, otherwise it is set to 1. "val" represents
1542  * the required value.
1543  * If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
1544  */
get_tcp_info(const struct arg * args,struct sample * smp,int dir,int val)1545 static inline int get_tcp_info(const struct arg *args, struct sample *smp,
1546                                int dir, int val)
1547 {
1548 	struct connection *conn;
1549 	struct tcp_info info;
1550 	socklen_t optlen;
1551 
1552 	/* strm can be null. */
1553 	if (!smp->strm)
1554 		return 0;
1555 
1556 	/* get the object associated with the stream interface.The
1557 	 * object can be other thing than a connection. For example,
1558 	 * it be a appctx. */
1559 	conn = cs_conn(objt_cs(smp->strm->si[dir].end));
1560 	if (!conn)
1561 		return 0;
1562 
1563 	/* The fd may not be available for the tcp_info struct, and the
1564 	  syscal can fail. */
1565 	optlen = sizeof(info);
1566 	if (getsockopt(conn->handle.fd, SOL_TCP, TCP_INFO, &info, &optlen) == -1)
1567 		return 0;
1568 
1569 	/* extract the value. */
1570 	smp->data.type = SMP_T_SINT;
1571 	switch (val) {
1572 	case 0:  smp->data.u.sint = info.tcpi_rtt;            break;
1573 	case 1:  smp->data.u.sint = info.tcpi_rttvar;         break;
1574 #if defined(__linux__)
1575 	/* these ones are common to all Linux versions */
1576 	case 2:  smp->data.u.sint = info.tcpi_unacked;        break;
1577 	case 3:  smp->data.u.sint = info.tcpi_sacked;         break;
1578 	case 4:  smp->data.u.sint = info.tcpi_lost;           break;
1579 	case 5:  smp->data.u.sint = info.tcpi_retrans;        break;
1580 	case 6:  smp->data.u.sint = info.tcpi_fackets;        break;
1581 	case 7:  smp->data.u.sint = info.tcpi_reordering;     break;
1582 #elif defined(__FreeBSD__) || defined(__NetBSD__)
1583 	/* the ones are found on FreeBSD and NetBSD featuring TCP_INFO */
1584 	case 2:  smp->data.u.sint = info.__tcpi_unacked;      break;
1585 	case 3:  smp->data.u.sint = info.__tcpi_sacked;       break;
1586 	case 4:  smp->data.u.sint = info.__tcpi_lost;         break;
1587 	case 5:  smp->data.u.sint = info.__tcpi_retrans;      break;
1588 	case 6:  smp->data.u.sint = info.__tcpi_fackets;      break;
1589 	case 7:  smp->data.u.sint = info.__tcpi_reordering;   break;
1590 #endif
1591 	default: return 0;
1592 	}
1593 
1594 	return 1;
1595 }
1596 
1597 /* get the mean rtt of a client connection */
1598 static int
smp_fetch_fc_rtt(const struct arg * args,struct sample * smp,const char * kw,void * private)1599 smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
1600 {
1601 	if (!get_tcp_info(args, smp, 0, 0))
1602 		return 0;
1603 
1604 	/* By default or if explicitly specified, convert rtt to ms */
1605 	if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1606 		smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1607 
1608 	return 1;
1609 }
1610 
1611 /* get the variance of the mean rtt of a client connection */
1612 static int
smp_fetch_fc_rttvar(const struct arg * args,struct sample * smp,const char * kw,void * private)1613 smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
1614 {
1615 	if (!get_tcp_info(args, smp, 0, 1))
1616 		return 0;
1617 
1618 	/* By default or if explicitly specified, convert rttvar to ms */
1619 	if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1620 		smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1621 
1622 	return 1;
1623 }
1624 
1625 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
1626 
/* Gets the unacked counter of a client connection (tcp_info field #2).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 2) ? 1 : 0;
}
1635 
/* Gets the sacked counter of a client connection (tcp_info field #3).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 3) ? 1 : 0;
}
1644 
/* Gets the lost counter of a client connection (tcp_info field #4).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 4) ? 1 : 0;
}
1653 
/* Gets the retrans counter of a client connection (tcp_info field #5).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 5) ? 1 : 0;
}
1662 
/* Gets the fackets counter of a client connection (tcp_info field #6).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 6) ? 1 : 0;
}
1671 
/* Gets the reordering counter of a client connection (tcp_info field #7).
 * Returns 1 when the sample was filled, otherwise 0.
 */
static int
smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 7) ? 1 : 0;
}
1680 #endif // linux || freebsd || netbsd
1681 #endif // TCP_INFO
1682 
1683 #ifdef IPV6_V6ONLY
1684 /* parse the "v4v6" bind keyword */
bind_parse_v4v6(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1685 static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1686 {
1687 	struct listener *l;
1688 
1689 	list_for_each_entry(l, &conf->listeners, by_bind) {
1690 		if (l->addr.ss_family == AF_INET6)
1691 			l->options |= LI_O_V4V6;
1692 	}
1693 
1694 	return 0;
1695 }
1696 
1697 /* parse the "v6only" bind keyword */
bind_parse_v6only(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1698 static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1699 {
1700 	struct listener *l;
1701 
1702 	list_for_each_entry(l, &conf->listeners, by_bind) {
1703 		if (l->addr.ss_family == AF_INET6)
1704 			l->options |= LI_O_V6ONLY;
1705 	}
1706 
1707 	return 0;
1708 }
1709 #endif
1710 
1711 #ifdef CONFIG_HAP_TRANSPARENT
1712 /* parse the "transparent" bind keyword */
bind_parse_transparent(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1713 static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1714 {
1715 	struct listener *l;
1716 
1717 	list_for_each_entry(l, &conf->listeners, by_bind) {
1718 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1719 			l->options |= LI_O_FOREIGN;
1720 	}
1721 
1722 	return 0;
1723 }
1724 #endif
1725 
1726 #ifdef TCP_DEFER_ACCEPT
1727 /* parse the "defer-accept" bind keyword */
bind_parse_defer_accept(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1728 static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1729 {
1730 	struct listener *l;
1731 
1732 	list_for_each_entry(l, &conf->listeners, by_bind) {
1733 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1734 			l->options |= LI_O_DEF_ACCEPT;
1735 	}
1736 
1737 	return 0;
1738 }
1739 #endif
1740 
1741 #ifdef TCP_FASTOPEN
1742 /* parse the "tfo" bind keyword */
bind_parse_tfo(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1743 static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1744 {
1745 	struct listener *l;
1746 
1747 	list_for_each_entry(l, &conf->listeners, by_bind) {
1748 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1749 			l->options |= LI_O_TCP_FO;
1750 	}
1751 
1752 	return 0;
1753 }
1754 #endif
1755 
1756 #ifdef TCP_MAXSEG
1757 /* parse the "mss" bind keyword */
bind_parse_mss(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1758 static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1759 {
1760 	struct listener *l;
1761 	int mss;
1762 
1763 	if (!*args[cur_arg + 1]) {
1764 		memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
1765 		return ERR_ALERT | ERR_FATAL;
1766 	}
1767 
1768 	mss = atoi(args[cur_arg + 1]);
1769 	if (!mss || abs(mss) > 65535) {
1770 		memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
1771 		return ERR_ALERT | ERR_FATAL;
1772 	}
1773 
1774 	list_for_each_entry(l, &conf->listeners, by_bind) {
1775 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1776 			l->maxseg = mss;
1777 	}
1778 
1779 	return 0;
1780 }
1781 #endif
1782 
1783 #ifdef TCP_USER_TIMEOUT
1784 /* parse the "tcp-ut" bind keyword */
bind_parse_tcp_ut(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1785 static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1786 {
1787 	const char *ptr = NULL;
1788 	struct listener *l;
1789 	unsigned int timeout;
1790 
1791 	if (!*args[cur_arg + 1]) {
1792 		memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
1793 		return ERR_ALERT | ERR_FATAL;
1794 	}
1795 
1796 	ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
1797 	if (ptr == PARSE_TIME_OVER) {
1798 		memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
1799 			  args[cur_arg+1], args[cur_arg]);
1800 		return ERR_ALERT | ERR_FATAL;
1801 	}
1802 	else if (ptr == PARSE_TIME_UNDER) {
1803 		memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
1804 			  args[cur_arg+1], args[cur_arg]);
1805 		return ERR_ALERT | ERR_FATAL;
1806 	}
1807 	else if (ptr) {
1808 		memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
1809 		return ERR_ALERT | ERR_FATAL;
1810 	}
1811 
1812 	list_for_each_entry(l, &conf->listeners, by_bind) {
1813 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1814 			l->tcp_ut = timeout;
1815 	}
1816 
1817 	return 0;
1818 }
1819 #endif
1820 
1821 #ifdef SO_BINDTODEVICE
1822 /* parse the "interface" bind keyword */
bind_parse_interface(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1823 static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1824 {
1825 	struct listener *l;
1826 
1827 	if (!*args[cur_arg + 1]) {
1828 		memprintf(err, "'%s' : missing interface name", args[cur_arg]);
1829 		return ERR_ALERT | ERR_FATAL;
1830 	}
1831 
1832 	list_for_each_entry(l, &conf->listeners, by_bind) {
1833 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1834 			l->interface = strdup(args[cur_arg + 1]);
1835 	}
1836 
1837 	return 0;
1838 }
1839 #endif
1840 
1841 #ifdef USE_NS
1842 /* parse the "namespace" bind keyword */
bind_parse_namespace(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1843 static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1844 {
1845 	struct listener *l;
1846 	char *namespace = NULL;
1847 
1848 	if (!*args[cur_arg + 1]) {
1849 		memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
1850 		return ERR_ALERT | ERR_FATAL;
1851 	}
1852 	namespace = args[cur_arg + 1];
1853 
1854 	list_for_each_entry(l, &conf->listeners, by_bind) {
1855 		l->netns = netns_store_lookup(namespace, strlen(namespace));
1856 
1857 		if (l->netns == NULL)
1858 			l->netns = netns_store_insert(namespace);
1859 
1860 		if (l->netns == NULL) {
1861 			ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
1862 			return ERR_ALERT | ERR_FATAL;
1863 		}
1864 	}
1865 	return 0;
1866 }
1867 #endif
1868 
1869 #ifdef TCP_USER_TIMEOUT
1870 /* parse the "tcp-ut" server keyword */
srv_parse_tcp_ut(char ** args,int * cur_arg,struct proxy * px,struct server * newsrv,char ** err)1871 static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
1872 {
1873 	const char *ptr = NULL;
1874 	unsigned int timeout;
1875 
1876 	if (!*args[*cur_arg + 1]) {
1877 		memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
1878 		return ERR_ALERT | ERR_FATAL;
1879 	}
1880 
1881 	ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
1882 	if (ptr == PARSE_TIME_OVER) {
1883 		memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
1884 			  args[*cur_arg+1], args[*cur_arg]);
1885 		return ERR_ALERT | ERR_FATAL;
1886 	}
1887 	else if (ptr == PARSE_TIME_UNDER) {
1888 		memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
1889 			  args[*cur_arg+1], args[*cur_arg]);
1890 		return ERR_ALERT | ERR_FATAL;
1891 	}
1892 	else if (ptr) {
1893 		memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
1894 		return ERR_ALERT | ERR_FATAL;
1895 	}
1896 
1897 	if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
1898 		newsrv->tcp_ut = timeout;
1899 
1900 	return 0;
1901 }
1902 #endif
1903 
1904 
/* Sample fetch keywords provided by this file: local/remote address and port
 * fetches, plus the "fc_*" fetches when TCP_INFO is defined (the counter ones
 * being further restricted to Linux, FreeBSD and NetBSD).
 * Note: must not be declared <const> as its list will be overwritten.
 * Note: fetches that may return multiple types must be declared as the lowest
 * common denominator, the type that can be cast into all other ones. For
 * instance v4/v6 must be declared v4.
 */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
	{ "dst",      smp_fetch_dst,   0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "src",      smp_fetch_src,   0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
#ifdef TCP_INFO
	/* these ones take one optional string argument, checked by val_fc_time_value */
	{ "fc_rtt",           smp_fetch_fc_rtt,           ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_rttvar",        smp_fetch_fc_rttvar,        ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
	/* these ones take one optional string argument, checked by var_fc_counter */
	{ "fc_unacked",       smp_fetch_fc_unacked,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_sacked",        smp_fetch_fc_sacked,        ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_retrans",       smp_fetch_fc_retrans,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_fackets",       smp_fetch_fc_fackets,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_lost",          smp_fetch_fc_lost,          ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_reordering",    smp_fetch_fc_reordering,    ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
#endif // linux || freebsd || netbsd
#endif // TCP_INFO
	{ /* END */ },
}};

/* registers the table above through sample_register_fetches() (STG_REGISTER initcall) */
INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
1933 
1934 /************************************************************************/
1935 /*           All supported bind keywords must be declared here.         */
1936 /************************************************************************/
1937 
1938 /* Note: must not be declared <const> as its list will be overwritten.
1939  * Please take care of keeping this list alphabetically sorted, doing so helps
1940  * all code contributors.
1941  * Optional keywords are also declared with a NULL ->parse() function so that
1942  * the config parser can report an appropriate error when a known keyword was
1943  * not enabled.
1944  */
1945 static struct bind_kw_list bind_kws = { "TCP", { }, {
1946 #ifdef TCP_DEFER_ACCEPT
1947 	{ "defer-accept",  bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
1948 #endif
1949 #ifdef SO_BINDTODEVICE
1950 	{ "interface",     bind_parse_interface,    1 }, /* specifically bind to this interface */
1951 #endif
1952 #ifdef TCP_MAXSEG
1953 	{ "mss",           bind_parse_mss,          1 }, /* set MSS of listening socket */
1954 #endif
1955 #ifdef TCP_USER_TIMEOUT
1956 	{ "tcp-ut",        bind_parse_tcp_ut,       1 }, /* set User Timeout on listening socket */
1957 #endif
1958 #ifdef TCP_FASTOPEN
1959 	{ "tfo",           bind_parse_tfo,          0 }, /* enable TCP_FASTOPEN of listening socket */
1960 #endif
1961 #ifdef CONFIG_HAP_TRANSPARENT
1962 	{ "transparent",   bind_parse_transparent,  0 }, /* transparently bind to the specified addresses */
1963 #endif
1964 #ifdef IPV6_V6ONLY
1965 	{ "v4v6",          bind_parse_v4v6,         0 }, /* force socket to bind to IPv4+IPv6 */
1966 	{ "v6only",        bind_parse_v6only,       0 }, /* force socket to bind to IPv6 only */
1967 #endif
1968 #ifdef USE_NS
1969 	{ "namespace",     bind_parse_namespace,    1 },
1970 #endif
1971 	/* the versions with the NULL parse function*/
1972 	{ "defer-accept",  NULL,  0 },
1973 	{ "interface",     NULL,  1 },
1974 	{ "mss",           NULL,  1 },
1975 	{ "transparent",   NULL,  0 },
1976 	{ "v4v6",          NULL,  0 },
1977 	{ "v6only",        NULL,  0 },
1978 	{ NULL, NULL, 0 },
1979 }};
1980 
1981 INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
1982 
/* Server keywords provided by this file, registered under the "TCP" scope.
 * The only one, "tcp-ut", is available only when TCP_USER_TIMEOUT is defined;
 * otherwise the list is reduced to its terminating entry.
 */
static struct srv_kw_list srv_kws = { "TCP", { }, {
#ifdef TCP_USER_TIMEOUT
	{ "tcp-ut",        srv_parse_tcp_ut,        1,  1 }, /* set TCP user timeout on server */
#endif
	{ NULL, NULL, 0 },
}};

/* registers the table above through srv_register_keywords() (STG_REGISTER initcall) */
INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
1991 
/* Action keywords handed to tcp_req_conn_keywords_register() (presumably the
 * "tcp-request connection" rule set -- to be confirmed): the four
 * source/destination address and port setters, all parsed by
 * tcp_parse_set_src_dst(), and "silent-drop".
 */
static struct action_kw_list tcp_req_conn_actions = {ILH, {
	{ "set-src",      tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst"     , tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ "silent-drop",  tcp_parse_silent_drop },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_req_conn_actions);
2002 
/* Action keywords handed to tcp_req_sess_keywords_register() (presumably the
 * "tcp-request session" rule set -- to be confirmed): the four
 * source/destination address and port setters, all parsed by
 * tcp_parse_set_src_dst(), and "silent-drop".
 */
static struct action_kw_list tcp_req_sess_actions = {ILH, {
	{ "set-src",      tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst"     , tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ "silent-drop",  tcp_parse_silent_drop },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_actions);
2013 
/* Action keywords handed to tcp_req_cont_keywords_register() (presumably the
 * "tcp-request content" rule set -- to be confirmed). Only the destination
 * setters and "silent-drop" are declared here; the "set-src" and
 * "set-src-port" keywords are not part of this list.
 */
static struct action_kw_list tcp_req_cont_actions = {ILH, {
	{ "set-dst"     , tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ "silent-drop",  tcp_parse_silent_drop },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
2022 
/* Action keywords handed to tcp_res_cont_keywords_register() (presumably the
 * "tcp-response content" rule set -- to be confirmed): only "silent-drop".
 */
static struct action_kw_list tcp_res_cont_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions);
2029 
/* Action keywords handed to http_req_keywords_register() (presumably the
 * "http-request" rule set -- to be confirmed): "silent-drop" and the four
 * source/destination address and port setters, all parsed by
 * tcp_parse_set_src_dst().
 */
static struct action_kw_list http_req_actions = {ILH, {
	{ "silent-drop",  tcp_parse_silent_drop },
	{ "set-src",      tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst",      tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
2040 
/* Action keywords handed to http_res_keywords_register() (presumably the
 * "http-response" rule set -- to be confirmed): only "silent-drop".
 */
static struct action_kw_list http_res_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ /* END */ }
}};

INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
2047 
/* Registers a build-options string listing the transparent proxy mechanisms
 * known at build time. The string is assembled by compile-time concatenation
 * of adjacent literals: the fixed prefix, then one " NAME" suffix for each of
 * the socket option macros below that is defined, then an empty literal.
 */
REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
#if defined(IP_TRANSPARENT)
		    " IP_TRANSPARENT"
#endif
#if defined(IPV6_TRANSPARENT)
		    " IPV6_TRANSPARENT"
#endif
#if defined(IP_FREEBIND)
		    " IP_FREEBIND"
#endif
#if defined(IP_BINDANY)
		    " IP_BINDANY"
#endif
#if defined(IPV6_BINDANY)
		    " IPV6_BINDANY"
#endif
#if defined(SO_BINDANY)
		    " SO_BINDANY"
#endif
		    "");
2068 
2069 
2070 /*
2071  * Local variables:
2072  *  c-indent-level: 8
2073  *  c-basic-offset: 8
2074  * End:
2075  */
2076