1 /*
2  * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3  *
4  * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 /* this is to have tcp_info defined on systems using musl
14  * library, such as Alpine Linux
15  */
16 #define _GNU_SOURCE
17 
18 #include <ctype.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <sys/un.h>
31 
32 #include <netinet/tcp.h>
33 #include <netinet/in.h>
34 
35 #include <common/compat.h>
36 #include <common/config.h>
37 #include <common/debug.h>
38 #include <common/errors.h>
39 #include <common/mini-clist.h>
40 #include <common/standard.h>
41 #include <common/namespace.h>
42 
43 #include <types/action.h>
44 #include <types/connection.h>
45 #include <types/global.h>
46 #include <types/stream.h>
47 
48 #include <proto/arg.h>
49 #include <proto/channel.h>
50 #include <proto/connection.h>
51 #include <proto/fd.h>
52 #include <proto/listener.h>
53 #include <proto/log.h>
54 #include <proto/port_range.h>
55 #include <proto/protocol.h>
56 #include <proto/proto_http.h>
57 #include <proto/proto_tcp.h>
58 #include <proto/proxy.h>
59 #include <proto/sample.h>
60 #include <proto/server.h>
61 #include <proto/task.h>
62 #include <proto/tcp_rules.h>
63 
64 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
65 static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
66 
67 /* Note: must not be declared <const> as its list will be overwritten */
68 static struct protocol proto_tcpv4 = {
69 	.name = "tcpv4",
70 	.sock_domain = AF_INET,
71 	.sock_type = SOCK_STREAM,
72 	.sock_prot = IPPROTO_TCP,
73 	.sock_family = AF_INET,
74 	.sock_addrlen = sizeof(struct sockaddr_in),
75 	.l3_addrlen = 32/8,
76 	.accept = &listener_accept,
77 	.connect = tcp_connect_server,
78 	.bind = tcp_bind_listener,
79 	.bind_all = tcp_bind_listeners,
80 	.unbind_all = unbind_all_listeners,
81 	.enable_all = enable_all_listeners,
82 	.get_src = tcp_get_src,
83 	.get_dst = tcp_get_dst,
84 	.drain = tcp_drain,
85 	.pause = tcp_pause_listener,
86 	.listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
87 	.nb_listeners = 0,
88 };
89 
90 /* Note: must not be declared <const> as its list will be overwritten */
91 static struct protocol proto_tcpv6 = {
92 	.name = "tcpv6",
93 	.sock_domain = AF_INET6,
94 	.sock_type = SOCK_STREAM,
95 	.sock_prot = IPPROTO_TCP,
96 	.sock_family = AF_INET6,
97 	.sock_addrlen = sizeof(struct sockaddr_in6),
98 	.l3_addrlen = 128/8,
99 	.accept = &listener_accept,
100 	.connect = tcp_connect_server,
101 	.bind = tcp_bind_listener,
102 	.bind_all = tcp_bind_listeners,
103 	.unbind_all = unbind_all_listeners,
104 	.enable_all = enable_all_listeners,
105 	.get_src = tcp_get_src,
106 	.get_dst = tcp_get_dst,
107 	.drain = tcp_drain,
108 	.pause = tcp_pause_listener,
109 	.listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
110 	.nb_listeners = 0,
111 };
112 
113 /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
114  * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
115  *  - 0 : ignore remote address (may even be a NULL pointer)
116  *  - 1 : use provided address
117  *  - 2 : use provided port
118  *  - 3 : use both
119  *
120  * The function supports multiple foreign binding methods :
121  *   - linux_tproxy: we directly bind to the foreign address
122  * The second one can be used as a fallback for the first one.
123  * This function returns 0 when everything's OK, 1 if it could not bind, to the
124  * local address, 2 if it could not bind to the foreign address.
125  */
tcp_bind_socket(int fd,int flags,struct sockaddr_storage * local,struct sockaddr_storage * remote)126 int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
127 {
128 	struct sockaddr_storage bind_addr;
129 	int foreign_ok = 0;
130 	int ret;
131 	static int ip_transp_working = 1;
132 	static int ip6_transp_working = 1;
133 
134 	switch (local->ss_family) {
135 	case AF_INET:
136 		if (flags && ip_transp_working) {
137 			/* This deserves some explanation. Some platforms will support
138 			 * multiple combinations of certain methods, so we try the
139 			 * supported ones until one succeeds.
140 			 */
141 			if (0
142 #if defined(IP_TRANSPARENT)
143 			    || (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0)
144 #endif
145 #if defined(IP_FREEBIND)
146 			    || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
147 #endif
148 #if defined(IP_BINDANY)
149 			    || (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0)
150 #endif
151 #if defined(SO_BINDANY)
152 			    || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
153 #endif
154 			    )
155 				foreign_ok = 1;
156 			else
157 				ip_transp_working = 0;
158 		}
159 		break;
160 	case AF_INET6:
161 		if (flags && ip6_transp_working) {
162 			if (0
163 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
164 			    || (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0)
165 #endif
166 #if defined(IP_FREEBIND)
167 			    || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
168 #endif
169 #if defined(IPV6_BINDANY)
170 			    || (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0)
171 #endif
172 #if defined(SO_BINDANY)
173 			    || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
174 #endif
175 			    )
176 				foreign_ok = 1;
177 			else
178 				ip6_transp_working = 0;
179 		}
180 		break;
181 	}
182 
183 	if (flags) {
184 		memset(&bind_addr, 0, sizeof(bind_addr));
185 		bind_addr.ss_family = remote->ss_family;
186 		switch (remote->ss_family) {
187 		case AF_INET:
188 			if (flags & 1)
189 				((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
190 			if (flags & 2)
191 				((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
192 			break;
193 		case AF_INET6:
194 			if (flags & 1)
195 				((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
196 			if (flags & 2)
197 				((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
198 			break;
199 		default:
200 			/* we don't want to try to bind to an unknown address family */
201 			foreign_ok = 0;
202 		}
203 	}
204 
205 	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
206 	if (foreign_ok) {
207 		if (is_inet_addr(&bind_addr)) {
208 			ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
209 			if (ret < 0)
210 				return 2;
211 		}
212 	}
213 	else {
214 		if (is_inet_addr(local)) {
215 			ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
216 			if (ret < 0)
217 				return 1;
218 		}
219 	}
220 
221 	if (!flags)
222 		return 0;
223 
224 	if (!foreign_ok)
225 		/* we could not bind to a foreign address */
226 		return 2;
227 
228 	return 0;
229 }
230 
create_server_socket(struct connection * conn)231 static int create_server_socket(struct connection *conn)
232 {
233 	const struct netns_entry *ns = NULL;
234 
235 #ifdef CONFIG_HAP_NS
236 	if (objt_server(conn->target)) {
237 		if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
238 			ns = conn->proxy_netns;
239 		else
240 			ns = __objt_server(conn->target)->netns;
241 	}
242 #endif
243 	return my_socketat(ns, conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP);
244 }
245 
246 /*
247  * This function initiates a TCP connection establishment to the target assigned
248  * to connection <conn> using (si->{target,addr.to}). A source address may be
249  * pointed to by conn->addr.from in case of transparent proxying. Normal source
250  * bind addresses are still determined locally (due to the possible need of a
251  * source port). conn->target may point either to a valid server or to a backend,
252  * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
253  * supported. The <data> parameter is a boolean indicating whether there are data
254  * waiting for being sent or not, in order to adjust data write polling and on
255  * some platforms, the ability to avoid an empty initial ACK. The <delack> argument
256  * allows the caller to force using a delayed ACK when establishing the connection :
257  *   - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
258  *   - 1 = delayed ACK if backend has tcp-smart-connect, regardless of data
259  *   - 2 = delayed ACK regardless of backend options
260  *
261  * Note that a pending send_proxy message accounts for data.
262  *
263  * It can return one of :
264  *  - SF_ERR_NONE if everything's OK
265  *  - SF_ERR_SRVTO if there are no more servers
266  *  - SF_ERR_SRVCL if the connection was refused by the server
267  *  - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
268  *  - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
269  *  - SF_ERR_INTERNAL for any other purely internal errors
270  * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
271  *
272  * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
273  * it's invalid and the caller has nothing to do.
274  */
275 
tcp_connect_server(struct connection * conn,int data,int delack)276 int tcp_connect_server(struct connection *conn, int data, int delack)
277 {
278 	int fd;
279 	struct server *srv;
280 	struct proxy *be;
281 	struct conn_src *src;
282 
283 	conn->flags = CO_FL_WAIT_L4_CONN; /* connection in progress */
284 
285 	switch (obj_type(conn->target)) {
286 	case OBJ_TYPE_PROXY:
287 		be = objt_proxy(conn->target);
288 		srv = NULL;
289 		break;
290 	case OBJ_TYPE_SERVER:
291 		srv = objt_server(conn->target);
292 		be = srv->proxy;
293 		break;
294 	default:
295 		conn->flags |= CO_FL_ERROR;
296 		return SF_ERR_INTERNAL;
297 	}
298 
299 	fd = conn->t.sock.fd = create_server_socket(conn);
300 
301 	if (fd == -1) {
302 		qfprintf(stderr, "Cannot get a server socket.\n");
303 
304 		if (errno == ENFILE) {
305 			conn->err_code = CO_ER_SYS_FDLIM;
306 			send_log(be, LOG_EMERG,
307 				 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
308 				 be->id, maxfd);
309 		}
310 		else if (errno == EMFILE) {
311 			conn->err_code = CO_ER_PROC_FDLIM;
312 			send_log(be, LOG_EMERG,
313 				 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
314 				 be->id, maxfd);
315 		}
316 		else if (errno == ENOBUFS || errno == ENOMEM) {
317 			conn->err_code = CO_ER_SYS_MEMLIM;
318 			send_log(be, LOG_EMERG,
319 				 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
320 				 be->id, maxfd);
321 		}
322 		else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
323 			conn->err_code = CO_ER_NOPROTO;
324 		}
325 		else
326 			conn->err_code = CO_ER_SOCK_ERR;
327 
328 		/* this is a resource error */
329 		conn->flags |= CO_FL_ERROR;
330 		return SF_ERR_RESOURCE;
331 	}
332 
333 	if (fd >= global.maxsock) {
334 		/* do not log anything there, it's a normal condition when this option
335 		 * is used to serialize connections to a server !
336 		 */
337 		Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
338 		close(fd);
339 		conn->err_code = CO_ER_CONF_FDLIM;
340 		conn->flags |= CO_FL_ERROR;
341 		return SF_ERR_PRXCOND; /* it is a configuration limit */
342 	}
343 
344 	if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
345 	    (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
346 		qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
347 		close(fd);
348 		conn->err_code = CO_ER_SOCK_ERR;
349 		conn->flags |= CO_FL_ERROR;
350 		return SF_ERR_INTERNAL;
351 	}
352 
353 	if (be->options & PR_O_TCP_SRV_KA)
354 		setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
355 
356 	/* allow specific binding :
357 	 * - server-specific at first
358 	 * - proxy-specific next
359 	 */
360 	if (srv && srv->conn_src.opts & CO_SRC_BIND)
361 		src = &srv->conn_src;
362 	else if (be->conn_src.opts & CO_SRC_BIND)
363 		src = &be->conn_src;
364 	else
365 		src = NULL;
366 
367 	if (src) {
368 		int ret, flags = 0;
369 
370 		if (is_inet_addr(&conn->addr.from)) {
371 			switch (src->opts & CO_SRC_TPROXY_MASK) {
372 			case CO_SRC_TPROXY_CLI:
373 				conn->flags |= CO_FL_PRIVATE;
374 				/* fall through */
375 			case CO_SRC_TPROXY_ADDR:
376 				flags = 3;
377 				break;
378 			case CO_SRC_TPROXY_CIP:
379 			case CO_SRC_TPROXY_DYN:
380 				conn->flags |= CO_FL_PRIVATE;
381 				flags = 1;
382 				break;
383 			}
384 		}
385 
386 #ifdef SO_BINDTODEVICE
387 		/* Note: this might fail if not CAP_NET_RAW */
388 		if (src->iface_name)
389 			setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
390 #endif
391 
392 		if (src->sport_range) {
393 			int attempts = 10; /* should be more than enough to find a spare port */
394 			struct sockaddr_storage sa;
395 
396 			ret = 1;
397 			memcpy(&sa, &src->source_addr, sizeof(sa));
398 
399 			do {
400 				/* note: in case of retry, we may have to release a previously
401 				 * allocated port, hence this loop's construct.
402 				 */
403 				port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
404 				fdinfo[fd].port_range = NULL;
405 
406 				if (!attempts)
407 					break;
408 				attempts--;
409 
410 				fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
411 				if (!fdinfo[fd].local_port) {
412 					conn->err_code = CO_ER_PORT_RANGE;
413 					break;
414 				}
415 
416 				fdinfo[fd].port_range = src->sport_range;
417 				set_host_port(&sa, fdinfo[fd].local_port);
418 
419 				ret = tcp_bind_socket(fd, flags, &sa, &conn->addr.from);
420 				if (ret != 0)
421 					conn->err_code = CO_ER_CANT_BIND;
422 			} while (ret != 0); /* binding NOK */
423 		}
424 		else {
425 #ifdef IP_BIND_ADDRESS_NO_PORT
426 			static int bind_address_no_port = 1;
427 			setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
428 #endif
429 			ret = tcp_bind_socket(fd, flags, &src->source_addr, &conn->addr.from);
430 			if (ret != 0)
431 				conn->err_code = CO_ER_CANT_BIND;
432 		}
433 
434 		if (unlikely(ret != 0)) {
435 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
436 			fdinfo[fd].port_range = NULL;
437 			close(fd);
438 
439 			if (ret == 1) {
440 				Alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
441 				      be->id);
442 				send_log(be, LOG_EMERG,
443 					 "Cannot bind to source address before connect() for backend %s.\n",
444 					 be->id);
445 			} else {
446 				Alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
447 				      be->id);
448 				send_log(be, LOG_EMERG,
449 					 "Cannot bind to tproxy source address before connect() for backend %s.\n",
450 					 be->id);
451 			}
452 			conn->flags |= CO_FL_ERROR;
453 			return SF_ERR_RESOURCE;
454 		}
455 	}
456 
457 #if defined(TCP_QUICKACK)
458 	/* disabling tcp quick ack now allows the first request to leave the
459 	 * machine with the first ACK. We only do this if there are pending
460 	 * data in the buffer.
461 	 */
462 	if (delack == 2 || ((delack || data || conn->send_proxy_ofs) && (be->options2 & PR_O2_SMARTCON)))
463                 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
464 #endif
465 
466 #ifdef TCP_USER_TIMEOUT
467 	/* there is not much more we can do here when it fails, it's still minor */
468 	if (srv && srv->tcp_ut)
469 		setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
470 #endif
471 	if (global.tune.server_sndbuf)
472                 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
473 
474 	if (global.tune.server_rcvbuf)
475                 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
476 
477 	if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) {
478 		if (errno == EINPROGRESS || errno == EALREADY) {
479 			/* common case, let's wait for connect status */
480 			conn->flags |= CO_FL_WAIT_L4_CONN;
481 		}
482 		else if (errno == EISCONN) {
483 			/* should normally not happen but if so, indicates that it's OK */
484 			conn->flags &= ~CO_FL_WAIT_L4_CONN;
485 		}
486 		else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
487 			char *msg;
488 			if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
489 				msg = "no free ports";
490 				conn->err_code = CO_ER_FREE_PORTS;
491 			}
492 			else {
493 				msg = "local address already in use";
494 				conn->err_code = CO_ER_ADDR_INUSE;
495 			}
496 
497 			qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
498 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
499 			fdinfo[fd].port_range = NULL;
500 			close(fd);
501 			send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
502 			conn->flags |= CO_FL_ERROR;
503 			return SF_ERR_RESOURCE;
504 		} else if (errno == ETIMEDOUT) {
505 			//qfprintf(stderr,"Connect(): ETIMEDOUT");
506 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
507 			fdinfo[fd].port_range = NULL;
508 			close(fd);
509 			conn->err_code = CO_ER_SOCK_ERR;
510 			conn->flags |= CO_FL_ERROR;
511 			return SF_ERR_SRVTO;
512 		} else {
513 			// (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
514 			//qfprintf(stderr,"Connect(): %d", errno);
515 			port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
516 			fdinfo[fd].port_range = NULL;
517 			close(fd);
518 			conn->err_code = CO_ER_SOCK_ERR;
519 			conn->flags |= CO_FL_ERROR;
520 			return SF_ERR_SRVCL;
521 		}
522 	}
523 	else {
524 		/* connect() == 0, this is great! */
525 		conn->flags &= ~CO_FL_WAIT_L4_CONN;
526 	}
527 
528 	conn->flags |= CO_FL_ADDR_TO_SET;
529 
530 	/* Prepare to send a few handshakes related to the on-wire protocol. */
531 	if (conn->send_proxy_ofs)
532 		conn->flags |= CO_FL_SEND_PROXY;
533 
534 	conn_ctrl_init(conn);       /* registers the FD */
535 	fdtab[fd].linger_risk = 1;  /* close hard if needed */
536 
537 	if (conn_xprt_init(conn) < 0) {
538 		conn_force_close(conn);
539 		conn->flags |= CO_FL_ERROR;
540 		return SF_ERR_RESOURCE;
541 	}
542 
543 	if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN)) {
544 		conn_sock_want_send(conn);  /* for connect status, proxy protocol or SSL */
545 	}
546 	else {
547 		/* If there's no more handshake, we need to notify the data
548 		 * layer when the connection is already OK otherwise we'll have
549 		 * no other opportunity to do it later (eg: health checks).
550 		 */
551 		data = 1;
552 	}
553 
554 	if (data)
555 		conn_data_want_send(conn);  /* prepare to send data if any */
556 
557 	return SF_ERR_NONE;  /* connection is OK */
558 }
559 
560 
/*
 * Fills <sa> (at most <salen> bytes) with the source address of the
 * connection running over socket <fd>. <dir> tells which side we are on :
 * zero for a listener, in which case the source is the peer's address, and
 * non-zero for an initiator, in which case it is the socket's own address.
 * Returns 0 on success, -1 on error.
 */
int tcp_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	return dir ? getsockname(fd, sa, &salen)
	           : getpeername(fd, sa, &salen);
}
574 
575 
/*
 * Fills <sa> (at most <salen> bytes) with the destination address of the
 * connection running over socket <fd>. <dir> tells which side we are on :
 * zero for a listener, non-zero for an initiator. An initiator's destination
 * is the peer's address. A listener's destination is the socket's own address
 * and, when built with both TPROXY and SO_ORIGINAL_DST, an IPv4 address is
 * replaced by the original one reported by SO_ORIGINAL_DST if that lookup
 * succeeds. Returns 0 on success, -1 on error.
 */
int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int rc;

	if (dir)
		return getpeername(fd, sa, &salen);

	rc = getsockname(fd, sa, &salen);
	if (rc < 0)
		return rc;

#if defined(TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4.
	 */
	if (((struct sockaddr_storage *)sa)->ss_family == AF_INET &&
	    getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return rc;
}
606 
607 /* Tries to drain any pending incoming data from the socket to reach the
608  * receive shutdown. Returns positive if the shutdown was found, negative
609  * if EAGAIN was hit, otherwise zero. This is useful to decide whether we
610  * can close a connection cleanly are we must kill it hard.
611  */
tcp_drain(int fd)612 int tcp_drain(int fd)
613 {
614 	int turns = 2;
615 	int len;
616 
617 	while (turns) {
618 #ifdef MSG_TRUNC_CLEARS_INPUT
619 		len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
620 		if (len == -1 && errno == EFAULT)
621 #endif
622 			len = recv(fd, trash.str, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
623 
624 		if (len == 0) {
625 			/* cool, shutdown received */
626 			fdtab[fd].linger_risk = 0;
627 			return 1;
628 		}
629 
630 		if (len < 0) {
631 			if (errno == EAGAIN) {
632 				/* connection not closed yet */
633 				fd_cant_recv(fd);
634 				return -1;
635 			}
636 			if (errno == EINTR)  /* oops, try again */
637 				continue;
638 			/* other errors indicate a dead connection, fine. */
639 			fdtab[fd].linger_risk = 0;
640 			return 1;
641 		}
642 		/* OK we read some data, let's try again once */
643 		turns--;
644 	}
645 	/* some data are still present, give up */
646 	return 0;
647 }
648 
649 /* This is the callback which is set when a connection establishment is pending
650  * and we have nothing to send. It updates the FD polling status. It returns 0
651  * if it fails in a fatal way or needs to poll to go further, otherwise it
652  * returns non-zero and removes the CO_FL_WAIT_L4_CONN flag from the connection's
653  * flags. In case of error, it sets CO_FL_ERROR and leaves the error code in
654  * errno. The error checking is done in two passes in order to limit the number
655  * of syscalls in the normal case :
656  *   - if POLL_ERR was reported by the poller, we check for a pending error on
657  *     the socket before proceeding. If found, it's assigned to errno so that
658  *     upper layers can see it.
659  *   - otherwise connect() is used to check the connection state again, since
660  *     the getsockopt return cannot reliably be used to know if the connection
661  *     is still pending or ready. This one may often return an error as well,
662  *     since we don't always have POLL_ERR (eg: OSX or cached events).
663  */
tcp_connect_probe(struct connection * conn)664 int tcp_connect_probe(struct connection *conn)
665 {
666 	int fd = conn->t.sock.fd;
667 	socklen_t lskerr;
668 	int skerr;
669 
670 	if (conn->flags & CO_FL_ERROR)
671 		return 0;
672 
673 	if (!conn_ctrl_ready(conn))
674 		return 0;
675 
676 	if (!(conn->flags & CO_FL_WAIT_L4_CONN))
677 		return 1; /* strange we were called while ready */
678 
679 	if (!fd_send_ready(fd))
680 		return 0;
681 
682 	/* we might be the first witness of FD_POLL_ERR. Note that FD_POLL_HUP
683 	 * without FD_POLL_IN also indicates a hangup without input data meaning
684 	 * there was no connection.
685 	 */
686 	if (fdtab[fd].ev & FD_POLL_ERR ||
687 	    (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) {
688 		skerr = 0;
689 		lskerr = sizeof(skerr);
690 		getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
691 		errno = skerr;
692 		if (errno == EAGAIN)
693 			errno = 0;
694 		if (errno)
695 			goto out_error;
696 	}
697 
698 	/* Use connect() to check the state of the socket. This has the
699 	 * advantage of giving us the following info :
700 	 *  - error
701 	 *  - connecting (EALREADY, EINPROGRESS)
702 	 *  - connected (EISCONN, 0)
703 	 */
704 	if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) < 0) {
705 		if (errno == EALREADY || errno == EINPROGRESS) {
706 			__conn_sock_stop_recv(conn);
707 			fd_cant_send(fd);
708 			return 0;
709 		}
710 
711 		if (errno && errno != EISCONN)
712 			goto out_error;
713 
714 		/* otherwise we're connected */
715 	}
716 
717 	/* The FD is ready now, we'll mark the connection as complete and
718 	 * forward the event to the transport layer which will notify the
719 	 * data layer.
720 	 */
721 	conn->flags &= ~CO_FL_WAIT_L4_CONN;
722 	return 1;
723 
724  out_error:
725 	/* Write error on the file descriptor. Report it to the connection
726 	 * and disable polling on this FD.
727 	 */
728 	fdtab[fd].linger_risk = 0;
729 	conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
730 	__conn_sock_stop_both(conn);
731 	return 0;
732 }
733 
734 
/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
 * an error message in <errmsg> if the message is at most <errlen> bytes long
 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
 * The return value is composed from ERR_ABORT, ERR_WARN,
 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
 * was alright and that no message was returned. ERR_RETRYABLE means that an
 * error occurred but that it may vanish after a retry (eg: port in use), and
 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
 * the meaning of the error, but just indicate that a message is present which
 * should be displayed with the respective level. Last, ERR_ABORT indicates
 * that it's pointless to try to start other listeners. No error message is
 * returned if errlen is zero.
 *
 * Nothing is done (and ERR_NONE is returned) unless the listener is in state
 * LI_ASSIGNED. On success the listener is switched to LI_LISTEN, its fd is
 * stored in listener->fd and registered with fd_insert(). Several steps may
 * each set a message without aborting; since a single message pointer is
 * used, only the last one set is formatted into <errmsg>, followed by the
 * listener's address and port.
 */
int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
{
	__label__ tcp_return, tcp_close_return;
	int fd, err;
	int ext, ready;
	socklen_t ready_len;
	const char *msg = NULL;

	/* ensure we never return garbage */
	if (errlen)
		*errmsg = 0;

	if (listener->state != LI_ASSIGNED)
		return ERR_NONE; /* already bound */

	err = ERR_NONE;

	/* if the listener already has an fd assigned, then we were offered the
	 * fd by an external process (most likely the parent), and we don't want
	 * to create a new socket. However we still want to set a few flags on
	 * the socket.
	 */
	fd = listener->fd;
	ext = (fd >= 0);

	if (!ext) {
		fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);

		if (fd == -1) {
			err |= ERR_RETRYABLE | ERR_ALERT;
			msg = "cannot create listening socket";
			goto tcp_return;
		}
	}

	/* the fd must be below the configured global.maxsock to be usable */
	if (fd >= global.maxsock) {
		err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
		msg = "not enough free sockets (raise '-n' parameter)";
		goto tcp_close_return;
	}

	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
		err |= ERR_FATAL | ERR_ALERT;
		msg = "cannot make socket non-blocking";
		goto tcp_close_return;
	}

	if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
		/* not fatal but should be reported */
		msg = "cannot do so_reuseaddr";
		err |= ERR_ALERT;
	}

	/* best effort: the result is not checked */
	if (listener->options & LI_O_NOLINGER)
		setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));

#ifdef SO_REUSEPORT
	/* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
	 * Linux, it might return an error that we will silently ignore.
	 */
	if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
#endif

	/* Transparent (foreign) listening address: each option available at
	 * build time is tried in turn, and the chain stops at the first one
	 * which succeeds. The alert is raised only when all of them failed,
	 * which is also the case when none is available.
	 */
	if (!ext && (listener->options & LI_O_FOREIGN)) {
		switch (listener->addr.ss_family) {
		case AF_INET:
			if (1
#if defined(IP_TRANSPARENT)
			    && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
#endif
#if defined(IP_FREEBIND)
			    && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
#endif
#if defined(IP_BINDANY)
			    && (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == -1)
#endif
#if defined(SO_BINDANY)
			    && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
#endif
			    ) {
				msg = "cannot make listening socket transparent";
				err |= ERR_ALERT;
			}
		break;
		case AF_INET6:
			if (1
#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
			    && (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == -1)
#endif
#if defined(IP_FREEBIND)
			    && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
#endif
#if defined(IPV6_BINDANY)
			    && (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == -1)
#endif
#if defined(SO_BINDANY)
			    && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
#endif
			    ) {
				msg = "cannot make listening socket transparent";
				err |= ERR_ALERT;
			}
		break;
		}
	}

	/* The options below only produce warnings when they cannot be set:
	 * the listener is still started without them.
	 */
#ifdef SO_BINDTODEVICE
	/* Note: this might fail if not CAP_NET_RAW */
	if (!ext && listener->interface) {
		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
			       listener->interface, strlen(listener->interface) + 1) == -1) {
			msg = "cannot bind listener to device";
			err |= ERR_WARN;
		}
	}
#endif
#if defined(TCP_MAXSEG)
	if (listener->maxseg > 0) {
		if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
			       &listener->maxseg, sizeof(listener->maxseg)) == -1) {
			msg = "cannot set MSS";
			err |= ERR_WARN;
		}
	}
#endif
#if defined(TCP_USER_TIMEOUT)
	if (listener->tcp_ut) {
		if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
			       &listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
			msg = "cannot set TCP User Timeout";
			err |= ERR_WARN;
		}
	}
#endif
#if defined(TCP_DEFER_ACCEPT)
	if (listener->options & LI_O_DEF_ACCEPT) {
		/* defer accept by up to one second */
		int accept_delay = 1;
		if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
			msg = "cannot enable DEFER_ACCEPT";
			err |= ERR_WARN;
		}
	}
#endif
#if defined(TCP_FASTOPEN)
	if (listener->options & LI_O_TCP_FO) {
		/* TFO needs a queue length, let's use the configured backlog */
		int qlen = listener->backlog ? listener->backlog : listener->maxconn;
		if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
			msg = "cannot enable TCP_FASTOPEN";
			err |= ERR_WARN;
		}
	}
#endif
#if defined(IPV6_V6ONLY)
	/* best effort: the result is not checked */
	if (listener->options & LI_O_V6ONLY)
                setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
	else if (listener->options & LI_O_V4V6)
                setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
#endif

	/* an fd received from an external process is not bound again */
	if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
		err |= ERR_RETRYABLE | ERR_ALERT;
		msg = "cannot bind socket";
		goto tcp_close_return;
	}

	/* check whether the socket is already listening; if this cannot be
	 * determined, it is considered as not listening.
	 */
	ready = 0;
	ready_len = sizeof(ready);
	if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
		ready = 0;

	if (!(ext && ready) && /* only listen if not already done by external process */
	    listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
		err |= ERR_RETRYABLE | ERR_ALERT;
		msg = "cannot listen to socket";
		goto tcp_close_return;
	}

#if defined(TCP_QUICKACK)
	/* best effort: the result is not checked */
	if (listener->options & LI_O_NOQUICKACK)
		setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
#endif

	/* the socket is ready */
	listener->fd = fd;
	listener->state = LI_LISTEN;

	fdtab[fd].owner = listener; /* reference the listener instead of a task */
	fdtab[fd].iocb = listener->proto->accept;
	fd_insert(fd);

	/* common exit: format the last message set, if any and if the caller
	 * provided room for it. This is also reached on success since
	 * non-fatal steps above may have left a message.
	 */
 tcp_return:
	if (msg && errlen) {
		char pn[INET6_ADDRSTRLEN];

		addr_to_str(&listener->addr, pn, sizeof(pn));
		snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->addr));
	}
	return err;

	/* error exit once a socket is held: close it, then report */
 tcp_close_return:
	close(fd);
	goto tcp_return;
}
954 
955 /* This function creates all TCP sockets bound to the protocol entry <proto>.
956  * It is intended to be used as the protocol's bind_all() function.
957  * The sockets will be registered but not added to any fd_set, in order not to
958  * loose them across the fork(). A call to enable_all_listeners() is needed
959  * to complete initialization. The return value is composed from ERR_*.
960  */
tcp_bind_listeners(struct protocol * proto,char * errmsg,int errlen)961 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen)
962 {
963 	struct listener *listener;
964 	int err = ERR_NONE;
965 
966 	list_for_each_entry(listener, &proto->listeners, proto_list) {
967 		err |= tcp_bind_listener(listener, errmsg, errlen);
968 		if (err & ERR_ABORT)
969 			break;
970 	}
971 
972 	return err;
973 }
974 
975 /* Add listener to the list of tcpv4 listeners. The listener's state
976  * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
977  * listeners is updated. This is the function to use to add a new listener.
978  */
tcpv4_add_listener(struct listener * listener)979 void tcpv4_add_listener(struct listener *listener)
980 {
981 	if (listener->state != LI_INIT)
982 		return;
983 	listener->state = LI_ASSIGNED;
984 	listener->proto = &proto_tcpv4;
985 	LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
986 	proto_tcpv4.nb_listeners++;
987 }
988 
989 /* Add listener to the list of tcpv4 listeners. The listener's state
990  * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
991  * listeners is updated. This is the function to use to add a new listener.
992  */
tcpv6_add_listener(struct listener * listener)993 void tcpv6_add_listener(struct listener *listener)
994 {
995 	if (listener->state != LI_INIT)
996 		return;
997 	listener->state = LI_ASSIGNED;
998 	listener->proto = &proto_tcpv6;
999 	LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
1000 	proto_tcpv6.nb_listeners++;
1001 }
1002 
1003 /* Pause a listener. Returns < 0 in case of failure, 0 if the listener
1004  * was totally stopped, or > 0 if correctly paused.
1005  */
tcp_pause_listener(struct listener * l)1006 int tcp_pause_listener(struct listener *l)
1007 {
1008 	if (shutdown(l->fd, SHUT_WR) != 0)
1009 		return -1; /* Solaris dies here */
1010 
1011 	if (listen(l->fd, l->backlog ? l->backlog : l->maxconn) != 0)
1012 		return -1; /* OpenBSD dies here */
1013 
1014 	if (shutdown(l->fd, SHUT_RD) != 0)
1015 		return -1; /* should always be OK */
1016 	return 1;
1017 }
1018 
1019 /*
1020  * Execute the "set-src" action. May be called from {tcp,http}request.
1021  * It only changes the address and tries to preserve the original port. If the
1022  * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1023  */
tcp_action_req_set_src(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1024 enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
1025                                               struct session *sess, struct stream *s, int flags)
1026 {
1027 	struct connection *cli_conn;
1028 
1029 	if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1030 		struct sample *smp;
1031 
1032 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1033 		if (smp) {
1034 			int port = get_net_port(&cli_conn->addr.from);
1035 
1036 			if (smp->data.type == SMP_T_IPV4) {
1037 				((struct sockaddr_in *)&cli_conn->addr.from)->sin_family = AF_INET;
1038 				((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1039 				((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = port;
1040 			} else if (smp->data.type == SMP_T_IPV6) {
1041 				((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_family = AF_INET6;
1042 				memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1043 				((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = port;
1044 			}
1045 		}
1046 		cli_conn->flags |= CO_FL_ADDR_FROM_SET;
1047 	}
1048 	return ACT_RET_CONT;
1049 }
1050 
1051 /*
1052  * Execute the "set-dst" action. May be called from {tcp,http}request.
1053  * It only changes the address and tries to preserve the original port. If the
1054  * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1055  */
tcp_action_req_set_dst(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1056 enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
1057                                               struct session *sess, struct stream *s, int flags)
1058 {
1059 	struct connection *cli_conn;
1060 
1061 	if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1062 		struct sample *smp;
1063 
1064 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1065 		if (smp) {
1066 			int port = get_net_port(&cli_conn->addr.to);
1067 
1068 			if (smp->data.type == SMP_T_IPV4) {
1069 				((struct sockaddr_in *)&cli_conn->addr.to)->sin_family = AF_INET;
1070 				((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1071 				((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = port;
1072 			} else if (smp->data.type == SMP_T_IPV6) {
1073 				((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_family = AF_INET6;
1074 				memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1075 				((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = port;
1076 			}
1077 			cli_conn->flags |= CO_FL_ADDR_TO_SET;
1078 		}
1079 	}
1080 	return ACT_RET_CONT;
1081 }
1082 
1083 /*
1084  * Execute the "set-src-port" action. May be called from {tcp,http}request.
1085  * We must test the sin_family before setting the port. If the address family
1086  * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1087  * and the port is assigned.
1088  */
tcp_action_req_set_src_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1089 enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
1090                                               struct session *sess, struct stream *s, int flags)
1091 {
1092 	struct connection *cli_conn;
1093 
1094 	if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1095 		struct sample *smp;
1096 
1097 		conn_get_from_addr(cli_conn);
1098 
1099 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1100 		if (smp) {
1101 			if (cli_conn->addr.from.ss_family == AF_INET6) {
1102 				((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = htons(smp->data.u.sint);
1103 			} else {
1104 				if (cli_conn->addr.from.ss_family != AF_INET) {
1105 					cli_conn->addr.from.ss_family = AF_INET;
1106 					((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = 0;
1107 				}
1108 				((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = htons(smp->data.u.sint);
1109 			}
1110 		}
1111 	}
1112 	return ACT_RET_CONT;
1113 }
1114 
1115 /*
1116  * Execute the "set-dst-port" action. May be called from {tcp,http}request.
1117  * We must test the sin_family before setting the port. If the address family
1118  * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1119  * and the port is assigned.
1120  */
tcp_action_req_set_dst_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1121 enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
1122                                               struct session *sess, struct stream *s, int flags)
1123 {
1124 	struct connection *cli_conn;
1125 
1126 	if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1127 		struct sample *smp;
1128 
1129 		conn_get_to_addr(cli_conn);
1130 
1131 		smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1132 		if (smp) {
1133 			if (cli_conn->addr.to.ss_family == AF_INET6) {
1134 				((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = htons(smp->data.u.sint);
1135 			} else {
1136 				if (cli_conn->addr.to.ss_family != AF_INET) {
1137 					cli_conn->addr.to.ss_family = AF_INET;
1138 					((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = 0;
1139 				}
1140 				((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = htons(smp->data.u.sint);
1141 			}
1142 		}
1143 	}
1144 	return ACT_RET_CONT;
1145 }
1146 
1147 /* Executes the "silent-drop" action. May be called from {tcp,http}{request,response} */
tcp_exec_action_silent_drop(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * strm,int flags)1148 static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *strm, int flags)
1149 {
1150 	struct connection *conn = objt_conn(sess->origin);
1151 
1152 	if (!conn)
1153 		goto out;
1154 
1155 	if (!conn_ctrl_ready(conn))
1156 		goto out;
1157 
1158 #ifdef TCP_QUICKACK
1159 	/* drain is needed only to send the quick ACK */
1160 	conn_sock_drain(conn);
1161 
1162 	/* re-enable quickack if it was disabled to ack all data and avoid
1163 	 * retransmits from the client that might trigger a real reset.
1164 	 */
1165 	setsockopt(conn->t.sock.fd, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
1166 #endif
1167 	/* lingering must absolutely be disabled so that we don't send a
1168 	 * shutdown(), this is critical to the TCP_REPAIR trick. When no stream
1169 	 * is present, returning with ERR will cause lingering to be disabled.
1170 	 */
1171 	if (strm)
1172 		strm->si[0].flags |= SI_FL_NOLINGER;
1173 
1174 	/* We're on the client-facing side, we must force to disable lingering to
1175 	 * ensure we will use an RST exclusively and kill any pending data.
1176 	 */
1177 	fdtab[conn->t.sock.fd].linger_risk = 1;
1178 
1179 #ifdef TCP_REPAIR
1180 	if (setsockopt(conn->t.sock.fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
1181 		/* socket will be quiet now */
1182 		goto out;
1183 	}
1184 #endif
1185 	/* either TCP_REPAIR is not defined or it failed (eg: permissions).
1186 	 * Let's fall back on the TTL trick, though it only works for routed
1187 	 * network and has no effect on local net.
1188 	 */
1189 #ifdef IP_TTL
1190 	if (conn->addr.from.ss_family == AF_INET)
1191 		setsockopt(conn->t.sock.fd, SOL_IP, IP_TTL, &one, sizeof(one));
1192 #endif
1193 #ifdef IPV6_UNICAST_HOPS
1194 #if defined(SOL_IPV6)
1195 	if (conn->addr.from.ss_family == AF_INET6)
1196 		setsockopt(conn->t.sock.fd, SOL_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1197 #elif defined(IPPROTO_IPV6)
1198 	if (conn->addr.from.ss_family == AF_INET6)
1199 		setsockopt(conn->t.sock.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1200 #endif
1201 #endif
1202  out:
1203 	/* kill the stream if any */
1204 	if (strm) {
1205 		channel_abort(&strm->req);
1206 		channel_abort(&strm->res);
1207 		strm->req.analysers &= AN_REQ_FLT_END;
1208 		strm->res.analysers &= AN_RES_FLT_END;
1209 		if (strm->flags & SF_BE_ASSIGNED)
1210 			strm->be->be_counters.denied_req++;
1211 		if (!(strm->flags & SF_ERR_MASK))
1212 			strm->flags |= SF_ERR_PRXCOND;
1213 		if (!(strm->flags & SF_FINST_MASK))
1214 			strm->flags |= SF_FINST_R;
1215 	}
1216 
1217 	sess->fe->fe_counters.denied_req++;
1218 	if (sess->listener->counters)
1219 		sess->listener->counters->denied_req++;
1220 
1221 	return ACT_RET_STOP;
1222 }
1223 
1224 /* parse "set-{src,dst}[-port]" action */
tcp_parse_set_src_dst(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1225 enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
1226 {
1227 	int cur_arg;
1228 	struct sample_expr *expr;
1229 	unsigned int where;
1230 
1231 	cur_arg = *orig_arg;
1232 	expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args);
1233 	if (!expr)
1234 		return ACT_RET_PRS_ERR;
1235 
1236 	where = 0;
1237 	if (proxy->cap & PR_CAP_FE)
1238 		where |= SMP_VAL_FE_HRQ_HDR;
1239 	if (proxy->cap & PR_CAP_BE)
1240 		where |= SMP_VAL_BE_HRQ_HDR;
1241 
1242 	if (!(expr->fetch->val & where)) {
1243 		memprintf(err,
1244 			  "fetch method '%s' extracts information from '%s', none of which is available here",
1245 			  args[cur_arg-1], sample_src_names(expr->fetch->use));
1246 		free(expr);
1247 		return ACT_RET_PRS_ERR;
1248 	}
1249 	rule->arg.expr = expr;
1250 	rule->action = ACT_CUSTOM;
1251 
1252 	if (!strcmp(args[*orig_arg-1], "set-src")) {
1253 		rule->action_ptr = tcp_action_req_set_src;
1254 	} else if (!strcmp(args[*orig_arg-1], "set-src-port")) {
1255 		rule->action_ptr = tcp_action_req_set_src_port;
1256 	} else if (!strcmp(args[*orig_arg-1], "set-dst")) {
1257 		rule->action_ptr = tcp_action_req_set_dst;
1258 	} else if (!strcmp(args[*orig_arg-1], "set-dst-port")) {
1259 		rule->action_ptr = tcp_action_req_set_dst_port;
1260 	} else {
1261 		return ACT_RET_PRS_ERR;
1262 	}
1263 
1264 	(*orig_arg)++;
1265 
1266 	return ACT_RET_PRS_OK;
1267 }
1268 
1269 
1270 /* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
1271  * success, ACT_RET_PRS_ERR on error.
1272  */
tcp_parse_silent_drop(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1273 static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
1274                                                 struct act_rule *rule, char **err)
1275 {
1276 	rule->action     = ACT_CUSTOM;
1277 	rule->action_ptr = tcp_exec_action_silent_drop;
1278 	return ACT_RET_PRS_OK;
1279 }
1280 
1281 
1282 /************************************************************************/
1283 /*       All supported sample fetch functions must be declared here     */
1284 /************************************************************************/
1285 
1286 /* fetch the connection's source IPv4/IPv6 address */
smp_fetch_src(const struct arg * args,struct sample * smp,const char * kw,void * private)1287 int smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
1288 {
1289 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1290 
1291 	if (!cli_conn)
1292 		return 0;
1293 
1294 	switch (cli_conn->addr.from.ss_family) {
1295 	case AF_INET:
1296 		smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr;
1297 		smp->data.type = SMP_T_IPV4;
1298 		break;
1299 	case AF_INET6:
1300 		smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr;
1301 		smp->data.type = SMP_T_IPV6;
1302 		break;
1303 	default:
1304 		return 0;
1305 	}
1306 
1307 	smp->flags = 0;
1308 	return 1;
1309 }
1310 
1311 /* set temp integer to the connection's source port */
1312 static int
smp_fetch_sport(const struct arg * args,struct sample * smp,const char * k,void * private)1313 smp_fetch_sport(const struct arg *args, struct sample *smp, const char *k, void *private)
1314 {
1315 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1316 
1317 	if (!cli_conn)
1318 		return 0;
1319 
1320 	smp->data.type = SMP_T_SINT;
1321 	if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.from)))
1322 		return 0;
1323 
1324 	smp->flags = 0;
1325 	return 1;
1326 }
1327 
1328 /* fetch the connection's destination IPv4/IPv6 address */
1329 static int
smp_fetch_dst(const struct arg * args,struct sample * smp,const char * kw,void * private)1330 smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
1331 {
1332 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1333 
1334 	if (!cli_conn)
1335 		return 0;
1336 
1337 	conn_get_to_addr(cli_conn);
1338 
1339 	switch (cli_conn->addr.to.ss_family) {
1340 	case AF_INET:
1341 		smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr;
1342 		smp->data.type = SMP_T_IPV4;
1343 		break;
1344 	case AF_INET6:
1345 		smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr;
1346 		smp->data.type = SMP_T_IPV6;
1347 		break;
1348 	default:
1349 		return 0;
1350 	}
1351 
1352 	smp->flags = 0;
1353 	return 1;
1354 }
1355 
1356 /* check if the destination address of the front connection is local to the
1357  * system or if it was intercepted.
1358  */
smp_fetch_dst_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1359 int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1360 {
1361 	struct connection *conn = objt_conn(smp->sess->origin);
1362 	struct listener *li = smp->sess->listener;
1363 
1364 	if (!conn)
1365 		return 0;
1366 
1367 	conn_get_to_addr(conn);
1368 	if (!(conn->flags & CO_FL_ADDR_TO_SET))
1369 		return 0;
1370 
1371 	smp->data.type = SMP_T_BOOL;
1372 	smp->flags = 0;
1373 	smp->data.u.sint = addr_is_local(li->netns, &conn->addr.to);
1374 	return smp->data.u.sint >= 0;
1375 }
1376 
1377 /* check if the source address of the front connection is local to the system
1378  * or not.
1379  */
smp_fetch_src_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1380 int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1381 {
1382 	struct connection *conn = objt_conn(smp->sess->origin);
1383 	struct listener *li = smp->sess->listener;
1384 
1385 	if (!conn)
1386 		return 0;
1387 
1388 	conn_get_from_addr(conn);
1389 	if (!(conn->flags & CO_FL_ADDR_FROM_SET))
1390 		return 0;
1391 
1392 	smp->data.type = SMP_T_BOOL;
1393 	smp->flags = 0;
1394 	smp->data.u.sint = addr_is_local(li->netns, &conn->addr.from);
1395 	return smp->data.u.sint >= 0;
1396 }
1397 
1398 /* set temp integer to the frontend connexion's destination port */
1399 static int
smp_fetch_dport(const struct arg * args,struct sample * smp,const char * kw,void * private)1400 smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
1401 {
1402 	struct connection *cli_conn = objt_conn(smp->sess->origin);
1403 
1404 	if (!cli_conn)
1405 		return 0;
1406 
1407 	conn_get_to_addr(cli_conn);
1408 
1409 	smp->data.type = SMP_T_SINT;
1410 	if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.to)))
1411 		return 0;
1412 
1413 	smp->flags = 0;
1414 	return 1;
1415 }
1416 
1417 #ifdef TCP_INFO
1418 
1419 /* Validates the arguments passed to "fc_*" fetch keywords returning a time
1420  * value. These keywords support an optional string representing the unit of the
1421  * result: "us" for microseconds and "ms" for milliseconds". Returns 0 on error
1422  * and non-zero if OK.
1423  */
val_fc_time_value(struct arg * args,char ** err)1424 static int val_fc_time_value(struct arg *args, char **err)
1425 {
1426 	if (args[0].type == ARGT_STR) {
1427 		if (strcmp(args[0].data.str.str, "us") == 0) {
1428 			free(args[0].data.str.str);
1429 			args[0].type = ARGT_SINT;
1430 			args[0].data.sint = TIME_UNIT_US;
1431 		}
1432 		else if (strcmp(args[0].data.str.str, "ms") == 0) {
1433 			free(args[0].data.str.str);
1434 			args[0].type = ARGT_SINT;
1435 			args[0].data.sint = TIME_UNIT_MS;
1436 		}
1437 		else {
1438 			memprintf(err, "expects 'us' or 'ms', got '%s'",
1439 				  args[0].data.str.str);
1440 			return 0;
1441 		}
1442 	}
1443 	else {
1444 		memprintf(err, "Unexpected arg type");
1445 		return 0;
1446 	}
1447 
1448 	return 1;
1449 }
1450 
1451 /* Validates the arguments passed to "fc_*" fetch keywords returning a
1452  * counter. These keywords should be used without any keyword, but because of a
1453  * bug in previous versions, an optional string argument may be passed. In such
1454  * case, the argument is ignored and a warning is emitted. Returns 0 on error
1455  * and non-zero if OK.
1456  */
var_fc_counter(struct arg * args,char ** err)1457 static int var_fc_counter(struct arg *args, char **err)
1458 {
1459 	if (args[0].type != ARGT_STOP) {
1460 		Warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
1461 		if (args[0].type == ARGT_STR)
1462 			free(args[0].data.str.str);
1463 		args[0].type = ARGT_STOP;
1464 	}
1465 
1466 	return 1;
1467 }
1468 
1469 /* Returns some tcp_info data if it's available. "dir" must be set to 0 if
1470  * the client connection is required, otherwise it is set to 1. "val" represents
1471  * the required value.
1472  * If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
1473  */
get_tcp_info(const struct arg * args,struct sample * smp,int dir,int val)1474 static inline int get_tcp_info(const struct arg *args, struct sample *smp,
1475                                int dir, int val)
1476 {
1477 	struct connection *conn;
1478 	struct tcp_info info;
1479 	socklen_t optlen;
1480 
1481 	/* strm can be null. */
1482 	if (!smp->strm)
1483 		return 0;
1484 
1485 	/* get the object associated with the stream interface.The
1486 	 * object can be other thing than a connection. For example,
1487 	 * it be a appctx. */
1488 	conn = objt_conn(smp->strm->si[dir].end);
1489 	if (!conn)
1490 		return 0;
1491 
1492 	/* The fd may not be avalaible for the tcp_info struct, and the
1493 	  syscal can fail. */
1494 	optlen = sizeof(info);
1495 	if (getsockopt(conn->t.sock.fd, SOL_TCP, TCP_INFO, &info, &optlen) == -1)
1496 		return 0;
1497 
1498 	/* extract the value. */
1499 	smp->data.type = SMP_T_SINT;
1500 	switch (val) {
1501 	case 0:  smp->data.u.sint = info.tcpi_rtt;            break;
1502 	case 1:  smp->data.u.sint = info.tcpi_rttvar;         break;
1503 #if defined(__linux__)
1504 	/* these ones are common to all Linux versions */
1505 	case 2:  smp->data.u.sint = info.tcpi_unacked;        break;
1506 	case 3:  smp->data.u.sint = info.tcpi_sacked;         break;
1507 	case 4:  smp->data.u.sint = info.tcpi_lost;           break;
1508 	case 5:  smp->data.u.sint = info.tcpi_retrans;        break;
1509 	case 6:  smp->data.u.sint = info.tcpi_fackets;        break;
1510 	case 7:  smp->data.u.sint = info.tcpi_reordering;     break;
1511 #elif defined(__FreeBSD__) || defined(__NetBSD__)
1512 	/* the ones are found on FreeBSD and NetBSD featuring TCP_INFO */
1513 	case 2:  smp->data.u.sint = info.__tcpi_unacked;      break;
1514 	case 3:  smp->data.u.sint = info.__tcpi_sacked;       break;
1515 	case 4:  smp->data.u.sint = info.__tcpi_lost;         break;
1516 	case 5:  smp->data.u.sint = info.__tcpi_retrans;      break;
1517 	case 6:  smp->data.u.sint = info.__tcpi_fackets;      break;
1518 	case 7:  smp->data.u.sint = info.__tcpi_reordering;   break;
1519 #endif
1520 	default: return 0;
1521 	}
1522 
1523 	return 1;
1524 }
1525 
1526 /* get the mean rtt of a client connexion */
1527 static int
smp_fetch_fc_rtt(const struct arg * args,struct sample * smp,const char * kw,void * private)1528 smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
1529 {
1530 	if (!get_tcp_info(args, smp, 0, 0))
1531 		return 0;
1532 
1533 	/* By default or if explicitly specified, convert rtt to ms */
1534 	if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1535 		smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1536 
1537 	return 1;
1538 }
1539 
1540 /* get the variance of the mean rtt of a client connexion */
1541 static int
smp_fetch_fc_rttvar(const struct arg * args,struct sample * smp,const char * kw,void * private)1542 smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
1543 {
1544 	if (!get_tcp_info(args, smp, 0, 1))
1545 		return 0;
1546 
1547 	/* By default or if explicitly specified, convert rttvar to ms */
1548 	if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1549 		smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1550 
1551 	return 1;
1552 }
1553 
1554 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
1555 
/* Fetches the unacked counter of a client connection (get_tcp_info() value
 * 2). Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 2) ? 1 : 0;
}
1564 
/* Fetches the sacked counter of a client connection (get_tcp_info() value
 * 3). Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 3) ? 1 : 0;
}
1573 
/* Fetches the lost counter of a client connection (get_tcp_info() value 4).
 * Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 4) ? 1 : 0;
}
1582 
/* Fetches the retrans counter of a client connection (get_tcp_info() value
 * 5). Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 5) ? 1 : 0;
}
1591 
/* Fetches the fackets counter of a client connection (get_tcp_info() value
 * 6). Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 6) ? 1 : 0;
}
1600 
/* Fetches the reordering counter of a client connection (get_tcp_info()
 * value 7). Returns 1 when the sample was filled, 0 otherwise.
 */
static int
smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 7) ? 1 : 0;
}
1609 #endif // linux || freebsd || netbsd
1610 #endif // TCP_INFO
1611 
1612 #ifdef IPV6_V6ONLY
1613 /* parse the "v4v6" bind keyword */
bind_parse_v4v6(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1614 static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1615 {
1616 	struct listener *l;
1617 
1618 	list_for_each_entry(l, &conf->listeners, by_bind) {
1619 		if (l->addr.ss_family == AF_INET6)
1620 			l->options |= LI_O_V4V6;
1621 	}
1622 
1623 	return 0;
1624 }
1625 
1626 /* parse the "v6only" bind keyword */
bind_parse_v6only(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1627 static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1628 {
1629 	struct listener *l;
1630 
1631 	list_for_each_entry(l, &conf->listeners, by_bind) {
1632 		if (l->addr.ss_family == AF_INET6)
1633 			l->options |= LI_O_V6ONLY;
1634 	}
1635 
1636 	return 0;
1637 }
1638 #endif
1639 
1640 #ifdef CONFIG_HAP_TRANSPARENT
1641 /* parse the "transparent" bind keyword */
bind_parse_transparent(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1642 static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1643 {
1644 	struct listener *l;
1645 
1646 	list_for_each_entry(l, &conf->listeners, by_bind) {
1647 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1648 			l->options |= LI_O_FOREIGN;
1649 	}
1650 
1651 	return 0;
1652 }
1653 #endif
1654 
1655 #ifdef TCP_DEFER_ACCEPT
1656 /* parse the "defer-accept" bind keyword */
bind_parse_defer_accept(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1657 static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1658 {
1659 	struct listener *l;
1660 
1661 	list_for_each_entry(l, &conf->listeners, by_bind) {
1662 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1663 			l->options |= LI_O_DEF_ACCEPT;
1664 	}
1665 
1666 	return 0;
1667 }
1668 #endif
1669 
1670 #ifdef TCP_FASTOPEN
1671 /* parse the "tfo" bind keyword */
bind_parse_tfo(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1672 static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1673 {
1674 	struct listener *l;
1675 
1676 	list_for_each_entry(l, &conf->listeners, by_bind) {
1677 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1678 			l->options |= LI_O_TCP_FO;
1679 	}
1680 
1681 	return 0;
1682 }
1683 #endif
1684 
1685 #ifdef TCP_MAXSEG
1686 /* parse the "mss" bind keyword */
bind_parse_mss(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1687 static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1688 {
1689 	struct listener *l;
1690 	int mss;
1691 
1692 	if (!*args[cur_arg + 1]) {
1693 		memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
1694 		return ERR_ALERT | ERR_FATAL;
1695 	}
1696 
1697 	mss = atoi(args[cur_arg + 1]);
1698 	if (!mss || abs(mss) > 65535) {
1699 		memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
1700 		return ERR_ALERT | ERR_FATAL;
1701 	}
1702 
1703 	list_for_each_entry(l, &conf->listeners, by_bind) {
1704 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1705 			l->maxseg = mss;
1706 	}
1707 
1708 	return 0;
1709 }
1710 #endif
1711 
1712 #ifdef TCP_USER_TIMEOUT
1713 /* parse the "tcp-ut" bind keyword */
bind_parse_tcp_ut(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1714 static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1715 {
1716 	const char *ptr = NULL;
1717 	struct listener *l;
1718 	unsigned int timeout;
1719 
1720 	if (!*args[cur_arg + 1]) {
1721 		memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
1722 		return ERR_ALERT | ERR_FATAL;
1723 	}
1724 
1725 	ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
1726 	if (ptr) {
1727 		memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
1728 		return ERR_ALERT | ERR_FATAL;
1729 	}
1730 
1731 	list_for_each_entry(l, &conf->listeners, by_bind) {
1732 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1733 			l->tcp_ut = timeout;
1734 	}
1735 
1736 	return 0;
1737 }
1738 #endif
1739 
1740 #ifdef SO_BINDTODEVICE
1741 /* parse the "interface" bind keyword */
bind_parse_interface(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1742 static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1743 {
1744 	struct listener *l;
1745 
1746 	if (!*args[cur_arg + 1]) {
1747 		memprintf(err, "'%s' : missing interface name", args[cur_arg]);
1748 		return ERR_ALERT | ERR_FATAL;
1749 	}
1750 
1751 	list_for_each_entry(l, &conf->listeners, by_bind) {
1752 		if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1753 			l->interface = strdup(args[cur_arg + 1]);
1754 	}
1755 
1756 	return 0;
1757 }
1758 #endif
1759 
1760 #ifdef CONFIG_HAP_NS
1761 /* parse the "namespace" bind keyword */
bind_parse_namespace(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1762 static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1763 {
1764 	struct listener *l;
1765 	char *namespace = NULL;
1766 
1767 	if (!*args[cur_arg + 1]) {
1768 		memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
1769 		return ERR_ALERT | ERR_FATAL;
1770 	}
1771 	namespace = args[cur_arg + 1];
1772 
1773 	list_for_each_entry(l, &conf->listeners, by_bind) {
1774 		l->netns = netns_store_lookup(namespace, strlen(namespace));
1775 
1776 		if (l->netns == NULL)
1777 			l->netns = netns_store_insert(namespace);
1778 
1779 		if (l->netns == NULL) {
1780 			Alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
1781 			return ERR_ALERT | ERR_FATAL;
1782 		}
1783 	}
1784 	return 0;
1785 }
1786 #endif
1787 
1788 #ifdef TCP_USER_TIMEOUT
1789 /* parse the "tcp-ut" server keyword */
srv_parse_tcp_ut(char ** args,int * cur_arg,struct proxy * px,struct server * newsrv,char ** err)1790 static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
1791 {
1792 	const char *ptr = NULL;
1793 	unsigned int timeout;
1794 
1795 	if (!*args[*cur_arg + 1]) {
1796 		memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
1797 		return ERR_ALERT | ERR_FATAL;
1798 	}
1799 
1800 	ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
1801 	if (ptr) {
1802 		memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
1803 		return ERR_ALERT | ERR_FATAL;
1804 	}
1805 
1806 	if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
1807 		newsrv->tcp_ut = timeout;
1808 
1809 	return 0;
1810 }
1811 #endif
1812 
1813 
1814 /* Note: must not be declared <const> as its list will be overwritten.
1815  * Note: fetches that may return multiple types must be declared as the lowest
1816  * common denominator, the type that can be casted into all other ones. For
1817  * instance v4/v6 must be declared v4.
1818  */
1819 static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
1820 	{ "dst",      smp_fetch_dst,   0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
1821 	{ "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
1822 	{ "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
1823 	{ "src",      smp_fetch_src,   0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
1824 	{ "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
1825 	{ "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
1826 #ifdef TCP_INFO
1827 	{ "fc_rtt",           smp_fetch_fc_rtt,           ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
1828 	{ "fc_rttvar",        smp_fetch_fc_rttvar,        ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
1829 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
1830 	{ "fc_unacked",       smp_fetch_fc_unacked,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1831 	{ "fc_sacked",        smp_fetch_fc_sacked,        ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1832 	{ "fc_retrans",       smp_fetch_fc_retrans,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1833 	{ "fc_fackets",       smp_fetch_fc_fackets,       ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1834 	{ "fc_lost",          smp_fetch_fc_lost,          ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1835 	{ "fc_reordering",    smp_fetch_fc_reordering,    ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
1836 #endif // linux || freebsd || netbsd
1837 #endif // TCP_INFO
1838 	{ /* END */ },
1839 }};
1840 
1841 /************************************************************************/
1842 /*           All supported bind keywords must be declared here.         */
1843 /************************************************************************/
1844 
1845 /* Note: must not be declared <const> as its list will be overwritten.
1846  * Please take care of keeping this list alphabetically sorted, doing so helps
1847  * all code contributors.
1848  * Optional keywords are also declared with a NULL ->parse() function so that
1849  * the config parser can report an appropriate error when a known keyword was
1850  * not enabled.
1851  */
1852 static struct bind_kw_list bind_kws = { "TCP", { }, {
1853 #ifdef TCP_DEFER_ACCEPT
1854 	{ "defer-accept",  bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
1855 #endif
1856 #ifdef SO_BINDTODEVICE
1857 	{ "interface",     bind_parse_interface,    1 }, /* specifically bind to this interface */
1858 #endif
1859 #ifdef TCP_MAXSEG
1860 	{ "mss",           bind_parse_mss,          1 }, /* set MSS of listening socket */
1861 #endif
1862 #ifdef TCP_USER_TIMEOUT
1863 	{ "tcp-ut",        bind_parse_tcp_ut,       1 }, /* set User Timeout on listening socket */
1864 #endif
1865 #ifdef TCP_FASTOPEN
1866 	{ "tfo",           bind_parse_tfo,          0 }, /* enable TCP_FASTOPEN of listening socket */
1867 #endif
1868 #ifdef CONFIG_HAP_TRANSPARENT
1869 	{ "transparent",   bind_parse_transparent,  0 }, /* transparently bind to the specified addresses */
1870 #endif
1871 #ifdef IPV6_V6ONLY
1872 	{ "v4v6",          bind_parse_v4v6,         0 }, /* force socket to bind to IPv4+IPv6 */
1873 	{ "v6only",        bind_parse_v6only,       0 }, /* force socket to bind to IPv6 only */
1874 #endif
1875 #ifdef CONFIG_HAP_NS
1876 	{ "namespace",     bind_parse_namespace,    1 },
1877 #endif
1878 	/* the versions with the NULL parse function*/
1879 	{ "defer-accept",  NULL,  0 },
1880 	{ "interface",     NULL,  1 },
1881 	{ "mss",           NULL,  1 },
1882 	{ "transparent",   NULL,  0 },
1883 	{ "v4v6",          NULL,  0 },
1884 	{ "v6only",        NULL,  0 },
1885 	{ NULL, NULL, 0 },
1886 }};
1887 
1888 static struct srv_kw_list srv_kws = { "TCP", { }, {
1889 #ifdef TCP_USER_TIMEOUT
1890 	{ "tcp-ut",        srv_parse_tcp_ut,        1,  0 }, /* set TCP user timeout on server */
1891 #endif
1892 	{ NULL, NULL, 0 },
1893 }};
1894 
1895 static struct action_kw_list tcp_req_conn_actions = {ILH, {
1896 	{ "silent-drop",  tcp_parse_silent_drop },
1897 	{ "set-src",      tcp_parse_set_src_dst },
1898 	{ "set-src-port", tcp_parse_set_src_dst },
1899 	{ "set-dst"     , tcp_parse_set_src_dst },
1900 	{ "set-dst-port", tcp_parse_set_src_dst },
1901 	{ /* END */ }
1902 }};
1903 
1904 static struct action_kw_list tcp_req_sess_actions = {ILH, {
1905 	{ "silent-drop",  tcp_parse_silent_drop },
1906 	{ "set-src",      tcp_parse_set_src_dst },
1907 	{ "set-src-port", tcp_parse_set_src_dst },
1908 	{ "set-dst"     , tcp_parse_set_src_dst },
1909 	{ "set-dst-port", tcp_parse_set_src_dst },
1910 	{ /* END */ }
1911 }};
1912 
1913 static struct action_kw_list tcp_req_cont_actions = {ILH, {
1914 	{ "silent-drop", tcp_parse_silent_drop },
1915 	{ /* END */ }
1916 }};
1917 
1918 static struct action_kw_list tcp_res_cont_actions = {ILH, {
1919 	{ "silent-drop", tcp_parse_silent_drop },
1920 	{ /* END */ }
1921 }};
1922 
1923 static struct action_kw_list http_req_actions = {ILH, {
1924 	{ "silent-drop",  tcp_parse_silent_drop },
1925 	{ "set-src",      tcp_parse_set_src_dst },
1926 	{ "set-src-port", tcp_parse_set_src_dst },
1927 	{ "set-dst",      tcp_parse_set_src_dst },
1928 	{ "set-dst-port", tcp_parse_set_src_dst },
1929 	{ /* END */ }
1930 }};
1931 
1932 static struct action_kw_list http_res_actions = {ILH, {
1933 	{ "silent-drop", tcp_parse_silent_drop },
1934 	{ /* END */ }
1935 }};
1936 
1937 
1938 __attribute__((constructor))
__tcp_protocol_init(void)1939 static void __tcp_protocol_init(void)
1940 {
1941 	protocol_register(&proto_tcpv4);
1942 	protocol_register(&proto_tcpv6);
1943 	sample_register_fetches(&sample_fetch_keywords);
1944 	bind_register_keywords(&bind_kws);
1945 	srv_register_keywords(&srv_kws);
1946 	tcp_req_conn_keywords_register(&tcp_req_conn_actions);
1947 	tcp_req_sess_keywords_register(&tcp_req_sess_actions);
1948 	tcp_req_cont_keywords_register(&tcp_req_cont_actions);
1949 	tcp_res_cont_keywords_register(&tcp_res_cont_actions);
1950 	http_req_keywords_register(&http_req_actions);
1951 	http_res_keywords_register(&http_res_actions);
1952 }
1953 
1954 
1955 /*
1956  * Local variables:
1957  *  c-indent-level: 8
1958  *  c-basic-offset: 8
1959  * End:
1960  */
1961