1 /*
2 * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3 *
4 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 /* this is to have tcp_info defined on systems using musl
14 * library, such as Alpine Linux
15 */
16 #define _GNU_SOURCE
17
18 #include <ctype.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <sys/un.h>
31
32 #include <netinet/tcp.h>
33 #include <netinet/in.h>
34
35 #include <common/compat.h>
36 #include <common/config.h>
37 #include <common/debug.h>
38 #include <common/errors.h>
39 #include <common/mini-clist.h>
40 #include <common/standard.h>
41 #include <common/namespace.h>
42
43 #include <types/action.h>
44 #include <types/connection.h>
45 #include <types/global.h>
46 #include <types/stream.h>
47
48 #include <proto/arg.h>
49 #include <proto/channel.h>
50 #include <proto/connection.h>
51 #include <proto/fd.h>
52 #include <proto/listener.h>
53 #include <proto/log.h>
54 #include <proto/port_range.h>
55 #include <proto/protocol.h>
56 #include <proto/proto_http.h>
57 #include <proto/proto_tcp.h>
58 #include <proto/proxy.h>
59 #include <proto/sample.h>
60 #include <proto/server.h>
61 #include <proto/task.h>
62 #include <proto/tcp_rules.h>
63
/* Forward declarations of the listener callbacks which are referenced by the
 * protocol descriptors below (.bind_all, .bind and .add respectively) before
 * being defined. Note that tcp_bind_listener() is declared static here, so
 * its later definition keeps internal linkage even though it does not repeat
 * the keyword.
 */
static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
static void tcpv4_add_listener(struct listener *listener, int port);
static void tcpv6_add_listener(struct listener *listener, int port);
68
69 /* Note: must not be declared <const> as its list will be overwritten */
70 static struct protocol proto_tcpv4 = {
71 .name = "tcpv4",
72 .sock_domain = AF_INET,
73 .sock_type = SOCK_STREAM,
74 .sock_prot = IPPROTO_TCP,
75 .sock_family = AF_INET,
76 .sock_addrlen = sizeof(struct sockaddr_in),
77 .l3_addrlen = 32/8,
78 .accept = &listener_accept,
79 .connect = tcp_connect_server,
80 .bind = tcp_bind_listener,
81 .bind_all = tcp_bind_listeners,
82 .unbind_all = unbind_all_listeners,
83 .enable_all = enable_all_listeners,
84 .get_src = tcp_get_src,
85 .get_dst = tcp_get_dst,
86 .drain = tcp_drain,
87 .pause = tcp_pause_listener,
88 .add = tcpv4_add_listener,
89 .listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
90 .nb_listeners = 0,
91 };
92
93 /* Note: must not be declared <const> as its list will be overwritten */
94 static struct protocol proto_tcpv6 = {
95 .name = "tcpv6",
96 .sock_domain = AF_INET6,
97 .sock_type = SOCK_STREAM,
98 .sock_prot = IPPROTO_TCP,
99 .sock_family = AF_INET6,
100 .sock_addrlen = sizeof(struct sockaddr_in6),
101 .l3_addrlen = 128/8,
102 .accept = &listener_accept,
103 .connect = tcp_connect_server,
104 .bind = tcp_bind_listener,
105 .bind_all = tcp_bind_listeners,
106 .unbind_all = unbind_all_listeners,
107 .enable_all = enable_all_listeners,
108 .get_src = tcp_get_src,
109 .get_dst = tcp_get_dst,
110 .drain = tcp_drain,
111 .pause = tcp_pause_listener,
112 .add = tcpv6_add_listener,
113 .listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
114 .nb_listeners = 0,
115 };
116
/* Default TCP parameters, got by opening a temporary TCP socket.
 * Both values start at -1, meaning "not probed yet". tcp_bind_listener()
 * switches them to -2 right before probing so that the probe is not attempted
 * again, then lets getsockopt(TCP_MAXSEG) store the system's default there.
 * Only strictly positive values are used as a default MSS.
 */
#ifdef TCP_MAXSEG
static THREAD_LOCAL int default_tcp_maxseg = -1;
static THREAD_LOCAL int default_tcp6_maxseg = -1;
#endif
122
123 /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
124 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
125 * - 0 : ignore remote address (may even be a NULL pointer)
126 * - 1 : use provided address
127 * - 2 : use provided port
128 * - 3 : use both
129 *
130 * The function supports multiple foreign binding methods :
131 * - linux_tproxy: we directly bind to the foreign address
132 * The second one can be used as a fallback for the first one.
133 * This function returns 0 when everything's OK, 1 if it could not bind, to the
134 * local address, 2 if it could not bind to the foreign address.
135 */
tcp_bind_socket(int fd,int flags,struct sockaddr_storage * local,struct sockaddr_storage * remote)136 int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
137 {
138 struct sockaddr_storage bind_addr;
139 int foreign_ok = 0;
140 int ret;
141 static THREAD_LOCAL int ip_transp_working = 1;
142 static THREAD_LOCAL int ip6_transp_working = 1;
143
144 switch (local->ss_family) {
145 case AF_INET:
146 if (flags && ip_transp_working) {
147 /* This deserves some explanation. Some platforms will support
148 * multiple combinations of certain methods, so we try the
149 * supported ones until one succeeds.
150 */
151 if (0
152 #if defined(IP_TRANSPARENT)
153 || (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0)
154 #endif
155 #if defined(IP_FREEBIND)
156 || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
157 #endif
158 #if defined(IP_BINDANY)
159 || (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0)
160 #endif
161 #if defined(SO_BINDANY)
162 || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
163 #endif
164 )
165 foreign_ok = 1;
166 else
167 ip_transp_working = 0;
168 }
169 break;
170 case AF_INET6:
171 if (flags && ip6_transp_working) {
172 if (0
173 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
174 || (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0)
175 #endif
176 #if defined(IP_FREEBIND)
177 || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
178 #endif
179 #if defined(IPV6_BINDANY)
180 || (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0)
181 #endif
182 #if defined(SO_BINDANY)
183 || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
184 #endif
185 )
186 foreign_ok = 1;
187 else
188 ip6_transp_working = 0;
189 }
190 break;
191 }
192
193 if (flags) {
194 memset(&bind_addr, 0, sizeof(bind_addr));
195 bind_addr.ss_family = remote->ss_family;
196 switch (remote->ss_family) {
197 case AF_INET:
198 if (flags & 1)
199 ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
200 if (flags & 2)
201 ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
202 break;
203 case AF_INET6:
204 if (flags & 1)
205 ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
206 if (flags & 2)
207 ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
208 break;
209 default:
210 /* we don't want to try to bind to an unknown address family */
211 foreign_ok = 0;
212 }
213 }
214
215 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
216 if (foreign_ok) {
217 if (is_inet_addr(&bind_addr)) {
218 ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
219 if (ret < 0)
220 return 2;
221 }
222 }
223 else {
224 if (is_inet_addr(local)) {
225 ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
226 if (ret < 0)
227 return 1;
228 }
229 }
230
231 if (!flags)
232 return 0;
233
234 if (!foreign_ok)
235 /* we could not bind to a foreign address */
236 return 2;
237
238 return 0;
239 }
240
create_server_socket(struct connection * conn)241 static int create_server_socket(struct connection *conn)
242 {
243 const struct netns_entry *ns = NULL;
244
245 #ifdef CONFIG_HAP_NS
246 if (objt_server(conn->target)) {
247 if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
248 ns = conn->proxy_netns;
249 else
250 ns = __objt_server(conn->target)->netns;
251 }
252 #endif
253 return my_socketat(ns, conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP);
254 }
255
256 /*
257 * This function initiates a TCP connection establishment to the target assigned
258 * to connection <conn> using (si->{target,addr.to}). A source address may be
259 * pointed to by conn->addr.from in case of transparent proxying. Normal source
260 * bind addresses are still determined locally (due to the possible need of a
261 * source port). conn->target may point either to a valid server or to a backend,
262 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
263 * supported. The <data> parameter is a boolean indicating whether there are data
264 * waiting for being sent or not, in order to adjust data write polling and on
265 * some platforms, the ability to avoid an empty initial ACK. The <delack> argument
266 * allows the caller to force using a delayed ACK when establishing the connection :
267 * - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
268 * - 1 = delayed ACK if backend has tcp-smart-connect, regardless of data
269 * - 2 = delayed ACK regardless of backend options
270 *
271 * Note that a pending send_proxy message accounts for data.
272 *
273 * It can return one of :
274 * - SF_ERR_NONE if everything's OK
275 * - SF_ERR_SRVTO if there are no more servers
276 * - SF_ERR_SRVCL if the connection was refused by the server
277 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
278 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
279 * - SF_ERR_INTERNAL for any other purely internal errors
280 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
281 *
282 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
283 * it's invalid and the caller has nothing to do.
284 */
285
tcp_connect_server(struct connection * conn,int data,int delack)286 int tcp_connect_server(struct connection *conn, int data, int delack)
287 {
288 int fd;
289 struct server *srv;
290 struct proxy *be;
291 struct conn_src *src;
292
293 conn->flags = CO_FL_WAIT_L4_CONN; /* connection in progress */
294
295 switch (obj_type(conn->target)) {
296 case OBJ_TYPE_PROXY:
297 be = objt_proxy(conn->target);
298 srv = NULL;
299 break;
300 case OBJ_TYPE_SERVER:
301 srv = objt_server(conn->target);
302 be = srv->proxy;
303 break;
304 default:
305 conn->flags |= CO_FL_ERROR;
306 return SF_ERR_INTERNAL;
307 }
308
309 fd = conn->handle.fd = create_server_socket(conn);
310
311 if (fd == -1) {
312 qfprintf(stderr, "Cannot get a server socket.\n");
313
314 if (errno == ENFILE) {
315 conn->err_code = CO_ER_SYS_FDLIM;
316 send_log(be, LOG_EMERG,
317 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
318 be->id, maxfd);
319 }
320 else if (errno == EMFILE) {
321 conn->err_code = CO_ER_PROC_FDLIM;
322 send_log(be, LOG_EMERG,
323 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
324 be->id, maxfd);
325 }
326 else if (errno == ENOBUFS || errno == ENOMEM) {
327 conn->err_code = CO_ER_SYS_MEMLIM;
328 send_log(be, LOG_EMERG,
329 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
330 be->id, maxfd);
331 }
332 else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
333 conn->err_code = CO_ER_NOPROTO;
334 }
335 else
336 conn->err_code = CO_ER_SOCK_ERR;
337
338 /* this is a resource error */
339 conn->flags |= CO_FL_ERROR;
340 return SF_ERR_RESOURCE;
341 }
342
343 if (fd >= global.maxsock) {
344 /* do not log anything there, it's a normal condition when this option
345 * is used to serialize connections to a server !
346 */
347 ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
348 close(fd);
349 conn->err_code = CO_ER_CONF_FDLIM;
350 conn->flags |= CO_FL_ERROR;
351 return SF_ERR_PRXCOND; /* it is a configuration limit */
352 }
353
354 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
355 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
356 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
357 close(fd);
358 conn->err_code = CO_ER_SOCK_ERR;
359 conn->flags |= CO_FL_ERROR;
360 return SF_ERR_INTERNAL;
361 }
362
363 if (be->options & PR_O_TCP_SRV_KA)
364 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
365
366 /* allow specific binding :
367 * - server-specific at first
368 * - proxy-specific next
369 */
370 if (srv && srv->conn_src.opts & CO_SRC_BIND)
371 src = &srv->conn_src;
372 else if (be->conn_src.opts & CO_SRC_BIND)
373 src = &be->conn_src;
374 else
375 src = NULL;
376
377 if (src) {
378 int ret, flags = 0;
379
380 if (is_inet_addr(&conn->addr.from)) {
381 switch (src->opts & CO_SRC_TPROXY_MASK) {
382 case CO_SRC_TPROXY_CLI:
383 conn->flags |= CO_FL_PRIVATE;
384 /* fall through */
385 case CO_SRC_TPROXY_ADDR:
386 flags = 3;
387 break;
388 case CO_SRC_TPROXY_CIP:
389 case CO_SRC_TPROXY_DYN:
390 conn->flags |= CO_FL_PRIVATE;
391 flags = 1;
392 break;
393 }
394 }
395
396 #ifdef SO_BINDTODEVICE
397 /* Note: this might fail if not CAP_NET_RAW */
398 if (src->iface_name)
399 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
400 #endif
401
402 if (src->sport_range) {
403 int attempts = 10; /* should be more than enough to find a spare port */
404 struct sockaddr_storage sa;
405
406 ret = 1;
407 memcpy(&sa, &src->source_addr, sizeof(sa));
408
409 do {
410 /* note: in case of retry, we may have to release a previously
411 * allocated port, hence this loop's construct.
412 */
413 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
414 fdinfo[fd].port_range = NULL;
415
416 if (!attempts)
417 break;
418 attempts--;
419
420 fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
421 if (!fdinfo[fd].local_port) {
422 conn->err_code = CO_ER_PORT_RANGE;
423 break;
424 }
425
426 fdinfo[fd].port_range = src->sport_range;
427 set_host_port(&sa, fdinfo[fd].local_port);
428
429 ret = tcp_bind_socket(fd, flags, &sa, &conn->addr.from);
430 if (ret != 0)
431 conn->err_code = CO_ER_CANT_BIND;
432 } while (ret != 0); /* binding NOK */
433 }
434 else {
435 #ifdef IP_BIND_ADDRESS_NO_PORT
436 static THREAD_LOCAL int bind_address_no_port = 1;
437 setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
438 #endif
439 ret = tcp_bind_socket(fd, flags, &src->source_addr, &conn->addr.from);
440 if (ret != 0)
441 conn->err_code = CO_ER_CANT_BIND;
442 }
443
444 if (unlikely(ret != 0)) {
445 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
446 fdinfo[fd].port_range = NULL;
447 close(fd);
448
449 if (ret == 1) {
450 ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
451 be->id);
452 send_log(be, LOG_EMERG,
453 "Cannot bind to source address before connect() for backend %s.\n",
454 be->id);
455 } else {
456 ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
457 be->id);
458 send_log(be, LOG_EMERG,
459 "Cannot bind to tproxy source address before connect() for backend %s.\n",
460 be->id);
461 }
462 conn->flags |= CO_FL_ERROR;
463 return SF_ERR_RESOURCE;
464 }
465 }
466
467 #if defined(TCP_QUICKACK)
468 /* disabling tcp quick ack now allows the first request to leave the
469 * machine with the first ACK. We only do this if there are pending
470 * data in the buffer.
471 */
472 if (delack == 2 || ((delack || data || conn->send_proxy_ofs) && (be->options2 & PR_O2_SMARTCON)))
473 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
474 #endif
475
476 #ifdef TCP_USER_TIMEOUT
477 /* there is not much more we can do here when it fails, it's still minor */
478 if (srv && srv->tcp_ut)
479 setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
480 #endif
481 if (global.tune.server_sndbuf)
482 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
483
484 if (global.tune.server_rcvbuf)
485 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
486
487 if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) {
488 if (errno == EINPROGRESS || errno == EALREADY) {
489 /* common case, let's wait for connect status */
490 conn->flags |= CO_FL_WAIT_L4_CONN;
491 }
492 else if (errno == EISCONN) {
493 /* should normally not happen but if so, indicates that it's OK */
494 conn->flags &= ~CO_FL_WAIT_L4_CONN;
495 }
496 else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
497 char *msg;
498 if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
499 msg = "no free ports";
500 conn->err_code = CO_ER_FREE_PORTS;
501 }
502 else {
503 msg = "local address already in use";
504 conn->err_code = CO_ER_ADDR_INUSE;
505 }
506
507 qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
508 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
509 fdinfo[fd].port_range = NULL;
510 close(fd);
511 send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
512 conn->flags |= CO_FL_ERROR;
513 return SF_ERR_RESOURCE;
514 } else if (errno == ETIMEDOUT) {
515 //qfprintf(stderr,"Connect(): ETIMEDOUT");
516 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
517 fdinfo[fd].port_range = NULL;
518 close(fd);
519 conn->err_code = CO_ER_SOCK_ERR;
520 conn->flags |= CO_FL_ERROR;
521 return SF_ERR_SRVTO;
522 } else {
523 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
524 //qfprintf(stderr,"Connect(): %d", errno);
525 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
526 fdinfo[fd].port_range = NULL;
527 close(fd);
528 conn->err_code = CO_ER_SOCK_ERR;
529 conn->flags |= CO_FL_ERROR;
530 return SF_ERR_SRVCL;
531 }
532 }
533 else {
534 /* connect() == 0, this is great! */
535 conn->flags &= ~CO_FL_WAIT_L4_CONN;
536 }
537
538 conn->flags |= CO_FL_ADDR_TO_SET;
539
540 /* Prepare to send a few handshakes related to the on-wire protocol. */
541 if (conn->send_proxy_ofs)
542 conn->flags |= CO_FL_SEND_PROXY;
543
544 conn_ctrl_init(conn); /* registers the FD */
545 fdtab[fd].linger_risk = 1; /* close hard if needed */
546
547 if (conn_xprt_init(conn) < 0) {
548 conn_full_close(conn);
549 conn->flags |= CO_FL_ERROR;
550 return SF_ERR_RESOURCE;
551 }
552
553 if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN | CO_FL_EARLY_SSL_HS)) {
554 conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */
555 if (conn->flags & CO_FL_EARLY_SSL_HS)
556 conn_xprt_want_send(conn);
557 }
558 else {
559 /* If there's no more handshake, we need to notify the data
560 * layer when the connection is already OK otherwise we'll have
561 * no other opportunity to do it later (eg: health checks).
562 */
563 data = 1;
564 }
565
566 if (data)
567 conn_xprt_want_send(conn); /* prepare to send data if any */
568
569 return SF_ERR_NONE; /* connection is OK */
570 }
571
572
/*
 * Fills <sa> (at most <salen> bytes) with the source address of the connection
 * carried by socket <fd>. <dir> tells which side we are: 0 for a listener, in
 * which case the source is the peer's address, non-zero for an initiator, in
 * which case the source is the socket's own local address. Returns 0 on
 * success, -1 on error.
 */
int tcp_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	socklen_t len = salen;

	return dir ? getsockname(fd, sa, &len) : getpeername(fd, sa, &len);
}
586
587
/*
 * Fills <sa> (at most <salen> bytes) with the destination address of the
 * connection carried by socket <fd>. <dir> tells which side we are: 0 for a
 * listener, non-zero for an initiator. For an initiator, the destination is
 * the peer's address. For a listener, it is the socket's local address, except
 * when built with both TPROXY and SO_ORIGINAL_DST, where the address the
 * client initially targeted is retrieved for IPv4 sockets if it is available.
 * Returns 0 on success, -1 on error.
 */
int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int status;

	if (dir)
		return getpeername(fd, sa, &salen);

	status = getsockname(fd, sa, &salen);
	if (status < 0)
		return status;

#if defined(TPROXY) && defined(SO_ORIGINAL_DST)
	/* With TPROXY and Netfilter's NAT, the IPv4 address seen before
	 * DNAT/REDIRECT can be retrieved. This must not be attempted with
	 * other families because v6-mapped IPv4 addresses are still reported
	 * as v4. A failure here is not an error, the local address obtained
	 * above is kept.
	 */
	if (((struct sockaddr_storage *)sa)->ss_family == AF_INET &&
	    getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return status;
}
618
619 /* Tries to drain any pending incoming data from the socket to reach the
620 * receive shutdown. Returns positive if the shutdown was found, negative
621 * if EAGAIN was hit, otherwise zero. This is useful to decide whether we
622 * can close a connection cleanly are we must kill it hard.
623 */
tcp_drain(int fd)624 int tcp_drain(int fd)
625 {
626 int turns = 2;
627 int len;
628
629 while (turns) {
630 #ifdef MSG_TRUNC_CLEARS_INPUT
631 len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
632 if (len == -1 && errno == EFAULT)
633 #endif
634 len = recv(fd, trash.str, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
635
636 if (len == 0) {
637 /* cool, shutdown received */
638 fdtab[fd].linger_risk = 0;
639 return 1;
640 }
641
642 if (len < 0) {
643 if (errno == EAGAIN) {
644 /* connection not closed yet */
645 fd_cant_recv(fd);
646 return -1;
647 }
648 if (errno == EINTR) /* oops, try again */
649 continue;
650 /* other errors indicate a dead connection, fine. */
651 fdtab[fd].linger_risk = 0;
652 return 1;
653 }
654 /* OK we read some data, let's try again once */
655 turns--;
656 }
657 /* some data are still present, give up */
658 return 0;
659 }
660
661 /* This is the callback which is set when a connection establishment is pending
662 * and we have nothing to send. It updates the FD polling status. It returns 0
663 * if it fails in a fatal way or needs to poll to go further, otherwise it
664 * returns non-zero and removes the CO_FL_WAIT_L4_CONN flag from the connection's
665 * flags. In case of error, it sets CO_FL_ERROR and leaves the error code in
666 * errno. The error checking is done in two passes in order to limit the number
667 * of syscalls in the normal case :
668 * - if POLL_ERR was reported by the poller, we check for a pending error on
669 * the socket before proceeding. If found, it's assigned to errno so that
670 * upper layers can see it.
671 * - otherwise connect() is used to check the connection state again, since
672 * the getsockopt return cannot reliably be used to know if the connection
673 * is still pending or ready. This one may often return an error as well,
674 * since we don't always have POLL_ERR (eg: OSX or cached events).
675 */
tcp_connect_probe(struct connection * conn)676 int tcp_connect_probe(struct connection *conn)
677 {
678 int fd = conn->handle.fd;
679 socklen_t lskerr;
680 int skerr;
681
682 if (conn->flags & CO_FL_ERROR)
683 return 0;
684
685 if (!conn_ctrl_ready(conn))
686 return 0;
687
688 if (!(conn->flags & CO_FL_WAIT_L4_CONN))
689 return 1; /* strange we were called while ready */
690
691 if (!fd_send_ready(fd))
692 return 0;
693
694 /* we might be the first witness of FD_POLL_ERR. Note that FD_POLL_HUP
695 * without FD_POLL_IN also indicates a hangup without input data meaning
696 * there was no connection.
697 */
698 if (fdtab[fd].ev & FD_POLL_ERR ||
699 (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) {
700 skerr = 0;
701 lskerr = sizeof(skerr);
702 getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
703 errno = skerr;
704 if (errno == EAGAIN)
705 errno = 0;
706 if (errno)
707 goto out_error;
708 }
709
710 /* Use connect() to check the state of the socket. This has the
711 * advantage of giving us the following info :
712 * - error
713 * - connecting (EALREADY, EINPROGRESS)
714 * - connected (EISCONN, 0)
715 */
716 if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) < 0) {
717 if (errno == EALREADY || errno == EINPROGRESS) {
718 __conn_sock_stop_recv(conn);
719 fd_cant_send(fd);
720 return 0;
721 }
722
723 if (errno && errno != EISCONN)
724 goto out_error;
725
726 /* otherwise we're connected */
727 }
728
729 /* The FD is ready now, we'll mark the connection as complete and
730 * forward the event to the transport layer which will notify the
731 * data layer.
732 */
733 conn->flags &= ~CO_FL_WAIT_L4_CONN;
734 return 1;
735
736 out_error:
737 /* Write error on the file descriptor. Report it to the connection
738 * and disable polling on this FD.
739 */
740 fdtab[fd].linger_risk = 0;
741 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
742 __conn_sock_stop_both(conn);
743 return 0;
744 }
745
/* Compares the two IPv4/IPv6 socket addresses <a> and <b>. Returns zero when
 * both have the same family, the same port and the same address, and a
 * non-zero value in any other case, including when the family is neither
 * AF_INET nor AF_INET6. Only the family, port and address fields are looked
 * at. XXX: Should probably be elsewhere.
 */
static int compare_sockaddr(struct sockaddr_storage *a, struct sockaddr_storage *b)
{
	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family == AF_INET) {
		struct sockaddr_in *in_a = (struct sockaddr_in *)a;
		struct sockaddr_in *in_b = (struct sockaddr_in *)b;

		if (in_a->sin_port != in_b->sin_port)
			return -1;
		return memcmp(&in_a->sin_addr, &in_b->sin_addr, sizeof(in_a->sin_addr));
	}

	if (a->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_a = (struct sockaddr_in6 *)a;
		struct sockaddr_in6 *in6_b = (struct sockaddr_in6 *)b;

		if (in6_a->sin6_port != in6_b->sin6_port)
			return -1;
		return memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr, sizeof(in6_a->sin6_addr));
	}

	/* unsupported family: never considered equal */
	return -1;
}
774
775 #define LI_MANDATORY_FLAGS (LI_O_FOREIGN | LI_O_V6ONLY | LI_O_V4V6)
776 /* When binding the listeners, check if a socket has been sent to us by the
777 * previous process that we could reuse, instead of creating a new one.
778 */
tcp_find_compatible_fd(struct listener * l)779 static int tcp_find_compatible_fd(struct listener *l)
780 {
781 struct xfer_sock_list *xfer_sock = xfer_sock_list;
782 int ret = -1;
783
784 while (xfer_sock) {
785 if (!compare_sockaddr(&xfer_sock->addr, &l->addr)) {
786 if ((l->interface == NULL && xfer_sock->iface == NULL) ||
787 (l->interface != NULL && xfer_sock->iface != NULL &&
788 !strcmp(l->interface, xfer_sock->iface))) {
789 if ((l->options & LI_MANDATORY_FLAGS) ==
790 (xfer_sock->options & LI_MANDATORY_FLAGS)) {
791 if ((xfer_sock->namespace == NULL &&
792 l->netns == NULL)
793 #ifdef CONFIG_HAP_NS
794 || (xfer_sock->namespace != NULL &&
795 l->netns != NULL &&
796 !strcmp(xfer_sock->namespace,
797 l->netns->node.key))
798 #endif
799 ) {
800 break;
801 }
802
803 }
804 }
805 }
806 xfer_sock = xfer_sock->next;
807 }
808 if (xfer_sock != NULL) {
809 ret = xfer_sock->fd;
810 if (xfer_sock == xfer_sock_list)
811 xfer_sock_list = xfer_sock->next;
812 if (xfer_sock->prev)
813 xfer_sock->prev->next = xfer_sock->next;
814 if (xfer_sock->next)
815 xfer_sock->next->prev = xfer_sock->prev;
816 free(xfer_sock->iface);
817 free(xfer_sock->namespace);
818 free(xfer_sock);
819 }
820 return ret;
821 }
822 #undef L1_MANDATORY_FLAGS
823
824 /* This function tries to bind a TCPv4/v6 listener. It may return a warning or
825 * an error message in <errmsg> if the message is at most <errlen> bytes long
826 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
827 * The return value is composed from ERR_ABORT, ERR_WARN,
828 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
829 * was alright and that no message was returned. ERR_RETRYABLE means that an
830 * error occurred but that it may vanish after a retry (eg: port in use), and
831 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
832 * the meaning of the error, but just indicate that a message is present which
833 * should be displayed with the respective level. Last, ERR_ABORT indicates
834 * that it's pointless to try to start other listeners. No error message is
 * returned if <errlen> is zero.
836 */
tcp_bind_listener(struct listener * listener,char * errmsg,int errlen)837 int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
838 {
839 __label__ tcp_return, tcp_close_return;
840 int fd, err;
841 int ext, ready;
842 socklen_t ready_len;
843 const char *msg = NULL;
844 #ifdef TCP_MAXSEG
845
846 /* Create a temporary TCP socket to get default parameters we can't
847 * guess.
848 * */
849 ready_len = sizeof(default_tcp_maxseg);
850 if (default_tcp_maxseg == -1) {
851 default_tcp_maxseg = -2;
852 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
853 if (fd < 0)
854 ha_warning("Failed to create a temporary socket!\n");
855 else {
856 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp_maxseg,
857 &ready_len) == -1)
858 ha_warning("Failed to get the default value of TCP_MAXSEG\n");
859 close(fd);
860 }
861 }
862 if (default_tcp6_maxseg == -1) {
863 default_tcp6_maxseg = -2;
864 fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
865 if (fd >= 0) {
866 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &default_tcp6_maxseg,
867 &ready_len) == -1)
868 ha_warning("Failed ot get the default value of TCP_MAXSEG for IPv6\n");
869 close(fd);
870 }
871 }
872 #endif
873
874
875 /* ensure we never return garbage */
876 if (errlen)
877 *errmsg = 0;
878
879 if (listener->state != LI_ASSIGNED)
880 return ERR_NONE; /* already bound */
881
882 err = ERR_NONE;
883
884 if (listener->fd == -1)
885 listener->fd = tcp_find_compatible_fd(listener);
886
887 /* if the listener already has an fd assigned, then we were offered the
888 * fd by an external process (most likely the parent), and we don't want
889 * to create a new socket. However we still want to set a few flags on
890 * the socket.
891 */
892 fd = listener->fd;
893 ext = (fd >= 0);
894
895 if (!ext) {
896 fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);
897
898 if (fd == -1) {
899 err |= ERR_RETRYABLE | ERR_ALERT;
900 msg = "cannot create listening socket";
901 goto tcp_return;
902 }
903 }
904
905 if (fd >= global.maxsock) {
906 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
907 msg = "not enough free sockets (raise '-n' parameter)";
908 goto tcp_close_return;
909 }
910
911 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
912 err |= ERR_FATAL | ERR_ALERT;
913 msg = "cannot make socket non-blocking";
914 goto tcp_close_return;
915 }
916
917 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
918 /* not fatal but should be reported */
919 msg = "cannot do so_reuseaddr";
920 err |= ERR_ALERT;
921 }
922
923 if (listener->options & LI_O_NOLINGER)
924 setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
925 else {
926 struct linger tmplinger;
927 socklen_t len = sizeof(tmplinger);
928 if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
929 (tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
930 tmplinger.l_onoff = 0;
931 tmplinger.l_linger = 0;
932 setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
933 sizeof(tmplinger));
934 }
935 }
936
937 #ifdef SO_REUSEPORT
938 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
939 * Linux, it might return an error that we will silently ignore.
940 */
941 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
942 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
943 #endif
944
945 if (!ext && (listener->options & LI_O_FOREIGN)) {
946 switch (listener->addr.ss_family) {
947 case AF_INET:
948 if (1
949 #if defined(IP_TRANSPARENT)
950 && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
951 #endif
952 #if defined(IP_FREEBIND)
953 && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
954 #endif
955 #if defined(IP_BINDANY)
956 && (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == -1)
957 #endif
958 #if defined(SO_BINDANY)
959 && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
960 #endif
961 ) {
962 msg = "cannot make listening socket transparent";
963 err |= ERR_ALERT;
964 }
965 break;
966 case AF_INET6:
967 if (1
968 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
969 && (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == -1)
970 #endif
971 #if defined(IP_FREEBIND)
972 && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
973 #endif
974 #if defined(IPV6_BINDANY)
975 && (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == -1)
976 #endif
977 #if defined(SO_BINDANY)
978 && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
979 #endif
980 ) {
981 msg = "cannot make listening socket transparent";
982 err |= ERR_ALERT;
983 }
984 break;
985 }
986 }
987
988 #ifdef SO_BINDTODEVICE
989 /* Note: this might fail if not CAP_NET_RAW */
990 if (!ext && listener->interface) {
991 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
992 listener->interface, strlen(listener->interface) + 1) == -1) {
993 msg = "cannot bind listener to device";
994 err |= ERR_WARN;
995 }
996 }
997 #endif
998 #if defined(TCP_MAXSEG)
999 if (listener->maxseg > 0) {
1000 if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
1001 &listener->maxseg, sizeof(listener->maxseg)) == -1) {
1002 msg = "cannot set MSS";
1003 err |= ERR_WARN;
1004 }
1005 } else if (ext) {
1006 int tmpmaxseg = -1;
1007 int defaultmss;
1008 socklen_t len = sizeof(tmpmaxseg);
1009
1010 if (listener->addr.ss_family == AF_INET)
1011 defaultmss = default_tcp_maxseg;
1012 else
1013 defaultmss = default_tcp6_maxseg;
1014
1015 getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
1016 if (defaultmss > 0 &&
1017 tmpmaxseg != defaultmss &&
1018 setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &defaultmss, sizeof(defaultmss)) == -1) {
1019 msg = "cannot set MSS";
1020 err |= ERR_WARN;
1021 }
1022 }
1023 #endif
1024 #if defined(TCP_USER_TIMEOUT)
1025 if (listener->tcp_ut) {
1026 if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
1027 &listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
1028 msg = "cannot set TCP User Timeout";
1029 err |= ERR_WARN;
1030 }
1031 } else
1032 setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
1033 sizeof(zero));
1034 #endif
1035 #if defined(TCP_DEFER_ACCEPT)
1036 if (listener->options & LI_O_DEF_ACCEPT) {
1037 /* defer accept by up to one second */
1038 int accept_delay = 1;
1039 if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
1040 msg = "cannot enable DEFER_ACCEPT";
1041 err |= ERR_WARN;
1042 }
1043 } else
1044 setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
1045 sizeof(zero));
1046 #endif
1047 #if defined(TCP_FASTOPEN)
1048 if (listener->options & LI_O_TCP_FO) {
1049 /* TFO needs a queue length, let's use the configured backlog */
1050 int qlen = listener->backlog ? listener->backlog : listener->maxconn;
1051 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
1052 msg = "cannot enable TCP_FASTOPEN";
1053 err |= ERR_WARN;
1054 }
1055 } else {
1056 socklen_t len;
1057 int qlen;
1058 len = sizeof(qlen);
1059 /* Only disable fast open if it was enabled, we don't want
1060 * the kernel to create a fast open queue if there's none.
1061 */
1062 if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
1063 qlen != 0) {
1064 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
1065 sizeof(zero)) == -1) {
1066 msg = "cannot disable TCP_FASTOPEN";
1067 err |= ERR_WARN;
1068 }
1069 }
1070 }
1071 #endif
1072 #if defined(IPV6_V6ONLY)
1073 if (listener->options & LI_O_V6ONLY)
1074 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
1075 else if (listener->options & LI_O_V4V6)
1076 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
1077 #endif
1078
1079 if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
1080 err |= ERR_RETRYABLE | ERR_ALERT;
1081 msg = "cannot bind socket";
1082 goto tcp_close_return;
1083 }
1084
1085 ready = 0;
1086 ready_len = sizeof(ready);
1087 if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
1088 ready = 0;
1089
1090 if (!(ext && ready) && /* only listen if not already done by external process */
1091 listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
1092 err |= ERR_RETRYABLE | ERR_ALERT;
1093 msg = "cannot listen to socket";
1094 goto tcp_close_return;
1095 }
1096
1097 #if defined(TCP_QUICKACK)
1098 if (listener->options & LI_O_NOQUICKACK)
1099 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
1100 else
1101 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
1102 #endif
1103
1104 /* the socket is ready */
1105 listener->fd = fd;
1106 listener->state = LI_LISTEN;
1107
1108 fdtab[fd].owner = listener; /* reference the listener instead of a task */
1109 fdtab[fd].iocb = listener->proto->accept;
1110 if (listener->bind_conf->bind_thread[relative_pid-1])
1111 fd_insert(fd, listener->bind_conf->bind_thread[relative_pid-1]);
1112 else
1113 fd_insert(fd, MAX_THREADS_MASK);
1114
1115 tcp_return:
1116 if (msg && errlen) {
1117 char pn[INET6_ADDRSTRLEN];
1118
1119 addr_to_str(&listener->addr, pn, sizeof(pn));
1120 snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->addr));
1121 }
1122 return err;
1123
1124 tcp_close_return:
1125 close(fd);
1126 goto tcp_return;
1127 }
1128
1129 /* This function creates all TCP sockets bound to the protocol entry <proto>.
1130 * It is intended to be used as the protocol's bind_all() function.
1131 * The sockets will be registered but not added to any fd_set, in order not to
1132 * loose them across the fork(). A call to enable_all_listeners() is needed
1133 * to complete initialization. The return value is composed from ERR_*.
1134 *
1135 * Must be called with proto_lock held.
1136 *
1137 */
tcp_bind_listeners(struct protocol * proto,char * errmsg,int errlen)1138 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen)
1139 {
1140 struct listener *listener;
1141 int err = ERR_NONE;
1142
1143 list_for_each_entry(listener, &proto->listeners, proto_list) {
1144 err |= tcp_bind_listener(listener, errmsg, errlen);
1145 if (err & ERR_ABORT)
1146 break;
1147 }
1148
1149 return err;
1150 }
1151
1152 /* Add <listener> to the list of tcpv4 listeners, on port <port>. The
1153 * listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
1154 * The number of listeners for the protocol is updated.
1155 *
1156 * Must be called with proto_lock held.
1157 *
1158 */
tcpv4_add_listener(struct listener * listener,int port)1159 static void tcpv4_add_listener(struct listener *listener, int port)
1160 {
1161 if (listener->state != LI_INIT)
1162 return;
1163 listener->state = LI_ASSIGNED;
1164 listener->proto = &proto_tcpv4;
1165 ((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
1166 LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
1167 proto_tcpv4.nb_listeners++;
1168 }
1169
1170 /* Add <listener> to the list of tcpv6 listeners, on port <port>. The
1171 * listener's state is automatically updated from LI_INIT to LI_ASSIGNED.
1172 * The number of listeners for the protocol is updated.
1173 *
1174 * Must be called with proto_lock held.
1175 *
1176 */
tcpv6_add_listener(struct listener * listener,int port)1177 static void tcpv6_add_listener(struct listener *listener, int port)
1178 {
1179 if (listener->state != LI_INIT)
1180 return;
1181 listener->state = LI_ASSIGNED;
1182 listener->proto = &proto_tcpv6;
1183 ((struct sockaddr_in *)(&listener->addr))->sin_port = htons(port);
1184 LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
1185 proto_tcpv6.nb_listeners++;
1186 }
1187
1188 /* Pause a listener. Returns < 0 in case of failure, 0 if the listener
1189 * was totally stopped, or > 0 if correctly paused.
1190 */
tcp_pause_listener(struct listener * l)1191 int tcp_pause_listener(struct listener *l)
1192 {
1193 if (shutdown(l->fd, SHUT_WR) != 0)
1194 return -1; /* Solaris dies here */
1195
1196 if (listen(l->fd, l->backlog ? l->backlog : l->maxconn) != 0)
1197 return -1; /* OpenBSD dies here */
1198
1199 if (shutdown(l->fd, SHUT_RD) != 0)
1200 return -1; /* should always be OK */
1201 return 1;
1202 }
1203
1204 /*
1205 * Execute the "set-src" action. May be called from {tcp,http}request.
1206 * It only changes the address and tries to preserve the original port. If the
1207 * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1208 */
tcp_action_req_set_src(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1209 enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
1210 struct session *sess, struct stream *s, int flags)
1211 {
1212 struct connection *cli_conn;
1213
1214 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1215 struct sample *smp;
1216
1217 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1218 if (smp) {
1219 int port = get_net_port(&cli_conn->addr.from);
1220
1221 if (smp->data.type == SMP_T_IPV4) {
1222 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_family = AF_INET;
1223 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1224 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = port;
1225 } else if (smp->data.type == SMP_T_IPV6) {
1226 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_family = AF_INET6;
1227 memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1228 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = port;
1229 }
1230 }
1231 cli_conn->flags |= CO_FL_ADDR_FROM_SET;
1232 }
1233 return ACT_RET_CONT;
1234 }
1235
1236 /*
1237 * Execute the "set-dst" action. May be called from {tcp,http}request.
1238 * It only changes the address and tries to preserve the original port. If the
1239 * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1240 */
tcp_action_req_set_dst(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1241 enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
1242 struct session *sess, struct stream *s, int flags)
1243 {
1244 struct connection *cli_conn;
1245
1246 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1247 struct sample *smp;
1248
1249 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1250 if (smp) {
1251 int port = get_net_port(&cli_conn->addr.to);
1252
1253 if (smp->data.type == SMP_T_IPV4) {
1254 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_family = AF_INET;
1255 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1256 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = port;
1257 } else if (smp->data.type == SMP_T_IPV6) {
1258 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_family = AF_INET6;
1259 memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1260 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = port;
1261 }
1262 cli_conn->flags |= CO_FL_ADDR_TO_SET;
1263 }
1264 }
1265 return ACT_RET_CONT;
1266 }
1267
1268 /*
1269 * Execute the "set-src-port" action. May be called from {tcp,http}request.
1270 * We must test the sin_family before setting the port. If the address family
1271 * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1272 * and the port is assigned.
1273 */
tcp_action_req_set_src_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1274 enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
1275 struct session *sess, struct stream *s, int flags)
1276 {
1277 struct connection *cli_conn;
1278
1279 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1280 struct sample *smp;
1281
1282 conn_get_from_addr(cli_conn);
1283
1284 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1285 if (smp) {
1286 if (cli_conn->addr.from.ss_family == AF_INET6) {
1287 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = htons(smp->data.u.sint);
1288 } else {
1289 if (cli_conn->addr.from.ss_family != AF_INET) {
1290 cli_conn->addr.from.ss_family = AF_INET;
1291 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = 0;
1292 }
1293 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = htons(smp->data.u.sint);
1294 }
1295 }
1296 }
1297 return ACT_RET_CONT;
1298 }
1299
1300 /*
1301 * Execute the "set-dst-port" action. May be called from {tcp,http}request.
1302 * We must test the sin_family before setting the port. If the address family
1303 * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1304 * and the port is assigned.
1305 */
tcp_action_req_set_dst_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1306 enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
1307 struct session *sess, struct stream *s, int flags)
1308 {
1309 struct connection *cli_conn;
1310
1311 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1312 struct sample *smp;
1313
1314 conn_get_to_addr(cli_conn);
1315
1316 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1317 if (smp) {
1318 if (cli_conn->addr.to.ss_family == AF_INET6) {
1319 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = htons(smp->data.u.sint);
1320 } else {
1321 if (cli_conn->addr.to.ss_family != AF_INET) {
1322 cli_conn->addr.to.ss_family = AF_INET;
1323 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = 0;
1324 }
1325 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = htons(smp->data.u.sint);
1326 }
1327 }
1328 }
1329 return ACT_RET_CONT;
1330 }
1331
1332 /* Executes the "silent-drop" action. May be called from {tcp,http}{request,response} */
tcp_exec_action_silent_drop(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * strm,int flags)1333 static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *strm, int flags)
1334 {
1335 struct connection *conn = objt_conn(sess->origin);
1336
1337 if (!conn)
1338 goto out;
1339
1340 if (!conn_ctrl_ready(conn))
1341 goto out;
1342
1343 #ifdef TCP_QUICKACK
1344 /* drain is needed only to send the quick ACK */
1345 conn_sock_drain(conn);
1346
1347 /* re-enable quickack if it was disabled to ack all data and avoid
1348 * retransmits from the client that might trigger a real reset.
1349 */
1350 setsockopt(conn->handle.fd, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
1351 #endif
1352 /* lingering must absolutely be disabled so that we don't send a
1353 * shutdown(), this is critical to the TCP_REPAIR trick. When no stream
1354 * is present, returning with ERR will cause lingering to be disabled.
1355 */
1356 if (strm)
1357 strm->si[0].flags |= SI_FL_NOLINGER;
1358
1359 /* We're on the client-facing side, we must force to disable lingering to
1360 * ensure we will use an RST exclusively and kill any pending data.
1361 */
1362 fdtab[conn->handle.fd].linger_risk = 1;
1363
1364 #ifdef TCP_REPAIR
1365 if (setsockopt(conn->handle.fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
1366 /* socket will be quiet now */
1367 goto out;
1368 }
1369 #endif
1370 /* either TCP_REPAIR is not defined or it failed (eg: permissions).
1371 * Let's fall back on the TTL trick, though it only works for routed
1372 * network and has no effect on local net.
1373 */
1374 #ifdef IP_TTL
1375 if (conn->addr.from.ss_family == AF_INET)
1376 setsockopt(conn->handle.fd, SOL_IP, IP_TTL, &one, sizeof(one));
1377 #endif
1378 #ifdef IPV6_UNICAST_HOPS
1379 #if defined(SOL_IPV6)
1380 if (conn->addr.from.ss_family == AF_INET6)
1381 setsockopt(conn->handle.fd, SOL_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1382 #elif defined(IPPROTO_IPV6)
1383 if (conn->addr.from.ss_family == AF_INET6)
1384 setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
1385 #endif
1386 #endif
1387 out:
1388 /* kill the stream if any */
1389 if (strm) {
1390 channel_abort(&strm->req);
1391 channel_abort(&strm->res);
1392 strm->req.analysers &= AN_REQ_FLT_END;
1393 strm->res.analysers &= AN_RES_FLT_END;
1394 if (strm->flags & SF_BE_ASSIGNED)
1395 HA_ATOMIC_ADD(&strm->be->be_counters.denied_req, 1);
1396 if (!(strm->flags & SF_ERR_MASK))
1397 strm->flags |= SF_ERR_PRXCOND;
1398 if (!(strm->flags & SF_FINST_MASK))
1399 strm->flags |= SF_FINST_R;
1400 }
1401
1402 HA_ATOMIC_ADD(&sess->fe->fe_counters.denied_req, 1);
1403 if (sess->listener && sess->listener->counters)
1404 HA_ATOMIC_ADD(&sess->listener->counters->denied_req, 1);
1405
1406 return ACT_RET_STOP;
1407 }
1408
1409 /* parse "set-{src,dst}[-port]" action */
tcp_parse_set_src_dst(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1410 enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
1411 {
1412 int cur_arg;
1413 struct sample_expr *expr;
1414 unsigned int where;
1415
1416 cur_arg = *orig_arg;
1417 expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args);
1418 if (!expr)
1419 return ACT_RET_PRS_ERR;
1420
1421 where = 0;
1422 if (px->cap & PR_CAP_FE)
1423 where |= SMP_VAL_FE_HRQ_HDR;
1424 if (px->cap & PR_CAP_BE)
1425 where |= SMP_VAL_BE_HRQ_HDR;
1426
1427 if (!(expr->fetch->val & where)) {
1428 memprintf(err,
1429 "fetch method '%s' extracts information from '%s', none of which is available here",
1430 args[cur_arg-1], sample_src_names(expr->fetch->use));
1431 free(expr);
1432 return ACT_RET_PRS_ERR;
1433 }
1434 rule->arg.expr = expr;
1435 rule->action = ACT_CUSTOM;
1436
1437 if (!strcmp(args[*orig_arg-1], "set-src")) {
1438 rule->action_ptr = tcp_action_req_set_src;
1439 } else if (!strcmp(args[*orig_arg-1], "set-src-port")) {
1440 rule->action_ptr = tcp_action_req_set_src_port;
1441 } else if (!strcmp(args[*orig_arg-1], "set-dst")) {
1442 rule->action_ptr = tcp_action_req_set_dst;
1443 } else if (!strcmp(args[*orig_arg-1], "set-dst-port")) {
1444 rule->action_ptr = tcp_action_req_set_dst_port;
1445 } else {
1446 return ACT_RET_PRS_ERR;
1447 }
1448
1449 (*orig_arg)++;
1450
1451 return ACT_RET_PRS_OK;
1452 }
1453
1454
1455 /* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
1456 * success, ACT_RET_PRS_ERR on error.
1457 */
tcp_parse_silent_drop(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1458 static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
1459 struct act_rule *rule, char **err)
1460 {
1461 rule->action = ACT_CUSTOM;
1462 rule->action_ptr = tcp_exec_action_silent_drop;
1463 return ACT_RET_PRS_OK;
1464 }
1465
1466
1467 /************************************************************************/
1468 /* All supported sample fetch functions must be declared here */
1469 /************************************************************************/
1470
1471 /* fetch the connection's source IPv4/IPv6 address */
smp_fetch_src(const struct arg * args,struct sample * smp,const char * kw,void * private)1472 int smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
1473 {
1474 struct connection *cli_conn = objt_conn(smp->sess->origin);
1475
1476 if (!cli_conn)
1477 return 0;
1478
1479 switch (cli_conn->addr.from.ss_family) {
1480 case AF_INET:
1481 smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr;
1482 smp->data.type = SMP_T_IPV4;
1483 break;
1484 case AF_INET6:
1485 smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr;
1486 smp->data.type = SMP_T_IPV6;
1487 break;
1488 default:
1489 return 0;
1490 }
1491
1492 smp->flags = 0;
1493 return 1;
1494 }
1495
1496 /* set temp integer to the connection's source port */
1497 static int
smp_fetch_sport(const struct arg * args,struct sample * smp,const char * k,void * private)1498 smp_fetch_sport(const struct arg *args, struct sample *smp, const char *k, void *private)
1499 {
1500 struct connection *cli_conn = objt_conn(smp->sess->origin);
1501
1502 if (!cli_conn)
1503 return 0;
1504
1505 smp->data.type = SMP_T_SINT;
1506 if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.from)))
1507 return 0;
1508
1509 smp->flags = 0;
1510 return 1;
1511 }
1512
1513 /* fetch the connection's destination IPv4/IPv6 address */
1514 static int
smp_fetch_dst(const struct arg * args,struct sample * smp,const char * kw,void * private)1515 smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
1516 {
1517 struct connection *cli_conn = objt_conn(smp->sess->origin);
1518
1519 if (!cli_conn)
1520 return 0;
1521
1522 conn_get_to_addr(cli_conn);
1523
1524 switch (cli_conn->addr.to.ss_family) {
1525 case AF_INET:
1526 smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr;
1527 smp->data.type = SMP_T_IPV4;
1528 break;
1529 case AF_INET6:
1530 smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr;
1531 smp->data.type = SMP_T_IPV6;
1532 break;
1533 default:
1534 return 0;
1535 }
1536
1537 smp->flags = 0;
1538 return 1;
1539 }
1540
1541 /* check if the destination address of the front connection is local to the
1542 * system or if it was intercepted.
1543 */
smp_fetch_dst_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1544 int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1545 {
1546 struct connection *conn = objt_conn(smp->sess->origin);
1547 struct listener *li = smp->sess->listener;
1548
1549 if (!conn)
1550 return 0;
1551
1552 conn_get_to_addr(conn);
1553 if (!(conn->flags & CO_FL_ADDR_TO_SET))
1554 return 0;
1555
1556 smp->data.type = SMP_T_BOOL;
1557 smp->flags = 0;
1558 smp->data.u.sint = addr_is_local(li->netns, &conn->addr.to);
1559 return smp->data.u.sint >= 0;
1560 }
1561
1562 /* check if the source address of the front connection is local to the system
1563 * or not.
1564 */
smp_fetch_src_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1565 int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1566 {
1567 struct connection *conn = objt_conn(smp->sess->origin);
1568 struct listener *li = smp->sess->listener;
1569
1570 if (!conn)
1571 return 0;
1572
1573 conn_get_from_addr(conn);
1574 if (!(conn->flags & CO_FL_ADDR_FROM_SET))
1575 return 0;
1576
1577 smp->data.type = SMP_T_BOOL;
1578 smp->flags = 0;
1579 smp->data.u.sint = addr_is_local(li->netns, &conn->addr.from);
1580 return smp->data.u.sint >= 0;
1581 }
1582
1583 /* set temp integer to the frontend connexion's destination port */
1584 static int
smp_fetch_dport(const struct arg * args,struct sample * smp,const char * kw,void * private)1585 smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
1586 {
1587 struct connection *cli_conn = objt_conn(smp->sess->origin);
1588
1589 if (!cli_conn)
1590 return 0;
1591
1592 conn_get_to_addr(cli_conn);
1593
1594 smp->data.type = SMP_T_SINT;
1595 if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.to)))
1596 return 0;
1597
1598 smp->flags = 0;
1599 return 1;
1600 }
1601
1602 #ifdef TCP_INFO
1603
1604 /* Validates the arguments passed to "fc_*" fetch keywords returning a time
1605 * value. These keywords support an optional string representing the unit of the
1606 * result: "us" for microseconds and "ms" for milliseconds". Returns 0 on error
1607 * and non-zero if OK.
1608 */
val_fc_time_value(struct arg * args,char ** err)1609 static int val_fc_time_value(struct arg *args, char **err)
1610 {
1611 if (args[0].type == ARGT_STR) {
1612 if (strcmp(args[0].data.str.str, "us") == 0) {
1613 free(args[0].data.str.str);
1614 args[0].type = ARGT_SINT;
1615 args[0].data.sint = TIME_UNIT_US;
1616 }
1617 else if (strcmp(args[0].data.str.str, "ms") == 0) {
1618 free(args[0].data.str.str);
1619 args[0].type = ARGT_SINT;
1620 args[0].data.sint = TIME_UNIT_MS;
1621 }
1622 else {
1623 memprintf(err, "expects 'us' or 'ms', got '%s'",
1624 args[0].data.str.str);
1625 return 0;
1626 }
1627 }
1628 else {
1629 memprintf(err, "Unexpected arg type");
1630 return 0;
1631 }
1632
1633 return 1;
1634 }
1635
1636 /* Validates the arguments passed to "fc_*" fetch keywords returning a
1637 * counter. These keywords should be used without any keyword, but because of a
1638 * bug in previous versions, an optional string argument may be passed. In such
1639 * case, the argument is ignored and a warning is emitted. Returns 0 on error
1640 * and non-zero if OK.
1641 */
var_fc_counter(struct arg * args,char ** err)1642 static int var_fc_counter(struct arg *args, char **err)
1643 {
1644 if (args[0].type != ARGT_STOP) {
1645 ha_warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
1646 if (args[0].type == ARGT_STR)
1647 free(args[0].data.str.str);
1648 args[0].type = ARGT_STOP;
1649 }
1650
1651 return 1;
1652 }
1653
1654 /* Returns some tcp_info data if it's available. "dir" must be set to 0 if
1655 * the client connection is required, otherwise it is set to 1. "val" represents
1656 * the required value.
1657 * If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
1658 */
get_tcp_info(const struct arg * args,struct sample * smp,int dir,int val)1659 static inline int get_tcp_info(const struct arg *args, struct sample *smp,
1660 int dir, int val)
1661 {
1662 struct connection *conn;
1663 struct tcp_info info;
1664 socklen_t optlen;
1665
1666 /* strm can be null. */
1667 if (!smp->strm)
1668 return 0;
1669
1670 /* get the object associated with the stream interface.The
1671 * object can be other thing than a connection. For example,
1672 * it be a appctx. */
1673 conn = cs_conn(objt_cs(smp->strm->si[dir].end));
1674 if (!conn)
1675 return 0;
1676
1677 /* The fd may not be available for the tcp_info struct, and the
1678 syscal can fail. */
1679 optlen = sizeof(info);
1680 if (getsockopt(conn->handle.fd, SOL_TCP, TCP_INFO, &info, &optlen) == -1)
1681 return 0;
1682
1683 /* extract the value. */
1684 smp->data.type = SMP_T_SINT;
1685 switch (val) {
1686 case 0: smp->data.u.sint = info.tcpi_rtt; break;
1687 case 1: smp->data.u.sint = info.tcpi_rttvar; break;
1688 #if defined(__linux__)
1689 /* these ones are common to all Linux versions */
1690 case 2: smp->data.u.sint = info.tcpi_unacked; break;
1691 case 3: smp->data.u.sint = info.tcpi_sacked; break;
1692 case 4: smp->data.u.sint = info.tcpi_lost; break;
1693 case 5: smp->data.u.sint = info.tcpi_retrans; break;
1694 case 6: smp->data.u.sint = info.tcpi_fackets; break;
1695 case 7: smp->data.u.sint = info.tcpi_reordering; break;
1696 #elif defined(__FreeBSD__) || defined(__NetBSD__)
1697 /* the ones are found on FreeBSD and NetBSD featuring TCP_INFO */
1698 case 2: smp->data.u.sint = info.__tcpi_unacked; break;
1699 case 3: smp->data.u.sint = info.__tcpi_sacked; break;
1700 case 4: smp->data.u.sint = info.__tcpi_lost; break;
1701 case 5: smp->data.u.sint = info.__tcpi_retrans; break;
1702 case 6: smp->data.u.sint = info.__tcpi_fackets; break;
1703 case 7: smp->data.u.sint = info.__tcpi_reordering; break;
1704 #endif
1705 default: return 0;
1706 }
1707
1708 return 1;
1709 }
1710
1711 /* get the mean rtt of a client connexion */
1712 static int
smp_fetch_fc_rtt(const struct arg * args,struct sample * smp,const char * kw,void * private)1713 smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
1714 {
1715 if (!get_tcp_info(args, smp, 0, 0))
1716 return 0;
1717
1718 /* By default or if explicitly specified, convert rtt to ms */
1719 if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1720 smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1721
1722 return 1;
1723 }
1724
1725 /* get the variance of the mean rtt of a client connexion */
1726 static int
smp_fetch_fc_rttvar(const struct arg * args,struct sample * smp,const char * kw,void * private)1727 smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
1728 {
1729 if (!get_tcp_info(args, smp, 0, 1))
1730 return 0;
1731
1732 /* By default or if explicitly specified, convert rttvar to ms */
1733 if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1734 smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1735
1736 return 1;
1737 }
1738
1739 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
1740
/* Sample fetch returning the "unacked" counter of the client connection
 * (field index 2 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 2) ? 1 : 0;
}
1749
/* Sample fetch returning the "sacked" counter of the client connection
 * (field index 3 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 3) ? 1 : 0;
}
1758
/* Sample fetch returning the "lost" counter of the client connection
 * (field index 4 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 4) ? 1 : 0;
}
1767
/* Sample fetch returning the "retrans" counter of the client connection
 * (field index 5 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 5) ? 1 : 0;
}
1776
/* Sample fetch returning the "fackets" counter of the client connection
 * (field index 6 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 6) ? 1 : 0;
}
1785
/* Sample fetch returning the "reordering" counter of the client connection
 * (field index 7 of get_tcp_info()). Returns 1 on success, 0 on failure.
 */
static int
smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 7) ? 1 : 0;
}
1794 #endif // linux || freebsd || netbsd
1795 #endif // TCP_INFO
1796
1797 #ifdef IPV6_V6ONLY
1798 /* parse the "v4v6" bind keyword */
bind_parse_v4v6(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1799 static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1800 {
1801 struct listener *l;
1802
1803 list_for_each_entry(l, &conf->listeners, by_bind) {
1804 if (l->addr.ss_family == AF_INET6)
1805 l->options |= LI_O_V4V6;
1806 }
1807
1808 return 0;
1809 }
1810
1811 /* parse the "v6only" bind keyword */
bind_parse_v6only(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1812 static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1813 {
1814 struct listener *l;
1815
1816 list_for_each_entry(l, &conf->listeners, by_bind) {
1817 if (l->addr.ss_family == AF_INET6)
1818 l->options |= LI_O_V6ONLY;
1819 }
1820
1821 return 0;
1822 }
1823 #endif
1824
1825 #ifdef CONFIG_HAP_TRANSPARENT
1826 /* parse the "transparent" bind keyword */
bind_parse_transparent(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1827 static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1828 {
1829 struct listener *l;
1830
1831 list_for_each_entry(l, &conf->listeners, by_bind) {
1832 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1833 l->options |= LI_O_FOREIGN;
1834 }
1835
1836 return 0;
1837 }
1838 #endif
1839
1840 #ifdef TCP_DEFER_ACCEPT
1841 /* parse the "defer-accept" bind keyword */
bind_parse_defer_accept(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1842 static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1843 {
1844 struct listener *l;
1845
1846 list_for_each_entry(l, &conf->listeners, by_bind) {
1847 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1848 l->options |= LI_O_DEF_ACCEPT;
1849 }
1850
1851 return 0;
1852 }
1853 #endif
1854
1855 #ifdef TCP_FASTOPEN
1856 /* parse the "tfo" bind keyword */
bind_parse_tfo(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1857 static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1858 {
1859 struct listener *l;
1860
1861 list_for_each_entry(l, &conf->listeners, by_bind) {
1862 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1863 l->options |= LI_O_TCP_FO;
1864 }
1865
1866 return 0;
1867 }
1868 #endif
1869
1870 #ifdef TCP_MAXSEG
1871 /* parse the "mss" bind keyword */
bind_parse_mss(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1872 static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1873 {
1874 struct listener *l;
1875 int mss;
1876
1877 if (!*args[cur_arg + 1]) {
1878 memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
1879 return ERR_ALERT | ERR_FATAL;
1880 }
1881
1882 mss = atoi(args[cur_arg + 1]);
1883 if (!mss || abs(mss) > 65535) {
1884 memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
1885 return ERR_ALERT | ERR_FATAL;
1886 }
1887
1888 list_for_each_entry(l, &conf->listeners, by_bind) {
1889 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1890 l->maxseg = mss;
1891 }
1892
1893 return 0;
1894 }
1895 #endif
1896
1897 #ifdef TCP_USER_TIMEOUT
1898 /* parse the "tcp-ut" bind keyword */
bind_parse_tcp_ut(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1899 static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1900 {
1901 const char *ptr = NULL;
1902 struct listener *l;
1903 unsigned int timeout;
1904
1905 if (!*args[cur_arg + 1]) {
1906 memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
1907 return ERR_ALERT | ERR_FATAL;
1908 }
1909
1910 ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
1911 if (ptr) {
1912 memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
1913 return ERR_ALERT | ERR_FATAL;
1914 }
1915
1916 list_for_each_entry(l, &conf->listeners, by_bind) {
1917 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1918 l->tcp_ut = timeout;
1919 }
1920
1921 return 0;
1922 }
1923 #endif
1924
1925 #ifdef SO_BINDTODEVICE
1926 /* parse the "interface" bind keyword */
bind_parse_interface(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1927 static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1928 {
1929 struct listener *l;
1930
1931 if (!*args[cur_arg + 1]) {
1932 memprintf(err, "'%s' : missing interface name", args[cur_arg]);
1933 return ERR_ALERT | ERR_FATAL;
1934 }
1935
1936 list_for_each_entry(l, &conf->listeners, by_bind) {
1937 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1938 l->interface = strdup(args[cur_arg + 1]);
1939 }
1940
1941 return 0;
1942 }
1943 #endif
1944
1945 #ifdef CONFIG_HAP_NS
1946 /* parse the "namespace" bind keyword */
bind_parse_namespace(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1947 static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1948 {
1949 struct listener *l;
1950 char *namespace = NULL;
1951
1952 if (!*args[cur_arg + 1]) {
1953 memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
1954 return ERR_ALERT | ERR_FATAL;
1955 }
1956 namespace = args[cur_arg + 1];
1957
1958 list_for_each_entry(l, &conf->listeners, by_bind) {
1959 l->netns = netns_store_lookup(namespace, strlen(namespace));
1960
1961 if (l->netns == NULL)
1962 l->netns = netns_store_insert(namespace);
1963
1964 if (l->netns == NULL) {
1965 ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
1966 return ERR_ALERT | ERR_FATAL;
1967 }
1968 }
1969 return 0;
1970 }
1971 #endif
1972
1973 #ifdef TCP_USER_TIMEOUT
1974 /* parse the "tcp-ut" server keyword */
srv_parse_tcp_ut(char ** args,int * cur_arg,struct proxy * px,struct server * newsrv,char ** err)1975 static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
1976 {
1977 const char *ptr = NULL;
1978 unsigned int timeout;
1979
1980 if (!*args[*cur_arg + 1]) {
1981 memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
1982 return ERR_ALERT | ERR_FATAL;
1983 }
1984
1985 ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
1986 if (ptr) {
1987 memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
1988 return ERR_ALERT | ERR_FATAL;
1989 }
1990
1991 if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
1992 newsrv->tcp_ut = timeout;
1993
1994 return 0;
1995 }
1996 #endif
1997
1998
1999 /* Note: must not be declared <const> as its list will be overwritten.
2000 * Note: fetches that may return multiple types must be declared as the lowest
2001 * common denominator, the type that can be casted into all other ones. For
2002 * instance v4/v6 must be declared v4.
2003 */
2004 static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
2005 { "dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
2006 { "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
2007 { "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
2008 { "src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
2009 { "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
2010 { "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
2011 #ifdef TCP_INFO
2012 { "fc_rtt", smp_fetch_fc_rtt, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
2013 { "fc_rttvar", smp_fetch_fc_rttvar, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
2014 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
2015 { "fc_unacked", smp_fetch_fc_unacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2016 { "fc_sacked", smp_fetch_fc_sacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2017 { "fc_retrans", smp_fetch_fc_retrans, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2018 { "fc_fackets", smp_fetch_fc_fackets, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2019 { "fc_lost", smp_fetch_fc_lost, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2020 { "fc_reordering", smp_fetch_fc_reordering, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
2021 #endif // linux || freebsd || netbsd
2022 #endif // TCP_INFO
2023 { /* END */ },
2024 }};
2025
2026 /************************************************************************/
2027 /* All supported bind keywords must be declared here. */
2028 /************************************************************************/
2029
2030 /* Note: must not be declared <const> as its list will be overwritten.
2031 * Please take care of keeping this list alphabetically sorted, doing so helps
2032 * all code contributors.
2033 * Optional keywords are also declared with a NULL ->parse() function so that
2034 * the config parser can report an appropriate error when a known keyword was
2035 * not enabled.
2036 */
2037 static struct bind_kw_list bind_kws = { "TCP", { }, {
2038 #ifdef TCP_DEFER_ACCEPT
2039 { "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
2040 #endif
2041 #ifdef SO_BINDTODEVICE
2042 { "interface", bind_parse_interface, 1 }, /* specifically bind to this interface */
2043 #endif
2044 #ifdef TCP_MAXSEG
2045 { "mss", bind_parse_mss, 1 }, /* set MSS of listening socket */
2046 #endif
2047 #ifdef TCP_USER_TIMEOUT
2048 { "tcp-ut", bind_parse_tcp_ut, 1 }, /* set User Timeout on listening socket */
2049 #endif
2050 #ifdef TCP_FASTOPEN
2051 { "tfo", bind_parse_tfo, 0 }, /* enable TCP_FASTOPEN of listening socket */
2052 #endif
2053 #ifdef CONFIG_HAP_TRANSPARENT
2054 { "transparent", bind_parse_transparent, 0 }, /* transparently bind to the specified addresses */
2055 #endif
2056 #ifdef IPV6_V6ONLY
2057 { "v4v6", bind_parse_v4v6, 0 }, /* force socket to bind to IPv4+IPv6 */
2058 { "v6only", bind_parse_v6only, 0 }, /* force socket to bind to IPv6 only */
2059 #endif
2060 #ifdef CONFIG_HAP_NS
2061 { "namespace", bind_parse_namespace, 1 },
2062 #endif
2063 /* the versions with the NULL parse function*/
2064 { "defer-accept", NULL, 0 },
2065 { "interface", NULL, 1 },
2066 { "mss", NULL, 1 },
2067 { "transparent", NULL, 0 },
2068 { "v4v6", NULL, 0 },
2069 { "v6only", NULL, 0 },
2070 { NULL, NULL, 0 },
2071 }};
2072
2073 static struct srv_kw_list srv_kws = { "TCP", { }, {
2074 #ifdef TCP_USER_TIMEOUT
2075 { "tcp-ut", srv_parse_tcp_ut, 1, 1 }, /* set TCP user timeout on server */
2076 #endif
2077 { NULL, NULL, 0 },
2078 }};
2079
2080 static struct action_kw_list tcp_req_conn_actions = {ILH, {
2081 { "silent-drop", tcp_parse_silent_drop },
2082 { "set-src", tcp_parse_set_src_dst },
2083 { "set-src-port", tcp_parse_set_src_dst },
2084 { "set-dst" , tcp_parse_set_src_dst },
2085 { "set-dst-port", tcp_parse_set_src_dst },
2086 { /* END */ }
2087 }};
2088
2089 static struct action_kw_list tcp_req_sess_actions = {ILH, {
2090 { "silent-drop", tcp_parse_silent_drop },
2091 { "set-src", tcp_parse_set_src_dst },
2092 { "set-src-port", tcp_parse_set_src_dst },
2093 { "set-dst" , tcp_parse_set_src_dst },
2094 { "set-dst-port", tcp_parse_set_src_dst },
2095 { /* END */ }
2096 }};
2097
2098 static struct action_kw_list tcp_req_cont_actions = {ILH, {
2099 { "silent-drop", tcp_parse_silent_drop },
2100 { /* END */ }
2101 }};
2102
2103 static struct action_kw_list tcp_res_cont_actions = {ILH, {
2104 { "silent-drop", tcp_parse_silent_drop },
2105 { /* END */ }
2106 }};
2107
2108 static struct action_kw_list http_req_actions = {ILH, {
2109 { "silent-drop", tcp_parse_silent_drop },
2110 { "set-src", tcp_parse_set_src_dst },
2111 { "set-src-port", tcp_parse_set_src_dst },
2112 { "set-dst", tcp_parse_set_src_dst },
2113 { "set-dst-port", tcp_parse_set_src_dst },
2114 { /* END */ }
2115 }};
2116
2117 static struct action_kw_list http_res_actions = {ILH, {
2118 { "silent-drop", tcp_parse_silent_drop },
2119 { /* END */ }
2120 }};
2121
2122
2123 __attribute__((constructor))
__tcp_protocol_init(void)2124 static void __tcp_protocol_init(void)
2125 {
2126 protocol_register(&proto_tcpv4);
2127 protocol_register(&proto_tcpv6);
2128 sample_register_fetches(&sample_fetch_keywords);
2129 bind_register_keywords(&bind_kws);
2130 srv_register_keywords(&srv_kws);
2131 tcp_req_conn_keywords_register(&tcp_req_conn_actions);
2132 tcp_req_sess_keywords_register(&tcp_req_sess_actions);
2133 tcp_req_cont_keywords_register(&tcp_req_cont_actions);
2134 tcp_res_cont_keywords_register(&tcp_res_cont_actions);
2135 http_req_keywords_register(&http_req_actions);
2136 http_res_keywords_register(&http_res_actions);
2137
2138
2139 hap_register_build_opts("Built with transparent proxy support using:"
2140 #if defined(IP_TRANSPARENT)
2141 " IP_TRANSPARENT"
2142 #endif
2143 #if defined(IPV6_TRANSPARENT)
2144 " IPV6_TRANSPARENT"
2145 #endif
2146 #if defined(IP_FREEBIND)
2147 " IP_FREEBIND"
2148 #endif
2149 #if defined(IP_BINDANY)
2150 " IP_BINDANY"
2151 #endif
2152 #if defined(IPV6_BINDANY)
2153 " IPV6_BINDANY"
2154 #endif
2155 #if defined(SO_BINDANY)
2156 " SO_BINDANY"
2157 #endif
2158 "", 0);
2159 }
2160
2161
2162 /*
2163 * Local variables:
2164 * c-indent-level: 8
2165 * c-basic-offset: 8
2166 * End:
2167 */
2168