1 /*
2 * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
3 *
4 * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13 /* this is to have tcp_info defined on systems using musl
14 * library, such as Alpine Linux
15 */
16 #define _GNU_SOURCE
17
18 #include <ctype.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <sys/un.h>
31
32 #include <netinet/tcp.h>
33 #include <netinet/in.h>
34
35 #include <common/compat.h>
36 #include <common/config.h>
37 #include <common/debug.h>
38 #include <common/errors.h>
39 #include <common/mini-clist.h>
40 #include <common/standard.h>
41 #include <common/namespace.h>
42
43 #include <types/action.h>
44 #include <types/connection.h>
45 #include <types/global.h>
46 #include <types/stream.h>
47
48 #include <proto/arg.h>
49 #include <proto/channel.h>
50 #include <proto/connection.h>
51 #include <proto/fd.h>
52 #include <proto/listener.h>
53 #include <proto/log.h>
54 #include <proto/port_range.h>
55 #include <proto/protocol.h>
56 #include <proto/proto_http.h>
57 #include <proto/proto_tcp.h>
58 #include <proto/proxy.h>
59 #include <proto/sample.h>
60 #include <proto/server.h>
61 #include <proto/task.h>
62 #include <proto/tcp_rules.h>
63
/* Forward declarations of the two bind callbacks, which are referenced by the
 * protocol descriptors before being defined further down in this file.
 */
static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen);
66
67 /* Note: must not be declared <const> as its list will be overwritten */
68 static struct protocol proto_tcpv4 = {
69 .name = "tcpv4",
70 .sock_domain = AF_INET,
71 .sock_type = SOCK_STREAM,
72 .sock_prot = IPPROTO_TCP,
73 .sock_family = AF_INET,
74 .sock_addrlen = sizeof(struct sockaddr_in),
75 .l3_addrlen = 32/8,
76 .accept = &listener_accept,
77 .connect = tcp_connect_server,
78 .bind = tcp_bind_listener,
79 .bind_all = tcp_bind_listeners,
80 .unbind_all = unbind_all_listeners,
81 .enable_all = enable_all_listeners,
82 .get_src = tcp_get_src,
83 .get_dst = tcp_get_dst,
84 .drain = tcp_drain,
85 .pause = tcp_pause_listener,
86 .listeners = LIST_HEAD_INIT(proto_tcpv4.listeners),
87 .nb_listeners = 0,
88 };
89
90 /* Note: must not be declared <const> as its list will be overwritten */
91 static struct protocol proto_tcpv6 = {
92 .name = "tcpv6",
93 .sock_domain = AF_INET6,
94 .sock_type = SOCK_STREAM,
95 .sock_prot = IPPROTO_TCP,
96 .sock_family = AF_INET6,
97 .sock_addrlen = sizeof(struct sockaddr_in6),
98 .l3_addrlen = 128/8,
99 .accept = &listener_accept,
100 .connect = tcp_connect_server,
101 .bind = tcp_bind_listener,
102 .bind_all = tcp_bind_listeners,
103 .unbind_all = unbind_all_listeners,
104 .enable_all = enable_all_listeners,
105 .get_src = tcp_get_src,
106 .get_dst = tcp_get_dst,
107 .drain = tcp_drain,
108 .pause = tcp_pause_listener,
109 .listeners = LIST_HEAD_INIT(proto_tcpv6.listeners),
110 .nb_listeners = 0,
111 };
112
113 /* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
114 * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
115 * - 0 : ignore remote address (may even be a NULL pointer)
116 * - 1 : use provided address
117 * - 2 : use provided port
118 * - 3 : use both
119 *
120 * The function supports multiple foreign binding methods :
121 * - linux_tproxy: we directly bind to the foreign address
122 * The second one can be used as a fallback for the first one.
123 * This function returns 0 when everything's OK, 1 if it could not bind, to the
124 * local address, 2 if it could not bind to the foreign address.
125 */
tcp_bind_socket(int fd,int flags,struct sockaddr_storage * local,struct sockaddr_storage * remote)126 int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
127 {
128 struct sockaddr_storage bind_addr;
129 int foreign_ok = 0;
130 int ret;
131 static int ip_transp_working = 1;
132 static int ip6_transp_working = 1;
133
134 switch (local->ss_family) {
135 case AF_INET:
136 if (flags && ip_transp_working) {
137 /* This deserves some explanation. Some platforms will support
138 * multiple combinations of certain methods, so we try the
139 * supported ones until one succeeds.
140 */
141 if (0
142 #if defined(IP_TRANSPARENT)
143 || (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0)
144 #endif
145 #if defined(IP_FREEBIND)
146 || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
147 #endif
148 #if defined(IP_BINDANY)
149 || (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0)
150 #endif
151 #if defined(SO_BINDANY)
152 || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
153 #endif
154 )
155 foreign_ok = 1;
156 else
157 ip_transp_working = 0;
158 }
159 break;
160 case AF_INET6:
161 if (flags && ip6_transp_working) {
162 if (0
163 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
164 || (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0)
165 #endif
166 #if defined(IP_FREEBIND)
167 || (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0)
168 #endif
169 #if defined(IPV6_BINDANY)
170 || (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0)
171 #endif
172 #if defined(SO_BINDANY)
173 || (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0)
174 #endif
175 )
176 foreign_ok = 1;
177 else
178 ip6_transp_working = 0;
179 }
180 break;
181 }
182
183 if (flags) {
184 memset(&bind_addr, 0, sizeof(bind_addr));
185 bind_addr.ss_family = remote->ss_family;
186 switch (remote->ss_family) {
187 case AF_INET:
188 if (flags & 1)
189 ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
190 if (flags & 2)
191 ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
192 break;
193 case AF_INET6:
194 if (flags & 1)
195 ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
196 if (flags & 2)
197 ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
198 break;
199 default:
200 /* we don't want to try to bind to an unknown address family */
201 foreign_ok = 0;
202 }
203 }
204
205 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
206 if (foreign_ok) {
207 if (is_inet_addr(&bind_addr)) {
208 ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
209 if (ret < 0)
210 return 2;
211 }
212 }
213 else {
214 if (is_inet_addr(local)) {
215 ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
216 if (ret < 0)
217 return 1;
218 }
219 }
220
221 if (!flags)
222 return 0;
223
224 if (!foreign_ok)
225 /* we could not bind to a foreign address */
226 return 2;
227
228 return 0;
229 }
230
create_server_socket(struct connection * conn)231 static int create_server_socket(struct connection *conn)
232 {
233 const struct netns_entry *ns = NULL;
234
235 #ifdef CONFIG_HAP_NS
236 if (objt_server(conn->target)) {
237 if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
238 ns = conn->proxy_netns;
239 else
240 ns = __objt_server(conn->target)->netns;
241 }
242 #endif
243 return my_socketat(ns, conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP);
244 }
245
246 /*
247 * This function initiates a TCP connection establishment to the target assigned
248 * to connection <conn> using (si->{target,addr.to}). A source address may be
249 * pointed to by conn->addr.from in case of transparent proxying. Normal source
250 * bind addresses are still determined locally (due to the possible need of a
251 * source port). conn->target may point either to a valid server or to a backend,
252 * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
253 * supported. The <data> parameter is a boolean indicating whether there are data
254 * waiting for being sent or not, in order to adjust data write polling and on
255 * some platforms, the ability to avoid an empty initial ACK. The <delack> argument
256 * allows the caller to force using a delayed ACK when establishing the connection :
257 * - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
258 * - 1 = delayed ACK if backend has tcp-smart-connect, regardless of data
259 * - 2 = delayed ACK regardless of backend options
260 *
261 * Note that a pending send_proxy message accounts for data.
262 *
263 * It can return one of :
264 * - SF_ERR_NONE if everything's OK
265 * - SF_ERR_SRVTO if there are no more servers
266 * - SF_ERR_SRVCL if the connection was refused by the server
267 * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
268 * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
269 * - SF_ERR_INTERNAL for any other purely internal errors
270 * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
271 *
272 * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
273 * it's invalid and the caller has nothing to do.
274 */
275
tcp_connect_server(struct connection * conn,int data,int delack)276 int tcp_connect_server(struct connection *conn, int data, int delack)
277 {
278 int fd;
279 struct server *srv;
280 struct proxy *be;
281 struct conn_src *src;
282
283 conn->flags = CO_FL_WAIT_L4_CONN; /* connection in progress */
284
285 switch (obj_type(conn->target)) {
286 case OBJ_TYPE_PROXY:
287 be = objt_proxy(conn->target);
288 srv = NULL;
289 break;
290 case OBJ_TYPE_SERVER:
291 srv = objt_server(conn->target);
292 be = srv->proxy;
293 break;
294 default:
295 conn->flags |= CO_FL_ERROR;
296 return SF_ERR_INTERNAL;
297 }
298
299 fd = conn->t.sock.fd = create_server_socket(conn);
300
301 if (fd == -1) {
302 qfprintf(stderr, "Cannot get a server socket.\n");
303
304 if (errno == ENFILE) {
305 conn->err_code = CO_ER_SYS_FDLIM;
306 send_log(be, LOG_EMERG,
307 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
308 be->id, maxfd);
309 }
310 else if (errno == EMFILE) {
311 conn->err_code = CO_ER_PROC_FDLIM;
312 send_log(be, LOG_EMERG,
313 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
314 be->id, maxfd);
315 }
316 else if (errno == ENOBUFS || errno == ENOMEM) {
317 conn->err_code = CO_ER_SYS_MEMLIM;
318 send_log(be, LOG_EMERG,
319 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
320 be->id, maxfd);
321 }
322 else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
323 conn->err_code = CO_ER_NOPROTO;
324 }
325 else
326 conn->err_code = CO_ER_SOCK_ERR;
327
328 /* this is a resource error */
329 conn->flags |= CO_FL_ERROR;
330 return SF_ERR_RESOURCE;
331 }
332
333 if (fd >= global.maxsock) {
334 /* do not log anything there, it's a normal condition when this option
335 * is used to serialize connections to a server !
336 */
337 Alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
338 close(fd);
339 conn->err_code = CO_ER_CONF_FDLIM;
340 conn->flags |= CO_FL_ERROR;
341 return SF_ERR_PRXCOND; /* it is a configuration limit */
342 }
343
344 if ((fcntl(fd, F_SETFL, O_NONBLOCK)==-1) ||
345 (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
346 qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
347 close(fd);
348 conn->err_code = CO_ER_SOCK_ERR;
349 conn->flags |= CO_FL_ERROR;
350 return SF_ERR_INTERNAL;
351 }
352
353 if (be->options & PR_O_TCP_SRV_KA)
354 setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
355
356 /* allow specific binding :
357 * - server-specific at first
358 * - proxy-specific next
359 */
360 if (srv && srv->conn_src.opts & CO_SRC_BIND)
361 src = &srv->conn_src;
362 else if (be->conn_src.opts & CO_SRC_BIND)
363 src = &be->conn_src;
364 else
365 src = NULL;
366
367 if (src) {
368 int ret, flags = 0;
369
370 if (is_inet_addr(&conn->addr.from)) {
371 switch (src->opts & CO_SRC_TPROXY_MASK) {
372 case CO_SRC_TPROXY_CLI:
373 conn->flags |= CO_FL_PRIVATE;
374 /* fall through */
375 case CO_SRC_TPROXY_ADDR:
376 flags = 3;
377 break;
378 case CO_SRC_TPROXY_CIP:
379 case CO_SRC_TPROXY_DYN:
380 conn->flags |= CO_FL_PRIVATE;
381 flags = 1;
382 break;
383 }
384 }
385
386 #ifdef SO_BINDTODEVICE
387 /* Note: this might fail if not CAP_NET_RAW */
388 if (src->iface_name)
389 setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
390 #endif
391
392 if (src->sport_range) {
393 int attempts = 10; /* should be more than enough to find a spare port */
394 struct sockaddr_storage sa;
395
396 ret = 1;
397 memcpy(&sa, &src->source_addr, sizeof(sa));
398
399 do {
400 /* note: in case of retry, we may have to release a previously
401 * allocated port, hence this loop's construct.
402 */
403 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
404 fdinfo[fd].port_range = NULL;
405
406 if (!attempts)
407 break;
408 attempts--;
409
410 fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
411 if (!fdinfo[fd].local_port) {
412 conn->err_code = CO_ER_PORT_RANGE;
413 break;
414 }
415
416 fdinfo[fd].port_range = src->sport_range;
417 set_host_port(&sa, fdinfo[fd].local_port);
418
419 ret = tcp_bind_socket(fd, flags, &sa, &conn->addr.from);
420 if (ret != 0)
421 conn->err_code = CO_ER_CANT_BIND;
422 } while (ret != 0); /* binding NOK */
423 }
424 else {
425 #ifdef IP_BIND_ADDRESS_NO_PORT
426 static int bind_address_no_port = 1;
427 setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
428 #endif
429 ret = tcp_bind_socket(fd, flags, &src->source_addr, &conn->addr.from);
430 if (ret != 0)
431 conn->err_code = CO_ER_CANT_BIND;
432 }
433
434 if (unlikely(ret != 0)) {
435 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
436 fdinfo[fd].port_range = NULL;
437 close(fd);
438
439 if (ret == 1) {
440 Alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
441 be->id);
442 send_log(be, LOG_EMERG,
443 "Cannot bind to source address before connect() for backend %s.\n",
444 be->id);
445 } else {
446 Alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
447 be->id);
448 send_log(be, LOG_EMERG,
449 "Cannot bind to tproxy source address before connect() for backend %s.\n",
450 be->id);
451 }
452 conn->flags |= CO_FL_ERROR;
453 return SF_ERR_RESOURCE;
454 }
455 }
456
457 #if defined(TCP_QUICKACK)
458 /* disabling tcp quick ack now allows the first request to leave the
459 * machine with the first ACK. We only do this if there are pending
460 * data in the buffer.
461 */
462 if (delack == 2 || ((delack || data || conn->send_proxy_ofs) && (be->options2 & PR_O2_SMARTCON)))
463 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
464 #endif
465
466 #ifdef TCP_USER_TIMEOUT
467 /* there is not much more we can do here when it fails, it's still minor */
468 if (srv && srv->tcp_ut)
469 setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
470 #endif
471 if (global.tune.server_sndbuf)
472 setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
473
474 if (global.tune.server_rcvbuf)
475 setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
476
477 if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) == -1) {
478 if (errno == EINPROGRESS || errno == EALREADY) {
479 /* common case, let's wait for connect status */
480 conn->flags |= CO_FL_WAIT_L4_CONN;
481 }
482 else if (errno == EISCONN) {
483 /* should normally not happen but if so, indicates that it's OK */
484 conn->flags &= ~CO_FL_WAIT_L4_CONN;
485 }
486 else if (errno == EAGAIN || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
487 char *msg;
488 if (errno == EAGAIN || errno == EADDRNOTAVAIL) {
489 msg = "no free ports";
490 conn->err_code = CO_ER_FREE_PORTS;
491 }
492 else {
493 msg = "local address already in use";
494 conn->err_code = CO_ER_ADDR_INUSE;
495 }
496
497 qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
498 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
499 fdinfo[fd].port_range = NULL;
500 close(fd);
501 send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
502 conn->flags |= CO_FL_ERROR;
503 return SF_ERR_RESOURCE;
504 } else if (errno == ETIMEDOUT) {
505 //qfprintf(stderr,"Connect(): ETIMEDOUT");
506 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
507 fdinfo[fd].port_range = NULL;
508 close(fd);
509 conn->err_code = CO_ER_SOCK_ERR;
510 conn->flags |= CO_FL_ERROR;
511 return SF_ERR_SRVTO;
512 } else {
513 // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
514 //qfprintf(stderr,"Connect(): %d", errno);
515 port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
516 fdinfo[fd].port_range = NULL;
517 close(fd);
518 conn->err_code = CO_ER_SOCK_ERR;
519 conn->flags |= CO_FL_ERROR;
520 return SF_ERR_SRVCL;
521 }
522 }
523 else {
524 /* connect() == 0, this is great! */
525 conn->flags &= ~CO_FL_WAIT_L4_CONN;
526 }
527
528 conn->flags |= CO_FL_ADDR_TO_SET;
529
530 /* Prepare to send a few handshakes related to the on-wire protocol. */
531 if (conn->send_proxy_ofs)
532 conn->flags |= CO_FL_SEND_PROXY;
533
534 conn_ctrl_init(conn); /* registers the FD */
535 fdtab[fd].linger_risk = 1; /* close hard if needed */
536
537 if (conn_xprt_init(conn) < 0) {
538 conn_force_close(conn);
539 conn->flags |= CO_FL_ERROR;
540 return SF_ERR_RESOURCE;
541 }
542
543 if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN)) {
544 conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */
545 }
546 else {
547 /* If there's no more handshake, we need to notify the data
548 * layer when the connection is already OK otherwise we'll have
549 * no other opportunity to do it later (eg: health checks).
550 */
551 data = 1;
552 }
553
554 if (data)
555 conn_data_want_send(conn); /* prepare to send data if any */
556
557 return SF_ERR_NONE; /* connection is OK */
558 }
559
560
/*
 * Retrieves the source address of the connection carried by socket <fd>.
 * <dir> tells on which side we are : 0 for a listener, in which case the
 * source is the peer's address (getpeername), non-zero for an initiator, in
 * which case the source is our own address (getsockname). The address is
 * stored in <sa>, which can hold <salen> bytes. Returns 0 in case of success,
 * -1 in case of error.
 */
int tcp_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	if (!dir)
		return getpeername(fd, sa, &salen);

	return getsockname(fd, sa, &salen);
}
574
575
/*
 * Retrieves the destination address of the connection carried by socket <fd>.
 * <dir> tells on which side we are : 0 for a listener, non-zero for an
 * initiator. For an initiator the destination is the peer's address
 * (getpeername). For a listener it is our own address (getsockname), and when
 * built with both TPROXY and SO_ORIGINAL_DST, an IPv4 address is then replaced
 * with the original destination reported by SO_ORIGINAL_DST when that lookup
 * succeeds. The destination address is stored in <sa>, which can hold <salen>
 * bytes. Returns 0 in case of success, -1 in case of error.
 */
int tcp_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int status;

	if (dir)
		return getpeername(fd, sa, &salen);

	status = getsockname(fd, sa, &salen);
	if (status < 0)
		return status;

#if defined(TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4.
	 */
	if (((struct sockaddr_storage *)sa)->ss_family == AF_INET &&
	    getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return status;
}
606
607 /* Tries to drain any pending incoming data from the socket to reach the
608 * receive shutdown. Returns positive if the shutdown was found, negative
609 * if EAGAIN was hit, otherwise zero. This is useful to decide whether we
610 * can close a connection cleanly are we must kill it hard.
611 */
tcp_drain(int fd)612 int tcp_drain(int fd)
613 {
614 int turns = 2;
615 int len;
616
617 while (turns) {
618 #ifdef MSG_TRUNC_CLEARS_INPUT
619 len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
620 if (len == -1 && errno == EFAULT)
621 #endif
622 len = recv(fd, trash.str, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
623
624 if (len == 0) {
625 /* cool, shutdown received */
626 fdtab[fd].linger_risk = 0;
627 return 1;
628 }
629
630 if (len < 0) {
631 if (errno == EAGAIN) {
632 /* connection not closed yet */
633 fd_cant_recv(fd);
634 return -1;
635 }
636 if (errno == EINTR) /* oops, try again */
637 continue;
638 /* other errors indicate a dead connection, fine. */
639 fdtab[fd].linger_risk = 0;
640 return 1;
641 }
642 /* OK we read some data, let's try again once */
643 turns--;
644 }
645 /* some data are still present, give up */
646 return 0;
647 }
648
649 /* This is the callback which is set when a connection establishment is pending
650 * and we have nothing to send. It updates the FD polling status. It returns 0
651 * if it fails in a fatal way or needs to poll to go further, otherwise it
652 * returns non-zero and removes the CO_FL_WAIT_L4_CONN flag from the connection's
653 * flags. In case of error, it sets CO_FL_ERROR and leaves the error code in
654 * errno. The error checking is done in two passes in order to limit the number
655 * of syscalls in the normal case :
656 * - if POLL_ERR was reported by the poller, we check for a pending error on
657 * the socket before proceeding. If found, it's assigned to errno so that
658 * upper layers can see it.
659 * - otherwise connect() is used to check the connection state again, since
660 * the getsockopt return cannot reliably be used to know if the connection
661 * is still pending or ready. This one may often return an error as well,
662 * since we don't always have POLL_ERR (eg: OSX or cached events).
663 */
tcp_connect_probe(struct connection * conn)664 int tcp_connect_probe(struct connection *conn)
665 {
666 int fd = conn->t.sock.fd;
667 socklen_t lskerr;
668 int skerr;
669
670 if (conn->flags & CO_FL_ERROR)
671 return 0;
672
673 if (!conn_ctrl_ready(conn))
674 return 0;
675
676 if (!(conn->flags & CO_FL_WAIT_L4_CONN))
677 return 1; /* strange we were called while ready */
678
679 if (!fd_send_ready(fd))
680 return 0;
681
682 /* we might be the first witness of FD_POLL_ERR. Note that FD_POLL_HUP
683 * without FD_POLL_IN also indicates a hangup without input data meaning
684 * there was no connection.
685 */
686 if (fdtab[fd].ev & FD_POLL_ERR ||
687 (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP) {
688 skerr = 0;
689 lskerr = sizeof(skerr);
690 getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
691 errno = skerr;
692 if (errno == EAGAIN)
693 errno = 0;
694 if (errno)
695 goto out_error;
696 }
697
698 /* Use connect() to check the state of the socket. This has the
699 * advantage of giving us the following info :
700 * - error
701 * - connecting (EALREADY, EINPROGRESS)
702 * - connected (EISCONN, 0)
703 */
704 if (connect(fd, (struct sockaddr *)&conn->addr.to, get_addr_len(&conn->addr.to)) < 0) {
705 if (errno == EALREADY || errno == EINPROGRESS) {
706 __conn_sock_stop_recv(conn);
707 fd_cant_send(fd);
708 return 0;
709 }
710
711 if (errno && errno != EISCONN)
712 goto out_error;
713
714 /* otherwise we're connected */
715 }
716
717 /* The FD is ready now, we'll mark the connection as complete and
718 * forward the event to the transport layer which will notify the
719 * data layer.
720 */
721 conn->flags &= ~CO_FL_WAIT_L4_CONN;
722 return 1;
723
724 out_error:
725 /* Write error on the file descriptor. Report it to the connection
726 * and disable polling on this FD.
727 */
728 fdtab[fd].linger_risk = 0;
729 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
730 __conn_sock_stop_both(conn);
731 return 0;
732 }
733
734
735 /* This function tries to bind a TCPv4/v6 listener. It may return a warning or
736 * an error message in <errmsg> if the message is at most <errlen> bytes long
737 * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
738 * The return value is composed from ERR_ABORT, ERR_WARN,
739 * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
740 * was alright and that no message was returned. ERR_RETRYABLE means that an
741 * error occurred but that it may vanish after a retry (eg: port in use), and
742 * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
743 * the meaning of the error, but just indicate that a message is present which
744 * should be displayed with the respective level. Last, ERR_ABORT indicates
745 * that it's pointless to try to start other listeners. No error message is
746 * returned if errlen is NULL.
747 */
tcp_bind_listener(struct listener * listener,char * errmsg,int errlen)748 int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
749 {
750 __label__ tcp_return, tcp_close_return;
751 int fd, err;
752 int ext, ready;
753 socklen_t ready_len;
754 const char *msg = NULL;
755
756 /* ensure we never return garbage */
757 if (errlen)
758 *errmsg = 0;
759
760 if (listener->state != LI_ASSIGNED)
761 return ERR_NONE; /* already bound */
762
763 err = ERR_NONE;
764
765 /* if the listener already has an fd assigned, then we were offered the
766 * fd by an external process (most likely the parent), and we don't want
767 * to create a new socket. However we still want to set a few flags on
768 * the socket.
769 */
770 fd = listener->fd;
771 ext = (fd >= 0);
772
773 if (!ext) {
774 fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);
775
776 if (fd == -1) {
777 err |= ERR_RETRYABLE | ERR_ALERT;
778 msg = "cannot create listening socket";
779 goto tcp_return;
780 }
781 }
782
783 if (fd >= global.maxsock) {
784 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
785 msg = "not enough free sockets (raise '-n' parameter)";
786 goto tcp_close_return;
787 }
788
789 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
790 err |= ERR_FATAL | ERR_ALERT;
791 msg = "cannot make socket non-blocking";
792 goto tcp_close_return;
793 }
794
795 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
796 /* not fatal but should be reported */
797 msg = "cannot do so_reuseaddr";
798 err |= ERR_ALERT;
799 }
800
801 if (listener->options & LI_O_NOLINGER)
802 setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
803
804 #ifdef SO_REUSEPORT
805 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
806 * Linux, it might return an error that we will silently ignore.
807 */
808 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
809 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
810 #endif
811
812 if (!ext && (listener->options & LI_O_FOREIGN)) {
813 switch (listener->addr.ss_family) {
814 case AF_INET:
815 if (1
816 #if defined(IP_TRANSPARENT)
817 && (setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
818 #endif
819 #if defined(IP_FREEBIND)
820 && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
821 #endif
822 #if defined(IP_BINDANY)
823 && (setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == -1)
824 #endif
825 #if defined(SO_BINDANY)
826 && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
827 #endif
828 ) {
829 msg = "cannot make listening socket transparent";
830 err |= ERR_ALERT;
831 }
832 break;
833 case AF_INET6:
834 if (1
835 #if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
836 && (setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == -1)
837 #endif
838 #if defined(IP_FREEBIND)
839 && (setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == -1)
840 #endif
841 #if defined(IPV6_BINDANY)
842 && (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == -1)
843 #endif
844 #if defined(SO_BINDANY)
845 && (setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == -1)
846 #endif
847 ) {
848 msg = "cannot make listening socket transparent";
849 err |= ERR_ALERT;
850 }
851 break;
852 }
853 }
854
855 #ifdef SO_BINDTODEVICE
856 /* Note: this might fail if not CAP_NET_RAW */
857 if (!ext && listener->interface) {
858 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
859 listener->interface, strlen(listener->interface) + 1) == -1) {
860 msg = "cannot bind listener to device";
861 err |= ERR_WARN;
862 }
863 }
864 #endif
865 #if defined(TCP_MAXSEG)
866 if (listener->maxseg > 0) {
867 if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
868 &listener->maxseg, sizeof(listener->maxseg)) == -1) {
869 msg = "cannot set MSS";
870 err |= ERR_WARN;
871 }
872 }
873 #endif
874 #if defined(TCP_USER_TIMEOUT)
875 if (listener->tcp_ut) {
876 if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
877 &listener->tcp_ut, sizeof(listener->tcp_ut)) == -1) {
878 msg = "cannot set TCP User Timeout";
879 err |= ERR_WARN;
880 }
881 }
882 #endif
883 #if defined(TCP_DEFER_ACCEPT)
884 if (listener->options & LI_O_DEF_ACCEPT) {
885 /* defer accept by up to one second */
886 int accept_delay = 1;
887 if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
888 msg = "cannot enable DEFER_ACCEPT";
889 err |= ERR_WARN;
890 }
891 }
892 #endif
893 #if defined(TCP_FASTOPEN)
894 if (listener->options & LI_O_TCP_FO) {
895 /* TFO needs a queue length, let's use the configured backlog */
896 int qlen = listener->backlog ? listener->backlog : listener->maxconn;
897 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
898 msg = "cannot enable TCP_FASTOPEN";
899 err |= ERR_WARN;
900 }
901 }
902 #endif
903 #if defined(IPV6_V6ONLY)
904 if (listener->options & LI_O_V6ONLY)
905 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
906 else if (listener->options & LI_O_V4V6)
907 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
908 #endif
909
910 if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
911 err |= ERR_RETRYABLE | ERR_ALERT;
912 msg = "cannot bind socket";
913 goto tcp_close_return;
914 }
915
916 ready = 0;
917 ready_len = sizeof(ready);
918 if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
919 ready = 0;
920
921 if (!(ext && ready) && /* only listen if not already done by external process */
922 listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
923 err |= ERR_RETRYABLE | ERR_ALERT;
924 msg = "cannot listen to socket";
925 goto tcp_close_return;
926 }
927
928 #if defined(TCP_QUICKACK)
929 if (listener->options & LI_O_NOQUICKACK)
930 setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
931 #endif
932
933 /* the socket is ready */
934 listener->fd = fd;
935 listener->state = LI_LISTEN;
936
937 fdtab[fd].owner = listener; /* reference the listener instead of a task */
938 fdtab[fd].iocb = listener->proto->accept;
939 fd_insert(fd);
940
941 tcp_return:
942 if (msg && errlen) {
943 char pn[INET6_ADDRSTRLEN];
944
945 addr_to_str(&listener->addr, pn, sizeof(pn));
946 snprintf(errmsg, errlen, "%s [%s:%d]", msg, pn, get_host_port(&listener->addr));
947 }
948 return err;
949
950 tcp_close_return:
951 close(fd);
952 goto tcp_return;
953 }
954
955 /* This function creates all TCP sockets bound to the protocol entry <proto>.
956 * It is intended to be used as the protocol's bind_all() function.
957 * The sockets will be registered but not added to any fd_set, in order not to
958 * loose them across the fork(). A call to enable_all_listeners() is needed
959 * to complete initialization. The return value is composed from ERR_*.
960 */
tcp_bind_listeners(struct protocol * proto,char * errmsg,int errlen)961 static int tcp_bind_listeners(struct protocol *proto, char *errmsg, int errlen)
962 {
963 struct listener *listener;
964 int err = ERR_NONE;
965
966 list_for_each_entry(listener, &proto->listeners, proto_list) {
967 err |= tcp_bind_listener(listener, errmsg, errlen);
968 if (err & ERR_ABORT)
969 break;
970 }
971
972 return err;
973 }
974
975 /* Add listener to the list of tcpv4 listeners. The listener's state
976 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
977 * listeners is updated. This is the function to use to add a new listener.
978 */
tcpv4_add_listener(struct listener * listener)979 void tcpv4_add_listener(struct listener *listener)
980 {
981 if (listener->state != LI_INIT)
982 return;
983 listener->state = LI_ASSIGNED;
984 listener->proto = &proto_tcpv4;
985 LIST_ADDQ(&proto_tcpv4.listeners, &listener->proto_list);
986 proto_tcpv4.nb_listeners++;
987 }
988
989 /* Add listener to the list of tcpv4 listeners. The listener's state
990 * is automatically updated from LI_INIT to LI_ASSIGNED. The number of
991 * listeners is updated. This is the function to use to add a new listener.
992 */
tcpv6_add_listener(struct listener * listener)993 void tcpv6_add_listener(struct listener *listener)
994 {
995 if (listener->state != LI_INIT)
996 return;
997 listener->state = LI_ASSIGNED;
998 listener->proto = &proto_tcpv6;
999 LIST_ADDQ(&proto_tcpv6.listeners, &listener->proto_list);
1000 proto_tcpv6.nb_listeners++;
1001 }
1002
1003 /* Pause a listener. Returns < 0 in case of failure, 0 if the listener
1004 * was totally stopped, or > 0 if correctly paused.
1005 */
tcp_pause_listener(struct listener * l)1006 int tcp_pause_listener(struct listener *l)
1007 {
1008 if (shutdown(l->fd, SHUT_WR) != 0)
1009 return -1; /* Solaris dies here */
1010
1011 if (listen(l->fd, l->backlog ? l->backlog : l->maxconn) != 0)
1012 return -1; /* OpenBSD dies here */
1013
1014 if (shutdown(l->fd, SHUT_RD) != 0)
1015 return -1; /* should always be OK */
1016 return 1;
1017 }
1018
1019 /*
1020 * Execute the "set-src" action. May be called from {tcp,http}request.
1021 * It only changes the address and tries to preserve the original port. If the
1022 * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1023 */
tcp_action_req_set_src(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1024 enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
1025 struct session *sess, struct stream *s, int flags)
1026 {
1027 struct connection *cli_conn;
1028
1029 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1030 struct sample *smp;
1031
1032 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1033 if (smp) {
1034 int port = get_net_port(&cli_conn->addr.from);
1035
1036 if (smp->data.type == SMP_T_IPV4) {
1037 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_family = AF_INET;
1038 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1039 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = port;
1040 } else if (smp->data.type == SMP_T_IPV6) {
1041 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_family = AF_INET6;
1042 memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1043 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = port;
1044 }
1045 }
1046 cli_conn->flags |= CO_FL_ADDR_FROM_SET;
1047 }
1048 return ACT_RET_CONT;
1049 }
1050
1051 /*
1052 * Execute the "set-dst" action. May be called from {tcp,http}request.
1053 * It only changes the address and tries to preserve the original port. If the
1054 * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
1055 */
tcp_action_req_set_dst(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1056 enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
1057 struct session *sess, struct stream *s, int flags)
1058 {
1059 struct connection *cli_conn;
1060
1061 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1062 struct sample *smp;
1063
1064 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
1065 if (smp) {
1066 int port = get_net_port(&cli_conn->addr.to);
1067
1068 if (smp->data.type == SMP_T_IPV4) {
1069 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_family = AF_INET;
1070 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
1071 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = port;
1072 } else if (smp->data.type == SMP_T_IPV6) {
1073 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_family = AF_INET6;
1074 memcpy(&((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
1075 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = port;
1076 }
1077 cli_conn->flags |= CO_FL_ADDR_TO_SET;
1078 }
1079 }
1080 return ACT_RET_CONT;
1081 }
1082
1083 /*
1084 * Execute the "set-src-port" action. May be called from {tcp,http}request.
1085 * We must test the sin_family before setting the port. If the address family
1086 * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1087 * and the port is assigned.
1088 */
tcp_action_req_set_src_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1089 enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
1090 struct session *sess, struct stream *s, int flags)
1091 {
1092 struct connection *cli_conn;
1093
1094 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1095 struct sample *smp;
1096
1097 conn_get_from_addr(cli_conn);
1098
1099 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1100 if (smp) {
1101 if (cli_conn->addr.from.ss_family == AF_INET6) {
1102 ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_port = htons(smp->data.u.sint);
1103 } else {
1104 if (cli_conn->addr.from.ss_family != AF_INET) {
1105 cli_conn->addr.from.ss_family = AF_INET;
1106 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr.s_addr = 0;
1107 }
1108 ((struct sockaddr_in *)&cli_conn->addr.from)->sin_port = htons(smp->data.u.sint);
1109 }
1110 }
1111 }
1112 return ACT_RET_CONT;
1113 }
1114
1115 /*
1116 * Execute the "set-dst-port" action. May be called from {tcp,http}request.
1117 * We must test the sin_family before setting the port. If the address family
1118 * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
1119 * and the port is assigned.
1120 */
tcp_action_req_set_dst_port(struct act_rule * rule,struct proxy * px,struct session * sess,struct stream * s,int flags)1121 enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
1122 struct session *sess, struct stream *s, int flags)
1123 {
1124 struct connection *cli_conn;
1125
1126 if ((cli_conn = objt_conn(sess->origin)) && conn_ctrl_ready(cli_conn)) {
1127 struct sample *smp;
1128
1129 conn_get_to_addr(cli_conn);
1130
1131 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
1132 if (smp) {
1133 if (cli_conn->addr.to.ss_family == AF_INET6) {
1134 ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_port = htons(smp->data.u.sint);
1135 } else {
1136 if (cli_conn->addr.to.ss_family != AF_INET) {
1137 cli_conn->addr.to.ss_family = AF_INET;
1138 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr.s_addr = 0;
1139 }
1140 ((struct sockaddr_in *)&cli_conn->addr.to)->sin_port = htons(smp->data.u.sint);
1141 }
1142 }
1143 }
1144 return ACT_RET_CONT;
1145 }
1146
/* Executes the "silent-drop" action. May be called from {tcp,http}{request,response}.
 *
 * When the session's origin is a connection whose control layer is ready, the
 * function tweaks the client socket, in this order and depending on what the
 * platform defines: drain + TCP_QUICKACK, forced no-linger, TCP_REPAIR, then
 * as a fallback IP_TTL / IPV6_UNICAST_HOPS. Socket option failures other than
 * TCP_REPAIR's are ignored. In every case, including when there is no usable
 * connection, the stream <strm> (if any) is aborted and the "denied_req"
 * counters of the frontend, of the listener (when it has counters) and of the
 * assigned backend (when SF_BE_ASSIGNED is set) are incremented.
 *
 * <rule>, <px> and <flags> are not used. Always returns ACT_RET_STOP.
 */
static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px, struct session *sess, struct stream *strm, int flags)
{
	struct connection *conn = objt_conn(sess->origin);

	/* no connection behind this session: only do the accounting */
	if (!conn)
		goto out;

	/* no ready control layer: skip the socket tricks as well */
	if (!conn_ctrl_ready(conn))
		goto out;

#ifdef TCP_QUICKACK
	/* drain is needed only to send the quick ACK */
	conn_sock_drain(conn);

	/* re-enable quickack if it was disabled to ack all data and avoid
	 * retransmits from the client that might trigger a real reset.
	 */
	/* NOTE(review): <one> is defined outside this function, presumably an int set to 1 */
	setsockopt(conn->t.sock.fd, SOL_TCP, TCP_QUICKACK, &one, sizeof(one));
#endif
	/* lingering must absolutely be disabled so that we don't send a
	 * shutdown(), this is critical to the TCP_REPAIR trick. When no stream
	 * is present, returning with ERR will cause lingering to be disabled.
	 */
	if (strm)
		strm->si[0].flags |= SI_FL_NOLINGER;

	/* We're on the client-facing side, we must force to disable lingering to
	 * ensure we will use an RST exclusively and kill any pending data.
	 */
	fdtab[conn->t.sock.fd].linger_risk = 1;

#ifdef TCP_REPAIR
	if (setsockopt(conn->t.sock.fd, SOL_TCP, TCP_REPAIR, &one, sizeof(one)) == 0) {
		/* socket will be quiet now */
		goto out;
	}
#endif
	/* either TCP_REPAIR is not defined or it failed (eg: permissions).
	 * Let's fall back on the TTL trick, though it only works for routed
	 * network and has no effect on local net.
	 */
#ifdef IP_TTL
	/* IPv4 client: set the TTL to <one> */
	if (conn->addr.from.ss_family == AF_INET)
		setsockopt(conn->t.sock.fd, SOL_IP, IP_TTL, &one, sizeof(one));
#endif
#ifdef IPV6_UNICAST_HOPS
	/* IPv6 client: same trick with the hop limit; the socket level constant
	 * is SOL_IPV6 when available, IPPROTO_IPV6 otherwise.
	 */
#if defined(SOL_IPV6)
	if (conn->addr.from.ss_family == AF_INET6)
		setsockopt(conn->t.sock.fd, SOL_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
#elif defined(IPPROTO_IPV6)
	if (conn->addr.from.ss_family == AF_INET6)
		setsockopt(conn->t.sock.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &one, sizeof(one));
#endif
#endif
 out:
	/* kill the stream if any */
	if (strm) {
		channel_abort(&strm->req);
		channel_abort(&strm->res);
		/* keep only the filter-end analysers on both channels */
		strm->req.analysers &= AN_REQ_FLT_END;
		strm->res.analysers &= AN_RES_FLT_END;
		if (strm->flags & SF_BE_ASSIGNED)
			strm->be->be_counters.denied_req++;
		/* do not overwrite an error cause or final state already recorded */
		if (!(strm->flags & SF_ERR_MASK))
			strm->flags |= SF_ERR_PRXCOND;
		if (!(strm->flags & SF_FINST_MASK))
			strm->flags |= SF_FINST_R;
	}

	sess->fe->fe_counters.denied_req++;
	if (sess->listener->counters)
		sess->listener->counters->denied_req++;

	return ACT_RET_STOP;
}
1223
1224 /* parse "set-{src,dst}[-port]" action */
tcp_parse_set_src_dst(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1225 enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
1226 {
1227 int cur_arg;
1228 struct sample_expr *expr;
1229 unsigned int where;
1230
1231 cur_arg = *orig_arg;
1232 expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args);
1233 if (!expr)
1234 return ACT_RET_PRS_ERR;
1235
1236 where = 0;
1237 if (proxy->cap & PR_CAP_FE)
1238 where |= SMP_VAL_FE_HRQ_HDR;
1239 if (proxy->cap & PR_CAP_BE)
1240 where |= SMP_VAL_BE_HRQ_HDR;
1241
1242 if (!(expr->fetch->val & where)) {
1243 memprintf(err,
1244 "fetch method '%s' extracts information from '%s', none of which is available here",
1245 args[cur_arg-1], sample_src_names(expr->fetch->use));
1246 free(expr);
1247 return ACT_RET_PRS_ERR;
1248 }
1249 rule->arg.expr = expr;
1250 rule->action = ACT_CUSTOM;
1251
1252 if (!strcmp(args[*orig_arg-1], "set-src")) {
1253 rule->action_ptr = tcp_action_req_set_src;
1254 } else if (!strcmp(args[*orig_arg-1], "set-src-port")) {
1255 rule->action_ptr = tcp_action_req_set_src_port;
1256 } else if (!strcmp(args[*orig_arg-1], "set-dst")) {
1257 rule->action_ptr = tcp_action_req_set_dst;
1258 } else if (!strcmp(args[*orig_arg-1], "set-dst-port")) {
1259 rule->action_ptr = tcp_action_req_set_dst_port;
1260 } else {
1261 return ACT_RET_PRS_ERR;
1262 }
1263
1264 (*orig_arg)++;
1265
1266 return ACT_RET_PRS_OK;
1267 }
1268
1269
1270 /* Parse a "silent-drop" action. It takes no argument. It returns ACT_RET_PRS_OK on
1271 * success, ACT_RET_PRS_ERR on error.
1272 */
tcp_parse_silent_drop(const char ** args,int * orig_arg,struct proxy * px,struct act_rule * rule,char ** err)1273 static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *orig_arg, struct proxy *px,
1274 struct act_rule *rule, char **err)
1275 {
1276 rule->action = ACT_CUSTOM;
1277 rule->action_ptr = tcp_exec_action_silent_drop;
1278 return ACT_RET_PRS_OK;
1279 }
1280
1281
1282 /************************************************************************/
1283 /* All supported sample fetch functions must be declared here */
1284 /************************************************************************/
1285
1286 /* fetch the connection's source IPv4/IPv6 address */
smp_fetch_src(const struct arg * args,struct sample * smp,const char * kw,void * private)1287 int smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
1288 {
1289 struct connection *cli_conn = objt_conn(smp->sess->origin);
1290
1291 if (!cli_conn)
1292 return 0;
1293
1294 switch (cli_conn->addr.from.ss_family) {
1295 case AF_INET:
1296 smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.from)->sin_addr;
1297 smp->data.type = SMP_T_IPV4;
1298 break;
1299 case AF_INET6:
1300 smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.from)->sin6_addr;
1301 smp->data.type = SMP_T_IPV6;
1302 break;
1303 default:
1304 return 0;
1305 }
1306
1307 smp->flags = 0;
1308 return 1;
1309 }
1310
1311 /* set temp integer to the connection's source port */
1312 static int
smp_fetch_sport(const struct arg * args,struct sample * smp,const char * k,void * private)1313 smp_fetch_sport(const struct arg *args, struct sample *smp, const char *k, void *private)
1314 {
1315 struct connection *cli_conn = objt_conn(smp->sess->origin);
1316
1317 if (!cli_conn)
1318 return 0;
1319
1320 smp->data.type = SMP_T_SINT;
1321 if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.from)))
1322 return 0;
1323
1324 smp->flags = 0;
1325 return 1;
1326 }
1327
1328 /* fetch the connection's destination IPv4/IPv6 address */
1329 static int
smp_fetch_dst(const struct arg * args,struct sample * smp,const char * kw,void * private)1330 smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
1331 {
1332 struct connection *cli_conn = objt_conn(smp->sess->origin);
1333
1334 if (!cli_conn)
1335 return 0;
1336
1337 conn_get_to_addr(cli_conn);
1338
1339 switch (cli_conn->addr.to.ss_family) {
1340 case AF_INET:
1341 smp->data.u.ipv4 = ((struct sockaddr_in *)&cli_conn->addr.to)->sin_addr;
1342 smp->data.type = SMP_T_IPV4;
1343 break;
1344 case AF_INET6:
1345 smp->data.u.ipv6 = ((struct sockaddr_in6 *)&cli_conn->addr.to)->sin6_addr;
1346 smp->data.type = SMP_T_IPV6;
1347 break;
1348 default:
1349 return 0;
1350 }
1351
1352 smp->flags = 0;
1353 return 1;
1354 }
1355
1356 /* check if the destination address of the front connection is local to the
1357 * system or if it was intercepted.
1358 */
smp_fetch_dst_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1359 int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1360 {
1361 struct connection *conn = objt_conn(smp->sess->origin);
1362 struct listener *li = smp->sess->listener;
1363
1364 if (!conn)
1365 return 0;
1366
1367 conn_get_to_addr(conn);
1368 if (!(conn->flags & CO_FL_ADDR_TO_SET))
1369 return 0;
1370
1371 smp->data.type = SMP_T_BOOL;
1372 smp->flags = 0;
1373 smp->data.u.sint = addr_is_local(li->netns, &conn->addr.to);
1374 return smp->data.u.sint >= 0;
1375 }
1376
1377 /* check if the source address of the front connection is local to the system
1378 * or not.
1379 */
smp_fetch_src_is_local(const struct arg * args,struct sample * smp,const char * kw,void * private)1380 int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
1381 {
1382 struct connection *conn = objt_conn(smp->sess->origin);
1383 struct listener *li = smp->sess->listener;
1384
1385 if (!conn)
1386 return 0;
1387
1388 conn_get_from_addr(conn);
1389 if (!(conn->flags & CO_FL_ADDR_FROM_SET))
1390 return 0;
1391
1392 smp->data.type = SMP_T_BOOL;
1393 smp->flags = 0;
1394 smp->data.u.sint = addr_is_local(li->netns, &conn->addr.from);
1395 return smp->data.u.sint >= 0;
1396 }
1397
1398 /* set temp integer to the frontend connexion's destination port */
1399 static int
smp_fetch_dport(const struct arg * args,struct sample * smp,const char * kw,void * private)1400 smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
1401 {
1402 struct connection *cli_conn = objt_conn(smp->sess->origin);
1403
1404 if (!cli_conn)
1405 return 0;
1406
1407 conn_get_to_addr(cli_conn);
1408
1409 smp->data.type = SMP_T_SINT;
1410 if (!(smp->data.u.sint = get_host_port(&cli_conn->addr.to)))
1411 return 0;
1412
1413 smp->flags = 0;
1414 return 1;
1415 }
1416
1417 #ifdef TCP_INFO
1418
1419 /* Validates the arguments passed to "fc_*" fetch keywords returning a time
1420 * value. These keywords support an optional string representing the unit of the
1421 * result: "us" for microseconds and "ms" for milliseconds". Returns 0 on error
1422 * and non-zero if OK.
1423 */
val_fc_time_value(struct arg * args,char ** err)1424 static int val_fc_time_value(struct arg *args, char **err)
1425 {
1426 if (args[0].type == ARGT_STR) {
1427 if (strcmp(args[0].data.str.str, "us") == 0) {
1428 free(args[0].data.str.str);
1429 args[0].type = ARGT_SINT;
1430 args[0].data.sint = TIME_UNIT_US;
1431 }
1432 else if (strcmp(args[0].data.str.str, "ms") == 0) {
1433 free(args[0].data.str.str);
1434 args[0].type = ARGT_SINT;
1435 args[0].data.sint = TIME_UNIT_MS;
1436 }
1437 else {
1438 memprintf(err, "expects 'us' or 'ms', got '%s'",
1439 args[0].data.str.str);
1440 return 0;
1441 }
1442 }
1443 else {
1444 memprintf(err, "Unexpected arg type");
1445 return 0;
1446 }
1447
1448 return 1;
1449 }
1450
1451 /* Validates the arguments passed to "fc_*" fetch keywords returning a
1452 * counter. These keywords should be used without any keyword, but because of a
1453 * bug in previous versions, an optional string argument may be passed. In such
1454 * case, the argument is ignored and a warning is emitted. Returns 0 on error
1455 * and non-zero if OK.
1456 */
var_fc_counter(struct arg * args,char ** err)1457 static int var_fc_counter(struct arg *args, char **err)
1458 {
1459 if (args[0].type != ARGT_STOP) {
1460 Warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
1461 if (args[0].type == ARGT_STR)
1462 free(args[0].data.str.str);
1463 args[0].type = ARGT_STOP;
1464 }
1465
1466 return 1;
1467 }
1468
1469 /* Returns some tcp_info data if it's available. "dir" must be set to 0 if
1470 * the client connection is required, otherwise it is set to 1. "val" represents
1471 * the required value.
1472 * If the function fails it returns 0, otherwise it returns 1 and "result" is filled.
1473 */
get_tcp_info(const struct arg * args,struct sample * smp,int dir,int val)1474 static inline int get_tcp_info(const struct arg *args, struct sample *smp,
1475 int dir, int val)
1476 {
1477 struct connection *conn;
1478 struct tcp_info info;
1479 socklen_t optlen;
1480
1481 /* strm can be null. */
1482 if (!smp->strm)
1483 return 0;
1484
1485 /* get the object associated with the stream interface.The
1486 * object can be other thing than a connection. For example,
1487 * it be a appctx. */
1488 conn = objt_conn(smp->strm->si[dir].end);
1489 if (!conn)
1490 return 0;
1491
1492 /* The fd may not be avalaible for the tcp_info struct, and the
1493 syscal can fail. */
1494 optlen = sizeof(info);
1495 if (getsockopt(conn->t.sock.fd, SOL_TCP, TCP_INFO, &info, &optlen) == -1)
1496 return 0;
1497
1498 /* extract the value. */
1499 smp->data.type = SMP_T_SINT;
1500 switch (val) {
1501 case 0: smp->data.u.sint = info.tcpi_rtt; break;
1502 case 1: smp->data.u.sint = info.tcpi_rttvar; break;
1503 #if defined(__linux__)
1504 /* these ones are common to all Linux versions */
1505 case 2: smp->data.u.sint = info.tcpi_unacked; break;
1506 case 3: smp->data.u.sint = info.tcpi_sacked; break;
1507 case 4: smp->data.u.sint = info.tcpi_lost; break;
1508 case 5: smp->data.u.sint = info.tcpi_retrans; break;
1509 case 6: smp->data.u.sint = info.tcpi_fackets; break;
1510 case 7: smp->data.u.sint = info.tcpi_reordering; break;
1511 #elif defined(__FreeBSD__) || defined(__NetBSD__)
1512 /* the ones are found on FreeBSD and NetBSD featuring TCP_INFO */
1513 case 2: smp->data.u.sint = info.__tcpi_unacked; break;
1514 case 3: smp->data.u.sint = info.__tcpi_sacked; break;
1515 case 4: smp->data.u.sint = info.__tcpi_lost; break;
1516 case 5: smp->data.u.sint = info.__tcpi_retrans; break;
1517 case 6: smp->data.u.sint = info.__tcpi_fackets; break;
1518 case 7: smp->data.u.sint = info.__tcpi_reordering; break;
1519 #endif
1520 default: return 0;
1521 }
1522
1523 return 1;
1524 }
1525
1526 /* get the mean rtt of a client connexion */
1527 static int
smp_fetch_fc_rtt(const struct arg * args,struct sample * smp,const char * kw,void * private)1528 smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
1529 {
1530 if (!get_tcp_info(args, smp, 0, 0))
1531 return 0;
1532
1533 /* By default or if explicitly specified, convert rtt to ms */
1534 if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1535 smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1536
1537 return 1;
1538 }
1539
1540 /* get the variance of the mean rtt of a client connexion */
1541 static int
smp_fetch_fc_rttvar(const struct arg * args,struct sample * smp,const char * kw,void * private)1542 smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
1543 {
1544 if (!get_tcp_info(args, smp, 0, 1))
1545 return 0;
1546
1547 /* By default or if explicitly specified, convert rttvar to ms */
1548 if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
1549 smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
1550
1551 return 1;
1552 }
1553
1554 #if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
1555
/* Get the unacked counter of a client connection (get_tcp_info() value 2).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 2) ? 1 : 0;
}
1564
/* Get the sacked counter of a client connection (get_tcp_info() value 3).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 3) ? 1 : 0;
}
1573
/* Get the lost counter of a client connection (get_tcp_info() value 4).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 4) ? 1 : 0;
}
1582
/* Get the retrans counter of a client connection (get_tcp_info() value 5).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 5) ? 1 : 0;
}
1591
/* Get the fackets counter of a client connection (get_tcp_info() value 6).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 6) ? 1 : 0;
}
1600
/* Get the reordering counter of a client connection (get_tcp_info() value 7).
 * Returns 1 when the value was stored in <smp>, otherwise 0.
 */
static int
smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
{
	return get_tcp_info(args, smp, 0, 7) ? 1 : 0;
}
1609 #endif // linux || freebsd || netbsd
1610 #endif // TCP_INFO
1611
1612 #ifdef IPV6_V6ONLY
1613 /* parse the "v4v6" bind keyword */
bind_parse_v4v6(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1614 static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1615 {
1616 struct listener *l;
1617
1618 list_for_each_entry(l, &conf->listeners, by_bind) {
1619 if (l->addr.ss_family == AF_INET6)
1620 l->options |= LI_O_V4V6;
1621 }
1622
1623 return 0;
1624 }
1625
1626 /* parse the "v6only" bind keyword */
bind_parse_v6only(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1627 static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1628 {
1629 struct listener *l;
1630
1631 list_for_each_entry(l, &conf->listeners, by_bind) {
1632 if (l->addr.ss_family == AF_INET6)
1633 l->options |= LI_O_V6ONLY;
1634 }
1635
1636 return 0;
1637 }
1638 #endif
1639
1640 #ifdef CONFIG_HAP_TRANSPARENT
1641 /* parse the "transparent" bind keyword */
bind_parse_transparent(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1642 static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1643 {
1644 struct listener *l;
1645
1646 list_for_each_entry(l, &conf->listeners, by_bind) {
1647 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1648 l->options |= LI_O_FOREIGN;
1649 }
1650
1651 return 0;
1652 }
1653 #endif
1654
1655 #ifdef TCP_DEFER_ACCEPT
1656 /* parse the "defer-accept" bind keyword */
bind_parse_defer_accept(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1657 static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1658 {
1659 struct listener *l;
1660
1661 list_for_each_entry(l, &conf->listeners, by_bind) {
1662 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1663 l->options |= LI_O_DEF_ACCEPT;
1664 }
1665
1666 return 0;
1667 }
1668 #endif
1669
1670 #ifdef TCP_FASTOPEN
1671 /* parse the "tfo" bind keyword */
bind_parse_tfo(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1672 static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1673 {
1674 struct listener *l;
1675
1676 list_for_each_entry(l, &conf->listeners, by_bind) {
1677 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1678 l->options |= LI_O_TCP_FO;
1679 }
1680
1681 return 0;
1682 }
1683 #endif
1684
1685 #ifdef TCP_MAXSEG
1686 /* parse the "mss" bind keyword */
bind_parse_mss(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1687 static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1688 {
1689 struct listener *l;
1690 int mss;
1691
1692 if (!*args[cur_arg + 1]) {
1693 memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
1694 return ERR_ALERT | ERR_FATAL;
1695 }
1696
1697 mss = atoi(args[cur_arg + 1]);
1698 if (!mss || abs(mss) > 65535) {
1699 memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
1700 return ERR_ALERT | ERR_FATAL;
1701 }
1702
1703 list_for_each_entry(l, &conf->listeners, by_bind) {
1704 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1705 l->maxseg = mss;
1706 }
1707
1708 return 0;
1709 }
1710 #endif
1711
1712 #ifdef TCP_USER_TIMEOUT
1713 /* parse the "tcp-ut" bind keyword */
bind_parse_tcp_ut(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1714 static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1715 {
1716 const char *ptr = NULL;
1717 struct listener *l;
1718 unsigned int timeout;
1719
1720 if (!*args[cur_arg + 1]) {
1721 memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
1722 return ERR_ALERT | ERR_FATAL;
1723 }
1724
1725 ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
1726 if (ptr) {
1727 memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
1728 return ERR_ALERT | ERR_FATAL;
1729 }
1730
1731 list_for_each_entry(l, &conf->listeners, by_bind) {
1732 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1733 l->tcp_ut = timeout;
1734 }
1735
1736 return 0;
1737 }
1738 #endif
1739
1740 #ifdef SO_BINDTODEVICE
1741 /* parse the "interface" bind keyword */
bind_parse_interface(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1742 static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1743 {
1744 struct listener *l;
1745
1746 if (!*args[cur_arg + 1]) {
1747 memprintf(err, "'%s' : missing interface name", args[cur_arg]);
1748 return ERR_ALERT | ERR_FATAL;
1749 }
1750
1751 list_for_each_entry(l, &conf->listeners, by_bind) {
1752 if (l->addr.ss_family == AF_INET || l->addr.ss_family == AF_INET6)
1753 l->interface = strdup(args[cur_arg + 1]);
1754 }
1755
1756 return 0;
1757 }
1758 #endif
1759
1760 #ifdef CONFIG_HAP_NS
1761 /* parse the "namespace" bind keyword */
bind_parse_namespace(char ** args,int cur_arg,struct proxy * px,struct bind_conf * conf,char ** err)1762 static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
1763 {
1764 struct listener *l;
1765 char *namespace = NULL;
1766
1767 if (!*args[cur_arg + 1]) {
1768 memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
1769 return ERR_ALERT | ERR_FATAL;
1770 }
1771 namespace = args[cur_arg + 1];
1772
1773 list_for_each_entry(l, &conf->listeners, by_bind) {
1774 l->netns = netns_store_lookup(namespace, strlen(namespace));
1775
1776 if (l->netns == NULL)
1777 l->netns = netns_store_insert(namespace);
1778
1779 if (l->netns == NULL) {
1780 Alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
1781 return ERR_ALERT | ERR_FATAL;
1782 }
1783 }
1784 return 0;
1785 }
1786 #endif
1787
1788 #ifdef TCP_USER_TIMEOUT
1789 /* parse the "tcp-ut" server keyword */
srv_parse_tcp_ut(char ** args,int * cur_arg,struct proxy * px,struct server * newsrv,char ** err)1790 static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
1791 {
1792 const char *ptr = NULL;
1793 unsigned int timeout;
1794
1795 if (!*args[*cur_arg + 1]) {
1796 memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
1797 return ERR_ALERT | ERR_FATAL;
1798 }
1799
1800 ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
1801 if (ptr) {
1802 memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
1803 return ERR_ALERT | ERR_FATAL;
1804 }
1805
1806 if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
1807 newsrv->tcp_ut = timeout;
1808
1809 return 0;
1810 }
1811 #endif
1812
1813
/* Table of the sample fetch keywords implemented in this file. Each entry
 * names a keyword and the function implementing it, followed by its argument
 * specification, an optional argument validation function, the declared
 * output type and the SMP_USE_* source it relies on (all of them use
 * SMP_USE_L4CLI). The "fc_*" keywords only exist when TCP_INFO is defined,
 * and the counter ones only on Linux, FreeBSD and NetBSD.
 *
 * Note: must not be declared <const> as its list will be overwritten.
 * Note: fetches that may return multiple types must be declared as the lowest
 * common denominator, the type that can be cast into all other ones. For
 * instance v4/v6 must be declared v4.
 */
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
	{ "dst", smp_fetch_dst, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
	{ "src", smp_fetch_src, 0, NULL, SMP_T_IPV4, SMP_USE_L4CLI },
	{ "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
	{ "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
#ifdef TCP_INFO
	/* time values: the string argument is checked by val_fc_time_value() */
	{ "fc_rtt", smp_fetch_fc_rtt, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_rttvar", smp_fetch_fc_rttvar, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__)
	/* counters: any argument is discarded with a warning by var_fc_counter() */
	{ "fc_unacked", smp_fetch_fc_unacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_sacked", smp_fetch_fc_sacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_retrans", smp_fetch_fc_retrans, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_fackets", smp_fetch_fc_fackets, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_lost", smp_fetch_fc_lost, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
	{ "fc_reordering", smp_fetch_fc_reordering, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
#endif // linux || freebsd || netbsd
#endif // TCP_INFO
	{ /* END */ },
}};
1840
1841 /************************************************************************/
1842 /* All supported bind keywords must be declared here. */
1843 /************************************************************************/
1844
1845 /* Note: must not be declared <const> as its list will be overwritten.
1846 * Please take care of keeping this list alphabetically sorted, doing so helps
1847 * all code contributors.
1848 * Optional keywords are also declared with a NULL ->parse() function so that
1849 * the config parser can report an appropriate error when a known keyword was
1850 * not enabled.
1851 */
1852 static struct bind_kw_list bind_kws = { "TCP", { }, {
1853 #ifdef TCP_DEFER_ACCEPT
1854 { "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
1855 #endif
1856 #ifdef SO_BINDTODEVICE
1857 { "interface", bind_parse_interface, 1 }, /* specifically bind to this interface */
1858 #endif
1859 #ifdef TCP_MAXSEG
1860 { "mss", bind_parse_mss, 1 }, /* set MSS of listening socket */
1861 #endif
1862 #ifdef TCP_USER_TIMEOUT
1863 { "tcp-ut", bind_parse_tcp_ut, 1 }, /* set User Timeout on listening socket */
1864 #endif
1865 #ifdef TCP_FASTOPEN
1866 { "tfo", bind_parse_tfo, 0 }, /* enable TCP_FASTOPEN of listening socket */
1867 #endif
1868 #ifdef CONFIG_HAP_TRANSPARENT
1869 { "transparent", bind_parse_transparent, 0 }, /* transparently bind to the specified addresses */
1870 #endif
1871 #ifdef IPV6_V6ONLY
1872 { "v4v6", bind_parse_v4v6, 0 }, /* force socket to bind to IPv4+IPv6 */
1873 { "v6only", bind_parse_v6only, 0 }, /* force socket to bind to IPv6 only */
1874 #endif
1875 #ifdef CONFIG_HAP_NS
1876 { "namespace", bind_parse_namespace, 1 },
1877 #endif
1878 /* the versions with the NULL parse function*/
1879 { "defer-accept", NULL, 0 },
1880 { "interface", NULL, 1 },
1881 { "mss", NULL, 1 },
1882 { "transparent", NULL, 0 },
1883 { "v4v6", NULL, 0 },
1884 { "v6only", NULL, 0 },
1885 { NULL, NULL, 0 },
1886 }};
1887
/* "server" line keywords provided by the TCP layer; the list is handed to
 * srv_register_keywords() from __tcp_protocol_init(). "tcp-ut" is only
 * declared when TCP_USER_TIMEOUT is defined at build time.
 *
 * NOTE(review): the meaning of the trailing numeric fields (1, 0) is not
 * visible from here - confirm against the declaration of the entry type.
 */
static struct srv_kw_list srv_kws = { "TCP", { }, {
#ifdef TCP_USER_TIMEOUT
	{ "tcp-ut", srv_parse_tcp_ut, 1, 0 }, /* set TCP user timeout on server */
#endif
	{ NULL, NULL, 0 },
}};
1894
/* Action keywords handed to tcp_req_conn_keywords_register() from
 * __tcp_protocol_init(). Each entry pairs an action name with its parser;
 * the four "set-*" names share tcp_parse_set_src_dst().
 * NOTE(review): presumably these back the "tcp-request connection" rule set -
 * confirm against the registration function.
 */
static struct action_kw_list tcp_req_conn_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ "set-src", tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst" , tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ /* END */ }
}};
1903
/* Action keywords handed to tcp_req_sess_keywords_register() from
 * __tcp_protocol_init(). Each entry pairs an action name with its parser;
 * the four "set-*" names share tcp_parse_set_src_dst().
 * NOTE(review): presumably these back the "tcp-request session" rule set -
 * confirm against the registration function.
 */
static struct action_kw_list tcp_req_sess_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ "set-src", tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst" , tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ /* END */ }
}};
1912
/* Action keywords handed to tcp_req_cont_keywords_register() from
 * __tcp_protocol_init(); only "silent-drop" is declared here.
 * NOTE(review): presumably these back the "tcp-request content" rule set -
 * confirm against the registration function.
 */
static struct action_kw_list tcp_req_cont_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ /* END */ }
}};
1917
/* Action keywords handed to tcp_res_cont_keywords_register() from
 * __tcp_protocol_init(); only "silent-drop" is declared here.
 * NOTE(review): presumably these back the "tcp-response content" rule set -
 * confirm against the registration function.
 */
static struct action_kw_list tcp_res_cont_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ /* END */ }
}};
1922
/* Action keywords handed to http_req_keywords_register() from
 * __tcp_protocol_init(). Each entry pairs an action name with its parser;
 * the four "set-*" names share tcp_parse_set_src_dst().
 * NOTE(review): presumably these back the "http-request" rule set - confirm
 * against the registration function.
 */
static struct action_kw_list http_req_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ "set-src", tcp_parse_set_src_dst },
	{ "set-src-port", tcp_parse_set_src_dst },
	{ "set-dst", tcp_parse_set_src_dst },
	{ "set-dst-port", tcp_parse_set_src_dst },
	{ /* END */ }
}};
1931
/* Action keywords handed to http_res_keywords_register() from
 * __tcp_protocol_init(); only "silent-drop" is declared here.
 * NOTE(review): presumably these back the "http-response" rule set - confirm
 * against the registration function.
 */
static struct action_kw_list http_res_actions = {ILH, {
	{ "silent-drop", tcp_parse_silent_drop },
	{ /* END */ }
}};
1936
1937
/* Registers everything this file exports: the two TCP protocols, the sample
 * fetch keywords, the bind and server keywords, and the six action keyword
 * lists. The constructor attribute asks the compiler to have this function
 * called automatically at program start-up, so nothing calls it explicitly.
 */
__attribute__((constructor))
static void __tcp_protocol_init(void)
{
	/* protocols: IPv4 first, then IPv6 */
	protocol_register(&proto_tcpv4);
	protocol_register(&proto_tcpv6);

	/* sample fetches, then "bind" and "server" line keywords */
	sample_register_fetches(&sample_fetch_keywords);
	bind_register_keywords(&bind_kws);
	srv_register_keywords(&srv_kws);

	/* action keywords, one list per rule set */
	tcp_req_conn_keywords_register(&tcp_req_conn_actions);
	tcp_req_sess_keywords_register(&tcp_req_sess_actions);
	tcp_req_cont_keywords_register(&tcp_req_cont_actions);
	tcp_res_cont_keywords_register(&tcp_res_cont_actions);
	http_req_keywords_register(&http_req_actions);
	http_res_keywords_register(&http_res_actions);
}
1953
1954
1955 /*
1956 * Local variables:
1957 * c-indent-level: 8
1958 * c-basic-offset: 8
1959 * End:
1960 */
1961