1 /*
2  * PgBouncer - Lightweight connection pooler for PostgreSQL.
3  *
4  * Copyright (c) 2007-2009  Marko Kreen, Skype Technologies OÜ
5  *
6  * Permission to use, copy, modify, and/or distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /*
20  * core structures
21  */
22 
23 #include "system.h"
24 
25 #include <usual/cfparser.h>
26 #include <usual/time.h>
27 #include <usual/list.h>
28 #include <usual/statlist.h>
29 #include <usual/string.h>
30 #include <usual/logging.h>
31 #include <usual/aatree.h>
32 #include <usual/hashing/lookup3.h>
33 #include <usual/slab.h>
34 #include <usual/socket.h>
35 #include <usual/safeio.h>
36 #include <usual/mbuf.h>
37 #include <usual/strpool.h>
38 
39 #include <event2/event.h>
40 #include <event2/event_struct.h>
41 
/*
 * systemd integration.
 *
 * With USE_SYSTEMD the real <systemd/sd-daemon.h> API is used.  Without it,
 * the sd_* names are stubbed out below so code using them still compiles.
 *
 * Note: the sd_notify()/sd_notifyf() stubs expand to nothing, so those two
 * can only be used as plain statements, never inside an expression.
 */
#ifdef USE_SYSTEMD
#include <systemd/sd-daemon.h>
#else
#define SD_LISTEN_FDS_START 3
#define sd_is_socket(fd, f, t, l) (0)
#define sd_listen_fds(ue) (0)
#define sd_notify(ue, s)
#define sd_notifyf(ue, f, ...)
#endif
51 
52 
53 /* global libevent handle */
54 extern struct event_base *pgb_event_base;
55 
56 
57 /* each state corresponds to a list */
enum SocketState {
	/*
	 * Client states.  All of them must be declared before SV_FREE:
	 * is_server_socket() classifies a socket by testing state >= SV_FREE.
	 */
	CL_FREE,		/* free_client_list */
	CL_JUSTFREE,		/* justfree_client_list */
	CL_LOGIN,		/* login_client_list */
	CL_WAITING,		/* pool->waiting_client_list */
	CL_WAITING_LOGIN,	/*   - but return to CL_LOGIN instead of CL_ACTIVE */
	CL_ACTIVE,		/* pool->active_client_list */
	CL_CANCEL,		/* pool->cancel_req_list */

	/* Server states.  SV_FREE must stay the first of them (see above). */
	SV_FREE,		/* free_server_list */
	SV_JUSTFREE,		/* justfree_server_list */
	SV_LOGIN,		/* pool->new_server_list */
	SV_IDLE,		/* pool->idle_server_list */
	SV_ACTIVE,		/* pool->active_server_list */
	SV_USED,		/* pool->used_server_list */
	SV_TESTED		/* pool->tested_server_list */
};
75 
/*
 * Pooler pause level.
 * NOTE(review): presumably the value held in cf_pause_mode -- confirm.
 */
enum PauseMode {
	P_NONE = 0,		/* active pooling */
	P_PAUSE = 1,		/* wait for client to finish work */
	P_SUSPEND = 2		/* wait for buffers to be empty */
};
81 
/*
 * TLS modes, numbered 0..5 in declaration order.
 * NOTE(review): the names match libpq's sslmode values and are presumably
 * what cf_client_tls_sslmode / cf_server_tls_sslmode hold -- confirm.
 */
enum SSLMode {
	SSLMODE_DISABLED,
	SSLMODE_ALLOW,
	SSLMODE_PREFER,
	SSLMODE_REQUIRE,
	SSLMODE_VERIFY_CA,
	SSLMODE_VERIFY_FULL
};
90 
/*
 * True when the socket is in one of the SV_* states.  Relies on every SV_*
 * value being declared after all CL_* values in enum SocketState.
 */
#define is_server_socket(sk) ((sk)->state >= SV_FREE)
92 
93 
/*
 * Forward declarations of the core types.  They are declared before the
 * project headers are included further down; the full definitions of
 * most of them follow later in this file.
 */
typedef struct PgSocket PgSocket;
typedef struct PgUser PgUser;
typedef struct PgDatabase PgDatabase;
typedef struct PgPool PgPool;
typedef struct PgStats PgStats;
typedef union PgAddr PgAddr;
typedef enum SocketState SocketState;
typedef struct PktHdr PktHdr;
typedef struct ScramState ScramState;	/* struct body is declared inside struct PgSocket */

/* used by IOBUF_SIZE below */
extern int cf_sbuf_len;
105 
106 #include "util.h"
107 #include "iobuf.h"
108 #include "sbuf.h"
109 #include "pktbuf.h"
110 #include "varcache.h"
111 #include "dnslookup.h"
112 
113 #include "admin.h"
114 #include "loader.h"
115 #include "client.h"
116 #include "server.h"
117 #include "pooler.h"
118 #include "proto.h"
119 #include "objects.h"
120 #include "stats.h"
121 #include "takeover.h"
122 #include "janitor.h"
123 #include "hba.h"
124 #include "pam.h"
125 
/*
 * Built-in default directory for the unix socket: "/tmp" everywhere except
 * on WIN32, where it is the empty string.
 * NOTE(review): presumably the default for cf_unix_socket_dir -- confirm.
 */
#ifndef WIN32
#define DEFAULT_UNIX_SOCKET_DIR "/tmp"
#else
#define DEFAULT_UNIX_SOCKET_DIR ""
#endif
131 
132 /*
133  * To avoid allocations, we use static buffers.
134  *
135  * Note that a trailing zero byte is used in each case, so the actual
136  * usable length is one less.
137  */
138 
/* matching NAMEDATALEN; size of PgDatabase.name */
#define MAX_DBNAME	64

/*
 * Ought to match NAMEDATALEN.  Some cloud services use longer user
 * names, so give it some extra room.  Size of PgUser.name.
 */
#define MAX_USERNAME	128

/*
 * Some cloud services use very long generated passwords, so give it
 * plenty of room.  Up to PostgreSQL 13, the server can handle
 * passwords up to 996 bytes, after that it's longer.  Also, libpq
 * maxes out around 1024, so going much higher is not straightforward.
 * Size of PgUser.passwd.
 */
#define MAX_PASSWORD	996
155 
156 /*
157  * AUTH_* symbols are used for both protocol handling and
158  * configuration settings (auth_type, hba).  Some are only applicable
159  * to one or the other.
160  */
161 
162 /* no-auth modes */
/*
 * AUTH_ANY is parenthesized so the negative literal always expands as a
 * single operand, whatever expression it is pasted into.
 */
#define AUTH_ANY	(-1)	/* same as trust but without username check */
#define AUTH_TRUST	AUTH_OK	/* expands to AUTH_OK (0), defined just below */

/* protocol codes in Authentication* 'R' messages from server */
#define AUTH_OK		0
#define AUTH_KRB4	1	/* not supported */
#define AUTH_KRB5	2	/* not supported */
#define AUTH_PLAIN	3
#define AUTH_CRYPT	4	/* not supported */
#define AUTH_MD5	5
#define AUTH_SCM_CREDS	6	/* not supported */
#define AUTH_GSS	7	/* not supported */
#define AUTH_GSS_CONT	8	/* not supported */
#define AUTH_SSPI	9	/* not supported */
#define AUTH_SASL	10
#define AUTH_SASL_CONT	11
#define AUTH_SASL_FIN	12

/* internal codes; numbered from 107 up, clear of the protocol codes above */
#define AUTH_CERT	107
#define AUTH_PEER	108
#define AUTH_HBA	109
#define AUTH_REJECT	110
#define AUTH_PAM	111
#define AUTH_SCRAM_SHA_256	112
188 
189 /* type codes for weird pkts */
/*
 * Pseudo packet types for messages that carry no type byte.  Each value is
 * a 32-bit code of the form (major << 16) | minor.
 */
#define PKT_STARTUP_V2	(2 << 16)		/* 0x20000 */
#define PKT_STARTUP	(3 << 16)		/* 0x30000 */
#define PKT_CANCEL	((1234 << 16) | 5678)	/* 80877102 */
#define PKT_SSLREQ	((1234 << 16) | 5679)	/* 80877103 */
#define PKT_GSSENCREQ	((1234 << 16) | 5680)	/* 80877104 */

/* NOTE(review): presumably the values stored in the various pool_mode fields -- confirm */
#define POOL_SESSION	0
#define POOL_TX		1
#define POOL_STMT	2
#define POOL_INHERIT	3

/* size in bytes of PgSocket.cancel_key */
#define BACKENDKEY_LEN	8

/* buffer size for startup noise */
#define STARTUP_BUF	1024
205 
206 
207 /*
208  * Remote/local address
209  */
210 
211 /* buffer for pgaddr string conversions (with port) */
/* INET6_ADDRSTRLEN for the address text plus 10 spare bytes for the port part */
#define PGADDR_BUF  (INET6_ADDRSTRLEN + 10)

/*
 * Representation used for AF_UNIX peers inside union PgAddr: a sockaddr_in
 * (only family and port are meaningful) followed by the peer's uid and pid.
 */
struct sockaddr_ucreds {
	struct sockaddr_in sin;
	uid_t uid;
	pid_t pid;
};
219 
220 /*
221  * AF_INET,AF_INET6 are stored as-is,
222  * AF_UNIX uses sockaddr_in port + uid/pid.
223  */
union PgAddr {
	struct sockaddr sa;		/* generic view; sa.sa_family is read by pga_family() */
	struct sockaddr_in sin;		/* AF_INET */
	struct sockaddr_in6 sin6;	/* AF_INET6 */
	struct sockaddr_ucreds scred;	/* AF_UNIX: port + uid/pid */
};
230 
pga_family(const PgAddr * a)231 static inline unsigned int pga_family(const PgAddr *a) { return a->sa.sa_family; }
pga_is_unix(const PgAddr * a)232 static inline bool pga_is_unix(const PgAddr *a) { return a->sa.sa_family == AF_UNIX; }
233 
/*
 * PgAddr helpers whose definitions are outside this header.
 * NOTE(review): the one-line notes below are inferred from names and
 * signatures only -- confirm against the definitions.
 */
int pga_port(const PgAddr *a);					/* port number of the address */
void pga_set(PgAddr *a, int fam, int port);			/* initialise from family + port */
void pga_copy(PgAddr *a, const struct sockaddr *sa);		/* initialise from a system sockaddr */
bool pga_pton(PgAddr *a, const char *s, int port);		/* parse text address; bool result reports success */
const char *pga_ntop(const PgAddr *a, char *dst, int dstlen);	/* format address into dst */
const char *pga_str(const PgAddr *a, char *dst, int dstlen);	/* format address into dst; see PGADDR_BUF for sizing */
const char *pga_details(const PgAddr *a, char *dst, int dstlen);	/* format address with extra details into dst */
int pga_cmp_addr(const PgAddr *a, const PgAddr *b);		/* compare two addresses */
242 
243 /*
244  * Stats, kept per-pool.
245  */
struct PgStats {
	/* event counters */
	uint64_t xact_count;
	uint64_t query_count;

	/* traffic counters; NOTE(review): presumably bytes per direction -- confirm */
	uint64_t server_bytes;
	uint64_t client_bytes;

	/* accumulated durations */
	usec_t xact_time;	/* total transaction time in us */
	usec_t query_time;	/* total query time in us */
	usec_t wait_time;	/* total time clients had to wait */
};
255 
256 /*
257  * Contains connections for one db+user pair.
258  *
259  * Stats:
260  *   ->stats is updated online.
261  *   for each stats_period:
262  *   ->older_stats = ->newer_stats
263  *   ->newer_stats = ->stats
264  */
struct PgPool {
	struct List head;			/* entry in global pool_list */
	struct List map_head;			/* entry in user->pool_list */

	PgDatabase *db;			/* corresponding database */
	PgUser *user;			/* user logged in as */

	/* client lists; the matching SocketState is given in parentheses */
	struct StatList active_client_list;	/* logged-in clients waiting for events (CL_ACTIVE) */
	struct StatList waiting_client_list;	/* client waits for a server to be available (CL_WAITING) */
	struct StatList cancel_req_list;	/* closed client connections with server key (CL_CANCEL) */

	/* server lists; the matching SocketState is given in parentheses */
	struct StatList active_server_list;	/* servers linked with clients (SV_ACTIVE) */
	struct StatList idle_server_list;	/* servers ready to be linked with clients (SV_IDLE) */
	struct StatList used_server_list;	/* server just unlinked from clients (SV_USED) */
	struct StatList tested_server_list;	/* server in testing process (SV_TESTED) */
	struct StatList new_server_list;	/* servers in login phase (SV_LOGIN) */

	/* live counters plus the two snapshots rotated every stats_period */
	PgStats stats;
	PgStats newer_stats;
	PgStats older_stats;

	/* database info to be sent to client */
	struct PktBuf *welcome_msg; /* ServerParams without VarCache ones */

	VarCache orig_vars;		/* default params from server */

	usec_t last_lifetime_disconnect;/* last time when server_lifetime was applied */

	/* if last connect to server failed, there should be delay before next */
	usec_t last_connect_time;
	bool last_connect_failed:1;
	bool last_login_failed:1;

	/* NOTE(review): presumably set once welcome_msg has been filled in -- confirm */
	bool welcome_msg_ready:1;
};
300 
/*
 * Server connections outside the login phase: active + idle + tested + used.
 * Macro: the `pool` argument is expanded once per list, so pass an
 * expression without side effects.
 */
#define pool_connected_server_count(pool) \
	(statlist_count(&(pool)->active_server_list) \
	 + statlist_count(&(pool)->idle_server_list) \
	 + statlist_count(&(pool)->tested_server_list) \
	 + statlist_count(&(pool)->used_server_list))

/* All server connections of the pool, including those still in new_server_list. */
#define pool_server_count(pool) \
	(pool_connected_server_count(pool) \
	 + statlist_count(&(pool)->new_server_list))

/* Clients attached to the pool: active + waiting (cancel_req_list is not counted). */
#define pool_client_count(pool) \
	(statlist_count(&(pool)->active_client_list) \
	 + statlist_count(&(pool)->waiting_client_list))
314 
315 /*
316  * A user in login db.
317  *
318  * FIXME: remove ->head as ->tree_node should be enough.
319  *
320  * For databases where remote user is forced, the pool is:
321  * first(db->forced_user->pool_list), where pool_list has only one entry.
322  *
 * Otherwise, ->pool_list contains multiple pools, one for each PgDatabase
 * the user has logged in to.
325  */
struct PgUser {
	struct List head;		/* used to attach user to list */
	struct List pool_list;		/* list of pools where pool->user == this user */
	struct AANode tree_node;	/* used to attach user to tree */
	char name[MAX_USERNAME];	/* NUL-terminated, so at most MAX_USERNAME - 1 bytes */
	char passwd[MAX_PASSWORD];	/* NUL-terminated, so at most MAX_PASSWORD - 1 bytes */
	uint8_t scram_ClientKey[32];
	uint8_t scram_ServerKey[32];
	bool has_scram_keys;		/* true if the above two are valid */
	bool mock_auth;			/* not a real user, only for mock auth */
	int pool_mode;			/* NOTE(review): presumably one of POOL_* -- confirm */
	int max_user_connections;	/* how many server connections are allowed */
	int connection_count;	/* how many connections are used by the user now */
};
340 
341 /*
342  * A database entry from config.
343  */
struct PgDatabase {
	struct List head;	/* list linkage */
	char name[MAX_DBNAME];	/* db name for clients */

	bool db_paused;		/* PAUSE <db>; was issued */
	bool db_wait_close;	/* WAIT_CLOSE was issued for this database */
	bool db_dead;		/* used on RELOAD/SIGHUP to later detect removed dbs */
	bool db_auto;		/* is the database auto-created by autodb_connstr */
	bool db_disabled;	/* is the database accepting new connections? */
	bool admin;		/* internal console db */

	struct PktBuf *startup_params; /* partial StartupMessage (without user) to be sent to server */

	PgUser *forced_user;	/* if not NULL, the user/psw is forced */
	PgUser *auth_user;	/* if not NULL, users not in userlist.txt will be looked up on the server */

	char *host;		/* host or unix socket name */
	int port;

	int pool_size;		/* max server connections in one pool */
	int min_pool_size;	/* min server connections in one pool */
	int res_pool_size;	/* additional server connections in case of trouble */
	int pool_mode;		/* pool mode for this database */
	int max_db_connections;	/* max server connections between all pools */

	/*
	 * server-side name; points into a startup message buffer.
	 * NOTE(review): the original comment said "startup_msg", presumably
	 * meaning ->startup_params above -- confirm.
	 */
	const char *dbname;

	/* startup commands to send to server after connect. malloc-ed */
	char *connect_query;

	usec_t inactive_time;	/* when auto-database became inactive (to kill it after timeout) */
	unsigned active_stamp;	/* set if autodb has connections */

	int connection_count;	/* total connections for this database in all pools */

	struct AATree user_tree;	/* users that have been queried on this database */
};
381 
382 
383 /*
384  * A client or server connection.
385  *
386  * ->state corresponds to various lists the struct can be at.
387  */
struct PgSocket {
	struct List head;		/* list header */
	PgSocket *link;		/* the dest of packets */
	PgPool *pool;		/* parent pool, if NULL not yet assigned */

	PgUser *login_user;	/* presented login, for client it may differ from pool->user */

	int client_auth_type;	/* auth method decided by hba */

	SocketState state:8;	/* this also specifies socket location */

	/* 1-bit flags; the prefix in each comment says which kind of socket uses it */
	bool ready:1;		/* server: accepts new query */
	bool idle_tx:1;		/* server: idling in tx */
	bool close_needed:1;	/* server: this socket must be closed ASAP */
	bool setting_vars:1;	/* server: setting client vars */
	bool exec_on_connect:1;	/* server: executing connect_query */
	bool resetting:1;	/* server: executing reset query from auth login; don't release on flush */
	bool copy_mode:1;	/* server: in copy stream, ignores any Sync packets */

	bool wait_for_welcome:1;/* client: no server yet in pool, cannot send welcome msg */
	bool wait_for_user_conn:1;/* client: waiting for auth_conn server connection */
	bool wait_for_user:1;	/* client: waiting for auth_conn query results */
	bool wait_for_auth:1;	/* client: waiting for external auth (PAM) to be completed */

	bool suspended:1;	/* client/server: if the socket is suspended */

	bool admin_user:1;	/* console client: has admin rights */
	bool own_user:1;	/* console client: client with same uid on unix socket */
	bool wait_for_response:1;/* console client: waits for completion of PAUSE/SUSPEND cmd */

	bool wait_sslchar:1;	/* server: waiting for ssl response: S/N */

	int expect_rfq_count;	/* client: count of ReadyForQuery packets client should see */

	usec_t connect_time;	/* when connection was made */
	usec_t request_time;	/* last activity time; also aliased as tmp_sk_oldfd */
	usec_t query_start;	/* query start moment; also aliased as tmp_sk_linkfd */
	usec_t xact_start;	/* xact start moment */
	usec_t wait_start;	/* waiting start moment */

	/* also aliased as tmp_login_salt */
	uint8_t cancel_key[BACKENDKEY_LEN]; /* client: generated, server: remote */
	PgAddr remote_addr;	/* ip:port for remote endpoint */
	PgAddr local_addr;	/* ip:port for local endpoint */

	/* anonymous union: the two pointers share storage, only one is valid at a time */
	union {
		struct DNSToken *dns_token;	/* ongoing request */
		PgDatabase *db;			/* cache db while doing auth query */
	};

	/* per-connection SCRAM exchange state; also reachable via the ScramState typedef */
	struct ScramState {
		char *client_nonce;
		char *client_first_message_bare;
		char *client_final_message_without_proof;
		char *server_nonce;
		char *server_first_message;
		uint8_t	*SaltedPassword;
		char cbind_flag;
		bool adhoc;	/* SCRAM data made up from plain-text password */
		int iterations;
		char *salt;	/* base64-encoded */
		uint8_t ClientKey[32];	/* SHA256_DIGEST_LENGTH */
		uint8_t StoredKey[32];
		uint8_t ServerKey[32];
	} scram_state;

	VarCache vars;		/* state of interesting server parameters */

	SBuf sbuf;		/* stream buffer, must be last */
};
457 
/* size of the IOBuf header, i.e. the offset of its ->buf member */
#define RAW_IOBUF_SIZE	offsetof(IOBuf, buf)
/* header plus cf_sbuf_len bytes; not a compile-time constant since cf_sbuf_len is a variable */
#define IOBUF_SIZE	(RAW_IOBUF_SIZE + cf_sbuf_len)

/*
 * The tmp_* names below are aliases for existing PgSocket members that are
 * reused as scratch space.
 */

/* where to store old fd info during SHOW FDS result processing */
#define tmp_sk_oldfd	request_time
#define tmp_sk_linkfd	query_start
/* takeover_clean_socket() needs to clean those up */

/* where the salt is temporarily stored */
#define tmp_login_salt  cancel_key
468 
469 /* main.c */
/*
 * Global settings and state.  Only declared here; the section comments
 * below are grouping labels derived from the variable names.
 */
extern int cf_daemon;

extern char *cf_config_file;
extern char *cf_jobname;

/* listening sockets */
extern char *cf_unix_socket_dir;
extern int cf_unix_socket_mode;
extern char *cf_unix_socket_group;
extern char *cf_listen_addr;
extern int cf_listen_port;
extern int cf_listen_backlog;

/* pool mode and size limits */
extern int cf_pool_mode;
extern int cf_max_client_conn;
extern int cf_default_pool_size;
extern int cf_min_pool_size;
extern int cf_res_pool_size;
extern usec_t cf_res_pool_timeout;
extern int cf_max_db_connections;
extern int cf_max_user_connections;

/* auto-created databases */
extern char * cf_autodb_connstr;
extern usec_t cf_autodb_idle_timeout;

/* server/client handling, timeouts (usec_t) and DNS */
extern usec_t cf_suspend_timeout;
extern usec_t cf_server_lifetime;
extern usec_t cf_server_idle_timeout;
extern char * cf_server_reset_query;
extern int cf_server_reset_query_always;
extern char * cf_server_check_query;
extern usec_t cf_server_check_delay;
extern int cf_server_fast_close;
extern usec_t cf_server_connect_timeout;
extern usec_t cf_server_login_retry;
extern usec_t cf_query_timeout;
extern usec_t cf_query_wait_timeout;
extern usec_t cf_client_idle_timeout;
extern usec_t cf_client_login_timeout;
extern usec_t cf_idle_transaction_timeout;
extern int cf_server_round_robin;
extern int cf_disable_pqexec;
extern usec_t cf_dns_max_ttl;
extern usec_t cf_dns_nxdomain_ttl;
extern usec_t cf_dns_zone_check_period;
extern char *cf_resolv_conf;

/* authentication */
extern int cf_auth_type;
extern char *cf_auth_file;
extern char *cf_auth_query;
extern char *cf_auth_user;
extern char *cf_auth_hba_file;

extern char *cf_pidfile;

extern char *cf_ignore_startup_params;

/* console users and statistics */
extern char *cf_admin_users;
extern char *cf_stats_users;
extern int cf_stats_period;
extern int cf_log_stats;

/* run state */
extern int cf_pause_mode;
extern int cf_shutdown;
extern int cf_reboot;

extern unsigned int cf_max_packet_size;

/* low-level socket tuning */
extern int cf_sbuf_loopcnt;
extern int cf_so_reuseport;
extern int cf_tcp_keepalive;
extern int cf_tcp_keepcnt;
extern int cf_tcp_keepidle;
extern int cf_tcp_keepintvl;
extern int cf_tcp_socket_buffer;
extern int cf_tcp_defer_accept;
extern int cf_tcp_user_timeout;

/* logging */
extern int cf_log_connections;
extern int cf_log_disconnections;
extern int cf_log_pooler_errors;
extern int cf_application_name_add_host;

/* TLS, client side */
extern int cf_client_tls_sslmode;
extern char *cf_client_tls_protocols;
extern char *cf_client_tls_ca_file;
extern char *cf_client_tls_cert_file;
extern char *cf_client_tls_key_file;
extern char *cf_client_tls_ciphers;
extern char *cf_client_tls_dheparams;
extern char *cf_client_tls_ecdhecurve;

/* TLS, server side */
extern int cf_server_tls_sslmode;
extern char *cf_server_tls_protocols;
extern char *cf_server_tls_ca_file;
extern char *cf_server_tls_cert_file;
extern char *cf_server_tls_key_file;
extern char *cf_server_tls_ciphers;

/* NOTE(review): presumably the name <-> POOL_* lookup table -- confirm */
extern const struct CfLookup pool_mode_map[];

extern usec_t g_suspend_start;

extern struct DNSContext *adns;
extern struct HBA *parsed_hba;
574 
575 static inline PgSocket * _MUSTCHECK
pop_socket(struct StatList * slist)576 pop_socket(struct StatList *slist)
577 {
578 	struct List *item = statlist_pop(slist);
579 	if (item == NULL)
580 		return NULL;
581 	return container_of(item, PgSocket, head);
582 }
583 
584 static inline PgSocket *
first_socket(struct StatList * slist)585 first_socket(struct StatList *slist)
586 {
587 	if (statlist_empty(slist))
588 		return NULL;
589 	return container_of(slist->head.next, PgSocket, head);
590 }
591 
592 static inline PgSocket *
last_socket(struct StatList * slist)593 last_socket(struct StatList *slist)
594 {
595 	if (statlist_empty(slist))
596 		return NULL;
597 	return container_of(slist->head.prev, PgSocket, head);
598 }
599 
/*
 * Configuration entry points, defined outside this header.
 */

/*
 * NOTE(review): the argument is presumably one of the AUTH_* codes
 * (cf. cf_auth_type) -- confirm against the definition.
 */
bool requires_auth_file(int auth_type);

/* Load the configuration; takes no arguments and returns nothing. */
void load_config(void);

/* Set one parameter from key/value strings; the bool result reports the outcome. */
bool set_config_param(const char *key, const char *val);

/*
 * Invoke param_cb with the caller's arg plus name, current value, default
 * value and reloadable flag.
 * NOTE(review): presumably called once per known parameter -- confirm.
 */
void config_for_each(void (*param_cb)(void *arg, const char *name, const char *val, const char *defval, bool reloadable),
		     void *arg);
607