1 /*
2 * PgBouncer - Lightweight connection pooler for PostgreSQL.
3 *
4 * Copyright (c) 2007-2009 Marko Kreen, Skype Technologies OÜ
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * core structures
21 */
22
23 #include "system.h"
24
25 #include <usual/cfparser.h>
26 #include <usual/time.h>
27 #include <usual/list.h>
28 #include <usual/statlist.h>
29 #include <usual/string.h>
30 #include <usual/logging.h>
31 #include <usual/aatree.h>
32 #include <usual/hashing/lookup3.h>
33 #include <usual/slab.h>
34 #include <usual/socket.h>
35 #include <usual/safeio.h>
36 #include <usual/mbuf.h>
37 #include <usual/strpool.h>
38
39 #include <event2/event.h>
40 #include <event2/event_struct.h>
41
/*
 * systemd integration.
 *
 * When built without USE_SYSTEMD, the sd_* calls used by the code are
 * replaced by stubs: the query-style calls evaluate to 0 and the
 * notification calls expand to nothing.
 */
#ifdef USE_SYSTEMD
#include <systemd/sd-daemon.h>
#else
#define SD_LISTEN_FDS_START 3
#define sd_is_socket(fd, family, type, listening) (0)
#define sd_listen_fds(unset_env) (0)
#define sd_notify(unset_env, state)
#define sd_notifyf(unset_env, fmt, ...)
#endif
51
52
53 /* global libevent handle */
54 extern struct event_base *pgb_event_base;
55
56
/*
 * Socket states.  Each state corresponds to a list; the list that
 * holds sockets in a given state is named next to it.
 *
 * All client (CL_*) states are declared before the server (SV_*)
 * states: is_server_socket() tests for state >= SV_FREE, so this
 * ordering must be kept.
 */
enum SocketState {
	/* client side */
	CL_FREE,		/* free_client_list */
	CL_JUSTFREE,		/* justfree_client_list */
	CL_LOGIN,		/* login_client_list */
	CL_WAITING,		/* pool->waiting_client_list */
	CL_WAITING_LOGIN,	/* - but return to CL_LOGIN instead of CL_ACTIVE */
	CL_ACTIVE,		/* pool->active_client_list */
	CL_CANCEL,		/* pool->cancel_req_list */

	/* server side */
	SV_FREE,		/* free_server_list */
	SV_JUSTFREE,		/* justfree_server_list */
	SV_LOGIN,		/* pool->new_server_list */
	SV_IDLE,		/* pool->idle_server_list */
	SV_ACTIVE,		/* pool->active_server_list */
	SV_USED,		/* pool->used_server_list */
	SV_TESTED		/* pool->tested_server_list */
};
75
/* Pause modes of the pooler. */
enum PauseMode {
	/* active pooling */
	P_NONE = 0,
	/* wait for client to finish work */
	P_PAUSE = 1,
	/* wait for buffers to be empty */
	P_SUSPEND = 2
};
81
/*
 * TLS modes.  Values are consecutive, starting at 0, in declaration
 * order.
 */
enum SSLMode {
	SSLMODE_DISABLED = 0,
	SSLMODE_ALLOW = 1,
	SSLMODE_PREFER = 2,
	SSLMODE_REQUIRE = 3,
	SSLMODE_VERIFY_CA = 4,
	SSLMODE_VERIFY_FULL = 5
};
90
/*
 * True when the socket is in one of the SV_* states.  Works because
 * every server state in enum SocketState is declared at or after SV_FREE.
 */
#define is_server_socket(sock) ((sock)->state >= SV_FREE)
92
93
94 typedef struct PgSocket PgSocket;
95 typedef struct PgUser PgUser;
96 typedef struct PgDatabase PgDatabase;
97 typedef struct PgPool PgPool;
98 typedef struct PgStats PgStats;
99 typedef union PgAddr PgAddr;
100 typedef enum SocketState SocketState;
101 typedef struct PktHdr PktHdr;
102 typedef struct ScramState ScramState;
103
104 extern int cf_sbuf_len;
105
106 #include "util.h"
107 #include "iobuf.h"
108 #include "sbuf.h"
109 #include "pktbuf.h"
110 #include "varcache.h"
111 #include "dnslookup.h"
112
113 #include "admin.h"
114 #include "loader.h"
115 #include "client.h"
116 #include "server.h"
117 #include "pooler.h"
118 #include "proto.h"
119 #include "objects.h"
120 #include "stats.h"
121 #include "takeover.h"
122 #include "janitor.h"
123 #include "hba.h"
124 #include "pam.h"
125
126 #ifndef WIN32
127 #define DEFAULT_UNIX_SOCKET_DIR "/tmp"
128 #else
129 #define DEFAULT_UNIX_SOCKET_DIR ""
130 #endif
131
132 /*
133 * To avoid allocations, we use static buffers.
134 *
135 * Note that a trailing zero byte is used in each case, so the actual
136 * usable length is one less.
137 */
138
139 /* matching NAMEDATALEN */
140 #define MAX_DBNAME 64
141
142 /*
143 * Ought to match NAMEDATALEN. Some cloud services use longer user
144 * names, so give it some extra room.
145 */
146 #define MAX_USERNAME 128
147
148 /*
149 * Some cloud services use very long generated passwords, so give it
150 * plenty of room. Up to PostgreSQL 13, the server can handle
151 * passwords up to 996 bytes, after that it's longer. Also, libpq
152 * maxes out around 1024, so going much higher is not straightforward.
153 */
154 #define MAX_PASSWORD 996
155
156 /*
157 * AUTH_* symbols are used for both protocol handling and
158 * configuration settings (auth_type, hba). Some are only applicable
159 * to one or the other.
160 */
161
/*
 * no-auth modes
 *
 * AUTH_ANY is parenthesized so the negative literal always expands as
 * a single operand, whatever expression the macro is used in.
 * AUTH_TRUST refers to AUTH_OK, which is defined below; that is fine
 * because macros are expanded at the point of use.
 */
#define AUTH_ANY		(-1)	/* same as trust but without username check */
#define AUTH_TRUST		AUTH_OK

/* protocol codes in Authentication* 'R' messages from server */
#define AUTH_OK			0
#define AUTH_KRB4		1	/* not supported */
#define AUTH_KRB5		2	/* not supported */
#define AUTH_PLAIN		3
#define AUTH_CRYPT		4	/* not supported */
#define AUTH_MD5		5
#define AUTH_SCM_CREDS		6	/* not supported */
#define AUTH_GSS		7	/* not supported */
#define AUTH_GSS_CONT		8	/* not supported */
#define AUTH_SSPI		9	/* not supported */
#define AUTH_SASL		10
#define AUTH_SASL_CONT		11
#define AUTH_SASL_FIN		12

/* internal codes */
#define AUTH_CERT		107
#define AUTH_PEER		108
#define AUTH_HBA		109
#define AUTH_REJECT		110
#define AUTH_PAM		111
#define AUTH_SCRAM_SHA_256	112
188
/*
 * type codes for weird pkts
 *
 * Each code is written as two 16-bit halves: (high << 16) | low.
 * The numeric values are 0x20000, 0x30000, 80877102, 80877103 and
 * 80877104 respectively.
 */
#define PKT_STARTUP_V2	(2 << 16)
#define PKT_STARTUP	(3 << 16)
#define PKT_CANCEL	((1234 << 16) | 5678)
#define PKT_SSLREQ	((1234 << 16) | 5679)
#define PKT_GSSENCREQ	((1234 << 16) | 5680)
195
196 #define POOL_SESSION 0
197 #define POOL_TX 1
198 #define POOL_STMT 2
199 #define POOL_INHERIT 3
200
201 #define BACKENDKEY_LEN 8
202
203 /* buffer size for startup noise */
204 #define STARTUP_BUF 1024
205
206
207 /*
208 * Remote/local address
209 */
210
211 /* buffer for pgaddr string conversions (with port) */
212 #define PGADDR_BUF (INET6_ADDRSTRLEN + 10)
213
/*
 * Address record used for AF_UNIX peers: a sockaddr_in (for the port)
 * followed by the uid and pid.
 */
struct sockaddr_ucreds {
	struct sockaddr_in sin;		/* must stay first: overlays PgAddr.sin */
	uid_t uid;
	pid_t pid;
};
219
220 /*
221 * AF_INET,AF_INET6 are stored as-is,
222 * AF_UNIX uses sockaddr_in port + uid/pid.
223 */
224 union PgAddr {
225 struct sockaddr sa;
226 struct sockaddr_in sin;
227 struct sockaddr_in6 sin6;
228 struct sockaddr_ucreds scred;
229 };
230
pga_family(const PgAddr * a)231 static inline unsigned int pga_family(const PgAddr *a) { return a->sa.sa_family; }
pga_is_unix(const PgAddr * a)232 static inline bool pga_is_unix(const PgAddr *a) { return a->sa.sa_family == AF_UNIX; }
233
234 int pga_port(const PgAddr *a);
235 void pga_set(PgAddr *a, int fam, int port);
236 void pga_copy(PgAddr *a, const struct sockaddr *sa);
237 bool pga_pton(PgAddr *a, const char *s, int port);
238 const char *pga_ntop(const PgAddr *a, char *dst, int dstlen);
239 const char *pga_str(const PgAddr *a, char *dst, int dstlen);
240 const char *pga_details(const PgAddr *a, char *dst, int dstlen);
241 int pga_cmp_addr(const PgAddr *a, const PgAddr *b);
242
243 /*
244 * Stats, kept per-pool.
245 */
246 struct PgStats {
247 uint64_t xact_count;
248 uint64_t query_count;
249 uint64_t server_bytes;
250 uint64_t client_bytes;
251 usec_t xact_time; /* total transaction time in us */
252 usec_t query_time; /* total query time in us */
253 usec_t wait_time; /* total time clients had to wait */
254 };
255
256 /*
257 * Contains connections for one db+user pair.
258 *
259 * Stats:
260 * ->stats is updated online.
261 * for each stats_period:
262 * ->older_stats = ->newer_stats
263 * ->newer_stats = ->stats
264 */
265 struct PgPool {
266 struct List head; /* entry in global pool_list */
267 struct List map_head; /* entry in user->pool_list */
268
269 PgDatabase *db; /* corresponding database */
270 PgUser *user; /* user logged in as */
271
272 struct StatList active_client_list; /* waiting events logged in clients */
273 struct StatList waiting_client_list; /* client waits for a server to be available */
274 struct StatList cancel_req_list; /* closed client connections with server key */
275
276 struct StatList active_server_list; /* servers linked with clients */
277 struct StatList idle_server_list; /* servers ready to be linked with clients */
278 struct StatList used_server_list; /* server just unlinked from clients */
279 struct StatList tested_server_list; /* server in testing process */
280 struct StatList new_server_list; /* servers in login phase */
281
282 PgStats stats;
283 PgStats newer_stats;
284 PgStats older_stats;
285
286 /* database info to be sent to client */
287 struct PktBuf *welcome_msg; /* ServerParams without VarCache ones */
288
289 VarCache orig_vars; /* default params from server */
290
291 usec_t last_lifetime_disconnect;/* last time when server_lifetime was applied */
292
293 /* if last connect to server failed, there should be delay before next */
294 usec_t last_connect_time;
295 bool last_connect_failed:1;
296 bool last_login_failed:1;
297
298 bool welcome_msg_ready:1;
299 };
300
/*
 * Sum of the idle, active, used and tested server list counts.
 * Servers in new_server_list are not included.
 * Note: the argument is evaluated more than once.
 */
#define pool_connected_server_count(pool) ( \
		statlist_count(&(pool)->idle_server_list) + \
		statlist_count(&(pool)->active_server_list) + \
		statlist_count(&(pool)->used_server_list) + \
		statlist_count(&(pool)->tested_server_list))
306
/*
 * All servers of the pool: the new_server_list count plus
 * pool_connected_server_count().
 * Note: the argument is evaluated more than once.
 */
#define pool_server_count(pool) ( \
		statlist_count(&(pool)->new_server_list) + \
		pool_connected_server_count(pool))
310
/*
 * Sum of the waiting and active client list counts.
 * cancel_req_list is not included.
 * Note: the argument is evaluated more than once.
 */
#define pool_client_count(pool) ( \
		statlist_count(&(pool)->waiting_client_list) + \
		statlist_count(&(pool)->active_client_list))
314
315 /*
316 * A user in login db.
317 *
318 * FIXME: remove ->head as ->tree_node should be enough.
319 *
320 * For databases where remote user is forced, the pool is:
321 * first(db->forced_user->pool_list), where pool_list has only one entry.
322 *
323 * Otherwise, ->pool_list contains multiple pools, for all PgDatabases
324 * which user has logged in.
325 */
326 struct PgUser {
327 struct List head; /* used to attach user to list */
328 struct List pool_list; /* list of pools where pool->user == this user */
329 struct AANode tree_node; /* used to attach user to tree */
330 char name[MAX_USERNAME];
331 char passwd[MAX_PASSWORD];
332 uint8_t scram_ClientKey[32];
333 uint8_t scram_ServerKey[32];
334 bool has_scram_keys; /* true if the above two are valid */
335 bool mock_auth; /* not a real user, only for mock auth */
336 int pool_mode;
337 int max_user_connections; /* how much server connections are allowed */
338 int connection_count; /* how much connections are used by user now */
339 };
340
341 /*
342 * A database entry from config.
343 */
344 struct PgDatabase {
345 struct List head;
346 char name[MAX_DBNAME]; /* db name for clients */
347
348 bool db_paused; /* PAUSE <db>; was issued */
349 bool db_wait_close; /* WAIT_CLOSE was issued for this database */
350 bool db_dead; /* used on RELOAD/SIGHUP to later detect removed dbs */
351 bool db_auto; /* is the database auto-created by autodb_connstr */
352 bool db_disabled; /* is the database accepting new connections? */
353 bool admin; /* internal console db */
354
355 struct PktBuf *startup_params; /* partial StartupMessage (without user) be sent to server */
356
357 PgUser *forced_user; /* if not NULL, the user/psw is forced */
358 PgUser *auth_user; /* if not NULL, users not in userlist.txt will be looked up on the server */
359
360 char *host; /* host or unix socket name */
361 int port;
362
363 int pool_size; /* max server connections in one pool */
364 int min_pool_size; /* min server connections in one pool */
365 int res_pool_size; /* additional server connections in case of trouble */
366 int pool_mode; /* pool mode for this database */
367 int max_db_connections; /* max server connections between all pools */
368
369 const char *dbname; /* server-side name, pointer to inside startup_msg */
370
371 /* startup commands to send to server after connect. malloc-ed */
372 char *connect_query;
373
374 usec_t inactive_time; /* when auto-database became inactive (to kill it after timeout) */
375 unsigned active_stamp; /* set if autodb has connections */
376
377 int connection_count; /* total connections for this database in all pools */
378
379 struct AATree user_tree; /* users that have been queried on this database */
380 };
381
382
383 /*
384 * A client or server connection.
385 *
386 * ->state corresponds to various lists the struct can be at.
387 */
388 struct PgSocket {
389 struct List head; /* list header */
390 PgSocket *link; /* the dest of packets */
391 PgPool *pool; /* parent pool, if NULL not yet assigned */
392
393 PgUser *login_user; /* presented login, for client it may differ from pool->user */
394
395 int client_auth_type; /* auth method decided by hba */
396
397 SocketState state:8; /* this also specifies socket location */
398
399 bool ready:1; /* server: accepts new query */
400 bool idle_tx:1; /* server: idling in tx */
401 bool close_needed:1; /* server: this socket must be closed ASAP */
402 bool setting_vars:1; /* server: setting client vars */
403 bool exec_on_connect:1; /* server: executing connect_query */
404 bool resetting:1; /* server: executing reset query from auth login; don't release on flush */
405 bool copy_mode:1; /* server: in copy stream, ignores any Sync packets */
406
407 bool wait_for_welcome:1;/* client: no server yet in pool, cannot send welcome msg */
408 bool wait_for_user_conn:1;/* client: waiting for auth_conn server connection */
409 bool wait_for_user:1; /* client: waiting for auth_conn query results */
410 bool wait_for_auth:1; /* client: waiting for external auth (PAM) to be completed */
411
412 bool suspended:1; /* client/server: if the socket is suspended */
413
414 bool admin_user:1; /* console client: has admin rights */
415 bool own_user:1; /* console client: client with same uid on unix socket */
416 bool wait_for_response:1;/* console client: waits for completion of PAUSE/SUSPEND cmd */
417
418 bool wait_sslchar:1; /* server: waiting for ssl response: S/N */
419
420 int expect_rfq_count; /* client: count of ReadyForQuery packets client should see */
421
422 usec_t connect_time; /* when connection was made */
423 usec_t request_time; /* last activity time */
424 usec_t query_start; /* query start moment */
425 usec_t xact_start; /* xact start moment */
426 usec_t wait_start; /* waiting start moment */
427
428 uint8_t cancel_key[BACKENDKEY_LEN]; /* client: generated, server: remote */
429 PgAddr remote_addr; /* ip:port for remote endpoint */
430 PgAddr local_addr; /* ip:port for local endpoint */
431
432 union {
433 struct DNSToken *dns_token; /* ongoing request */
434 PgDatabase *db; /* cache db while doing auth query */
435 };
436
437 struct ScramState {
438 char *client_nonce;
439 char *client_first_message_bare;
440 char *client_final_message_without_proof;
441 char *server_nonce;
442 char *server_first_message;
443 uint8_t *SaltedPassword;
444 char cbind_flag;
445 bool adhoc; /* SCRAM data made up from plain-text password */
446 int iterations;
447 char *salt; /* base64-encoded */
448 uint8_t ClientKey[32]; /* SHA256_DIGEST_LENGTH */
449 uint8_t StoredKey[32];
450 uint8_t ServerKey[32];
451 } scram_state;
452
453 VarCache vars; /* state of interesting server parameters */
454
455 SBuf sbuf; /* stream buffer, must be last */
456 };
457
458 #define RAW_IOBUF_SIZE offsetof(IOBuf, buf)
459 #define IOBUF_SIZE (RAW_IOBUF_SIZE + cf_sbuf_len)
460
461 /* where to store old fd info during SHOW FDS result processing */
462 #define tmp_sk_oldfd request_time
463 #define tmp_sk_linkfd query_start
464 /* takeover_clean_socket() needs to clean those up */
465
466 /* where the salt is temporarily stored */
467 #define tmp_login_salt cancel_key
468
469 /* main.c */
470 extern int cf_daemon;
471
472 extern char *cf_config_file;
473 extern char *cf_jobname;
474
475 extern char *cf_unix_socket_dir;
476 extern int cf_unix_socket_mode;
477 extern char *cf_unix_socket_group;
478 extern char *cf_listen_addr;
479 extern int cf_listen_port;
480 extern int cf_listen_backlog;
481
482 extern int cf_pool_mode;
483 extern int cf_max_client_conn;
484 extern int cf_default_pool_size;
485 extern int cf_min_pool_size;
486 extern int cf_res_pool_size;
487 extern usec_t cf_res_pool_timeout;
488 extern int cf_max_db_connections;
489 extern int cf_max_user_connections;
490
491 extern char * cf_autodb_connstr;
492 extern usec_t cf_autodb_idle_timeout;
493
494 extern usec_t cf_suspend_timeout;
495 extern usec_t cf_server_lifetime;
496 extern usec_t cf_server_idle_timeout;
497 extern char * cf_server_reset_query;
498 extern int cf_server_reset_query_always;
499 extern char * cf_server_check_query;
500 extern usec_t cf_server_check_delay;
501 extern int cf_server_fast_close;
502 extern usec_t cf_server_connect_timeout;
503 extern usec_t cf_server_login_retry;
504 extern usec_t cf_query_timeout;
505 extern usec_t cf_query_wait_timeout;
506 extern usec_t cf_client_idle_timeout;
507 extern usec_t cf_client_login_timeout;
508 extern usec_t cf_idle_transaction_timeout;
509 extern int cf_server_round_robin;
510 extern int cf_disable_pqexec;
511 extern usec_t cf_dns_max_ttl;
512 extern usec_t cf_dns_nxdomain_ttl;
513 extern usec_t cf_dns_zone_check_period;
514 extern char *cf_resolv_conf;
515
516 extern int cf_auth_type;
517 extern char *cf_auth_file;
518 extern char *cf_auth_query;
519 extern char *cf_auth_user;
520 extern char *cf_auth_hba_file;
521
522 extern char *cf_pidfile;
523
524 extern char *cf_ignore_startup_params;
525
526 extern char *cf_admin_users;
527 extern char *cf_stats_users;
528 extern int cf_stats_period;
529 extern int cf_log_stats;
530
531 extern int cf_pause_mode;
532 extern int cf_shutdown;
533 extern int cf_reboot;
534
535 extern unsigned int cf_max_packet_size;
536
537 extern int cf_sbuf_loopcnt;
538 extern int cf_so_reuseport;
539 extern int cf_tcp_keepalive;
540 extern int cf_tcp_keepcnt;
541 extern int cf_tcp_keepidle;
542 extern int cf_tcp_keepintvl;
543 extern int cf_tcp_socket_buffer;
544 extern int cf_tcp_defer_accept;
545 extern int cf_tcp_user_timeout;
546
547 extern int cf_log_connections;
548 extern int cf_log_disconnections;
549 extern int cf_log_pooler_errors;
550 extern int cf_application_name_add_host;
551
552 extern int cf_client_tls_sslmode;
553 extern char *cf_client_tls_protocols;
554 extern char *cf_client_tls_ca_file;
555 extern char *cf_client_tls_cert_file;
556 extern char *cf_client_tls_key_file;
557 extern char *cf_client_tls_ciphers;
558 extern char *cf_client_tls_dheparams;
559 extern char *cf_client_tls_ecdhecurve;
560
561 extern int cf_server_tls_sslmode;
562 extern char *cf_server_tls_protocols;
563 extern char *cf_server_tls_ca_file;
564 extern char *cf_server_tls_cert_file;
565 extern char *cf_server_tls_key_file;
566 extern char *cf_server_tls_ciphers;
567
568 extern const struct CfLookup pool_mode_map[];
569
570 extern usec_t g_suspend_start;
571
572 extern struct DNSContext *adns;
573 extern struct HBA *parsed_hba;
574
575 static inline PgSocket * _MUSTCHECK
pop_socket(struct StatList * slist)576 pop_socket(struct StatList *slist)
577 {
578 struct List *item = statlist_pop(slist);
579 if (item == NULL)
580 return NULL;
581 return container_of(item, PgSocket, head);
582 }
583
584 static inline PgSocket *
first_socket(struct StatList * slist)585 first_socket(struct StatList *slist)
586 {
587 if (statlist_empty(slist))
588 return NULL;
589 return container_of(slist->head.next, PgSocket, head);
590 }
591
592 static inline PgSocket *
last_socket(struct StatList * slist)593 last_socket(struct StatList *slist)
594 {
595 if (statlist_empty(slist))
596 return NULL;
597 return container_of(slist->head.prev, PgSocket, head);
598 }
599
/*
 * Configuration entry points.
 */

/*
 * NOTE(review): the parameter is presumably one of the AUTH_* codes
 * (compare cf_auth_type) - confirm against the definition.
 */
bool requires_auth_file(int auth_type);
void load_config(void);

/* Set a single config parameter from its textual key and value. */
bool set_config_param(const char *key, const char *val);

/*
 * Invoke param_cb for config parameters; `arg` is handed to the
 * callback as its first argument.
 */
void config_for_each(void (*param_cb)(void *arg, const char *name, const char *val, const char *defval, bool reloadable),
		     void *arg);
607