#
# Tests of pg_shmem.h functions
#
use strict;
use warnings;
use IPC::Run 'run';
use PostgresNode;
use Test::More;
use TestLib;
use Time::HiRes qw(usleep);

# Skip everything on Windows; otherwise this script runs exactly five tests.
plan(
	$windows_os
	? (skip_all => 'SysV shared memory not supported by this platform')
	: (tests => 5));

my $tempdir = TestLib::tempdir;
my $port;

# Capture a baseline "ipcs -am" listing so later listings can be diffed
# against it.  This is best-effort logging only: any error is swallowed.
my $ipcs_before = "$tempdir/ipcs_before";
eval { run_log([ 'ipcs', '-am' ], '>', $ipcs_before); };

# Log the difference between the current "ipcs -am" output and the baseline
# captured above.  Errors are ignored; nothing useful is returned.
sub log_ipcs
{
	my @ipcs_cmd = ('ipcs', '-am');
	my @diff_cmd = ('diff', $ipcs_before, '-');
	eval { run_log(\@ipcs_cmd, '|', \@diff_cmd); };
	return;
}

# The tests below require a $port for which nothing creates or removes a
# segment in that port's IpcMemoryKey range while this script is running.
# That cannot be guaranteed in general, but it is guaranteed when PostgreSQL
# tests are the only actors.  Under TCP, the first get_new_node picks a port
# number.  Under Unix sockets, a postmaster named $port_holder stands for a
# key space reservation: it holds a reservation on the key space of port
# 1+$port_holder->port provided it created the first IpcMemoryKey of its own
# port's key space.  Concurrent copies of this script therefore end up on
# different ports.  $port_holder postmasters take odd-numbered ports and the
# tests take even-numbered ones.  Absent collisions from other shmget()
# activity, gnat starts with key 0x7d001 (512001) and flea starts with key
# 0x7d002 (512002).
my $port_holder;
if (!$PostgresNode::use_tcp)
{
	my $lock_port = 511;
	while ($lock_port < 711)
	{
		$port_holder = PostgresNode->get_new_node(
			"port${lock_port}_holder",
			port     => $lock_port,
			own_host => 1);
		$port_holder->init(hba_permit_replication => 0);
		$port_holder->append_conf('postgresql.conf', 'max_connections = 5');
		$port_holder->start;

		# This mirrors the AddToDataDirLockFile() call in sysv_shmem.c.  We
		# assume every system that does not use sysv_shmem.c uses TCP.
		my $key_line_start = sprintf("%9lu ", 1 + $lock_port * 1000);
		my $pid_file       = $port_holder->data_dir . '/postmaster.pid';
		last if slurp_file($pid_file) =~ /^$key_line_start/m;

		# The holder did not get the first key of its key space; try the
		# next odd-numbered port.
		$port_holder->stop;
		$lock_port += 2;
	}
	$port = $lock_port + 1;
}

# Create, configure and start a node with the given name, logging the ipcs
# diff afterwards.  Every node shares one port: the first node created fixes
# $port if it was not already chosen above.  Returns the node.
sub init_start
{
	my ($node_name) = @_;

	my $node =
	  PostgresNode->get_new_node($node_name, port => $port, own_host => 1);
	$port = $node->port unless defined($port);    # same port for all nodes
	$node->init(hba_permit_replication => 0);

	# Several nodes run concurrently, so keep semaphore consumption low.
	$node->append_conf('postgresql.conf', 'max_connections = 5');
	$node->start;
	log_ipcs();
	return $node;
}
my $gnat = init_start('gnat');
my $flea = init_start('flea');

# When a postmaster dies, its children exit automatically.
$gnat->kill9;
log_ipcs();
$flea->restart;       # flea ignores the shm key gnat abandoned.
log_ipcs();
poll_start($gnat);    # gnat recycles its former shm key.
log_ipcs();

# Following a clean shutdown, the two nodes swap shm keys.
$gnat->stop;
$flea->restart;
log_ipcs();
$gnat->start;
log_ipcs();

# Scenarios with no postmaster.pid, a dead postmaster, and a live backend.
# A regress.c function emulates the responsiveness of a backend that is busy
# with a CPU-intensive task.
$gnat->safe_psql('postgres', <<EOSQL);
CREATE FUNCTION wait_pid(int)
   RETURNS void
   AS '$ENV{REGRESS_SHLIB}'
   LANGUAGE C STRICT;
EOSQL
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($slow_out, $slow_err);
my @slow_cmd = (
	'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
	'-c', $slow_query);
my $slow_client = IPC::Run::start(
	\@slow_cmd,
	'<',  \undef,
	'>',  \$slow_out,
	'2>', \$slow_err,
	IPC::Run::timeout(900));    # five times the poll_query_until timeout

my $slow_running_sql =
  "SELECT true FROM pg_stat_activity WHERE query = '$slow_query'";
ok($gnat->poll_query_until('postgres', $slow_running_sql),
	'slow query started');

my $slow_pid_sql =
  "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'";
my $slow_pid = $gnat->safe_psql('postgres', $slow_pid_sql);

$gnat->kill9;
unlink($gnat->data_dir . '/postmaster.pid');
$gnat->rotate_logfile;
log_ipcs();

# Ordinary startup must be rejected.  Retries are needed for the same
# reasons poll_start() needs them.
my $pre_existing_msg = qr/pre-existing shared memory block/;
{
	# Retry every 0.1s for at least 180s.
	my $max_attempts = 180 * 10;
	for my $attempt (1 .. $max_attempts)
	{
		last if $gnat->start(fail_ok => 1);
		last if slurp_file($gnat->logfile) =~ $pre_existing_msg;
		usleep(100_000);
	}
}
like(slurp_file($gnat->logfile),
	$pre_existing_msg, 'detected live backend via shared memory');

# Single-user startup must be rejected as well.
my $single_stderr;
my $single_ran_ok = run_log(
	[ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
	'<', \undef, '2>', \$single_stderr);
ok(!$single_ran_ok, 'live query blocks --single');
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
	'single-user mode detected live backend via shared memory');
log_ipcs();

# Startup is NOT rejected when shm key N has become available and we crashed
# while using key N+1.  That outcome is unwanted, but it is expected.
$flea->stop;     # release first key
is($gnat->start(fail_ok => 1), 1, 'key turnover fools only sysv_shmem.c');
$gnat->stop;     # release first key
$flea->start;    # grab first key

# cleanup
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish;    # client has detected backend termination
log_ipcs();
poll_start($gnat);       # recycle second key

$gnat->stop;
$flea->stop;
$port_holder->stop if $port_holder;
log_ipcs();


# Start $node, retrying as needed.  Starting a new postmaster may take
# several attempts because:
# - the kernel is slow to deliver SIGKILL
# - the postmaster's parent is slow to waitpid()
# - a postmaster child is slow to exit in response to SIGQUIT
# - a postmaster child is slow to exit after postmaster death
# Returns 1 on success and 0 if even the final attempt reports failure.
sub poll_start
{
	my ($node) = @_;

	# One attempt every 0.1 second, for up to 180 seconds.
	my $max_attempts = 180 * 10;

	for my $attempt (1 .. $max_attempts)
	{
		return 1 if $node->start(fail_ok => 1);

		# Pause 0.1 second before the next attempt.
		usleep(100_000);
	}

	# Nothing worked within 180 seconds.  Make a final attempt without
	# fail_ok, which will BAIL_OUT unless it succeeds.
	return 1 if $node->start;
	return 0;
}