1#
2# Tests of pg_shmem.h functions
3#
4use strict;
5use warnings;
6use IPC::Run 'run';
7use PostgresNode;
8use Test::More;
9use TestLib;
10use Time::HiRes qw(usleep);
11
# Everything below exercises SysV shared memory, which Windows does not
# provide.  A skip_all plan ends the script immediately, so the fixed plan is
# only reached on platforms that can run the tests.
plan skip_all => 'SysV shared memory not supported by this platform'
  if $windows_os;
plan tests => 5;
20
# Scratch directory for this script, and the port that all test nodes share
# (assigned further down).
my $tempdir = TestLib::tempdir();
my $port;

# Save an "ipcs" baseline that later snapshots are diffed against.  This is
# diagnostic output only, so a failure to run ipcs is deliberately ignored.
my $ipcs_before = $tempdir . '/ipcs_before';
eval { run_log([ 'ipcs', '-am' ], '>', $ipcs_before); };
27
# Log how the current "ipcs -am" output differs from the baseline stored in
# $ipcs_before.  Purely diagnostic: any error is trapped and discarded.
sub log_ipcs
{
	my @ipcs_cmd = ('ipcs', '-am');
	my @diff_cmd = ('diff', $ipcs_before, '-');
	eval { run_log(\@ipcs_cmd, '|', \@diff_cmd); };
	return;
}
33
34# These tests need a $port such that nothing creates or removes a segment in
35# $port's IpcMemoryKey range while this test script runs.  While there's no
36# way to ensure that in general, we do ensure that if PostgreSQL tests are the
37# only actors.  With TCP, the first get_new_node picks a port number.  With
38# Unix sockets, use a postmaster, $port_holder, to represent a key space
39# reservation.  $port_holder holds a reservation on the key space of port
40# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
41# key space.  If multiple copies of this test script run concurrently, they
42# will pick different ports.  $port_holder postmasters use odd-numbered ports,
43# and tests use even-numbered ports.  In the absence of collisions from other
44# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
45# with key 0x7d002 (512002).
my $port_holder;
if (!$PostgresNode::use_tcp)
{
	my $lock_port;
	my $reserved = 0;

	# Try each odd-numbered candidate port until a holder postmaster obtains
	# the first shared memory key of its own port's key space.
	for ($lock_port = 511; $lock_port < 711; $lock_port += 2)
	{
		$port_holder = PostgresNode->get_new_node(
			"port${lock_port}_holder",
			port     => $lock_port,
			own_host => 1);
		$port_holder->init;
		$port_holder->append_conf('postgresql.conf', 'max_connections = 5');
		$port_holder->start;
		# Match the AddToDataDirLockFile() call in sysv_shmem.c.  Assume all
		# systems not using sysv_shmem.c do use TCP.
		my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
		if (slurp_file($port_holder->data_dir . '/postmaster.pid') =~
			/^$shmem_key_line_prefix/m)
		{
			$reserved = 1;
			last;
		}
		# This holder did not get the first key; release it and move on.
		$port_holder->stop;
	}

	# If every candidate was rejected, the last holder has already been
	# stopped and reserves nothing.  Report that instead of carrying on
	# silently; the port choice itself is left exactly as before.
	if (!$reserved)
	{
		diag("no port in 511..709 gave a shared memory key reservation; "
			  . "continuing without one");
	}

	# The tests run on the even-numbered port just above the holder's port.
	$port = $lock_port + 1;
}
69
# Node setup.  Create, configure and start a node called $name, and return
# the node object.  When $port is still undefined, the first node created
# fixes it; every later node is then placed on that same port.
sub init_start
{
	my ($name) = @_;
	my $node = PostgresNode->get_new_node(
		$name,
		port     => $port,
		own_host => 1);

	# Remember the first node's port, so that all nodes share it.
	$port = $node->port unless defined $port;

	$node->init;
	# Several nodes run at once; keep each one's semaphore usage small.
	$node->append_conf('postgresql.conf', 'max_connections = 5');
	$node->start;
	log_ipcs();
	return $node;
}
# Start two nodes on the shared $port: gnat first, then flea.
my $gnat = init_start 'gnat';
my $flea = init_start 'flea';

# Upon postmaster death, postmaster children exit automatically.
# Kill gnat's postmaster, restart flea while gnat is down, then bring gnat
# back with retries, logging the shared memory state after each step.
$gnat->kill9;
log_ipcs();
$flea->restart;       # flea ignores the shm key gnat abandoned.
log_ipcs();
poll_start($gnat);    # gnat recycles its former shm key.
log_ipcs();

# After clean shutdown, the nodes swap shm keys.
# The order matters: gnat stops, flea restarts while gnat is down, and only
# then does gnat start again.
$gnat->stop;
$flea->restart;
log_ipcs();
$gnat->start;
log_ipcs();
100
# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
# Use a regress.c function to emulate the responsiveness of a backend working
# through a CPU-intensive task.
# The path of the shared library providing wait_pid() is taken from the
# REGRESS_SHLIB environment variable.
$gnat->safe_psql('postgres', <<EOSQL);
CREATE FUNCTION wait_pid(int)
   RETURNS void
   AS '$ENV{REGRESS_SHLIB}'
   LANGUAGE C STRICT;
EOSQL
# The backend is asked to wait on its own pid.
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($stdout, $stderr);
# Launch the slow query from a separate psql through IPC::Run::start, with no
# stdin and with stdout/stderr captured, so this script can continue while
# the query runs.
my $slow_client = IPC::Run::start(
	[
		'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
		'-c', $slow_query
	],
	'<',
	\undef,
	'>',
	\$stdout,
	'2>',
	\$stderr,
	IPC::Run::timeout(900));    # five times the poll_query_until timeout
# Wait until the query shows up in pg_stat_activity.
ok( $gnat->poll_query_until(
		'postgres',
		"SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
	'slow query started');
# Record the backend's pid; it is needed later to signal that backend.
my $slow_pid = $gnat->safe_psql('postgres',
	"SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
# Kill the postmaster and remove its postmaster.pid, leaving the slow backend
# behind.
$gnat->kill9;
unlink($gnat->data_dir . '/postmaster.pid');
# NOTE(review): rotate_logfile presumably switches gnat to a fresh log file so
# that the log checks below only see new output -- confirm in PostgresNode.
$gnat->rotate_logfile;
log_ipcs();
# Reject ordinary startup.  For the same reasons poll_start() retries, keep
# trying (every 0.1s, for at least 180s) until either the postmaster starts
# or its log shows the expected complaint.
my $pre_existing_msg = qr/pre-existing shared memory block/;
foreach my $try (1 .. 180 * 10)
{
	last if $gnat->start(fail_ok => 1);
	last if slurp_file($gnat->logfile) =~ $pre_existing_msg;
	usleep(100_000);
}
like(slurp_file($gnat->logfile),
	$pre_existing_msg, 'detected live backend via shared memory');
# Reject single-user startup.
# Run "postgres --single" against gnat's data directory with no stdin,
# capturing its stderr.  The command is expected to fail.
my $single_stderr;
ok( !run_log(
		[ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
		'<', \undef, '2>', \$single_stderr),
	'live query blocks --single');
# Echo the captured stderr so it still appears in the test output, then check
# that it carries the same complaint as the ordinary startup attempt.
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
	'single-user mode detected live backend via shared memory');
log_ipcs();
# Fail to reject startup if shm key N has become available and we crash while
# using key N+1.  This is unwanted, but expected.
# With flea stopped, gnat's start is expected to succeed (return 1).
$flea->stop;    # release first key
is($gnat->start(fail_ok => 1), 1, 'key turnover fools only sysv_shmem.c');
$gnat->stop;     # release first key
$flea->start;    # grab first key
# Cleanup: send SIGQUIT to the leftover slow backend through pg_ctl, then
# finish the background psql harness.
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish;    # client has detected backend termination
log_ipcs();
poll_start($gnat);       # recycle second key

# Shut down both nodes, plus the port holder if one was created.
$gnat->stop;
$flea->stop;
$port_holder->stop if $port_holder;
log_ipcs();
176
177
# Start $node, retrying for a while if the first attempts fail.  A new
# postmaster may be refused at first because:
# - kernel is slow to deliver SIGKILL
# - postmaster parent is slow to waitpid()
# - postmaster child is slow to exit in response to SIGQUIT
# - postmaster child is slow to exit after postmaster death
# Returns 1 once the node has started, 0 if even the final attempt reports
# failure.
sub poll_start
{
	my ($node) = @_;

	# One attempt every 0.1 second, i.e. at least 180 seconds of retrying.
	my $retry_budget = 180 * 10;

	foreach my $try (1 .. $retry_budget)
	{
		return 1 if $node->start(fail_ok => 1);
		usleep(100_000);
	}

	# Retries exhausted.  Make a final attempt without fail_ok, which will
	# BAIL_OUT unless it succeeds.
	return $node->start() ? 1 : 0;
}
205