1#
2# Tests of pg_shmem.h functions
3#
4use strict;
5use warnings;
6use Config;
7use IPC::Run 'run';
8use PostgresNode;
9use Test::More;
10use TestLib;
11use Time::HiRes qw(usleep);
12
# This script exercises SysV shared memory, which the skip message below
# reports as unsupported on Windows; everywhere else, run the five tests.
if (!$windows_os)
{
	plan tests => 5;
}
else
{
	plan skip_all => 'SysV shared memory not supported by this platform';
}

my $tempdir = TestLib::tempdir;
my $port;

# Capture a baseline "ipcs" listing so later listings can be diffed against
# it.  This is best-effort logging only, so any error is swallowed.
my $ipcs_before = $tempdir . '/ipcs_before';
eval { run_log([ 'ipcs', '-am' ], '>', $ipcs_before); };
28
# Log the difference between the current "ipcs -am" output and the baseline
# listing stored in $ipcs_before.  Purely diagnostic: any error raised while
# running the pipeline is swallowed.  Takes no arguments, returns nothing.
sub log_ipcs
{
	my @ipcs_cmd = ('ipcs', '-am');
	my @diff_cmd = ('diff', $ipcs_before, '-');

	eval { run_log(\@ipcs_cmd, '|', \@diff_cmd); };
	return;
}
34
# These tests need a $port such that nothing creates or removes a segment in
# $port's IpcMemoryKey range while this test script runs.  While there's no
# way to ensure that in general, we do ensure that if PostgreSQL tests are the
# only actors.  With TCP, the first get_new_node picks a port number.  With
# Unix sockets, use a postmaster, $port_holder, to represent a key space
# reservation.  $port_holder holds a reservation on the key space of port
# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
# key space.  If multiple copies of this test script run concurrently, they
# will pick different ports.  $port_holder postmasters use odd-numbered ports,
# and tests use even-numbered ports.  In the absence of collisions from other
# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
# with key 0x7d002 (512002).
my $port_holder;
if (!$PostgresNode::use_tcp)
{
	my $lock_port = 511;
	my $reserved  = 0;

	# Try each odd-numbered candidate port in turn until a holder postmaster
	# obtains the first key of its own key space.
	while ($lock_port < 711)
	{
		$port_holder = PostgresNode->get_new_node(
			"port${lock_port}_holder",
			port     => $lock_port,
			own_host => 1);
		$port_holder->init;
		$port_holder->append_conf('postgresql.conf', 'max_connections = 5');
		$port_holder->start;

		# Match the AddToDataDirLockFile() call in sysv_shmem.c.  Assume all
		# systems not using sysv_shmem.c do use TCP.
		my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
		my $pid_file_contents =
		  slurp_file($port_holder->data_dir . '/postmaster.pid');
		if ($pid_file_contents =~ /^\Q$shmem_key_line_prefix\E/m)
		{
			$reserved = 1;
			last;
		}

		# This holder got some other key, so it reserves nothing; move on.
		$port_holder->stop;
		$lock_port += 2;
	}
	$port = $lock_port + 1;

	# Every candidate failed: the last holder has been stopped, so nothing
	# reserves $port's key space.  Keep going as before, but say so, since
	# later failures would otherwise be hard to explain.
	diag("no key space reservation obtained on ports 511..709; "
		  . "continuing with unreserved port $port")
	  unless $reserved;
}
70
# Node setup.  Create a node called $name, initialize it, start it, log the
# resulting "ipcs" changes, and return the node object.  The first node
# created fixes $port if it was not chosen already, so every node built here
# shares one port.
sub init_start
{
	my ($name) = @_;

	my $node = PostgresNode->get_new_node(
		$name,
		port     => $port,
		own_host => 1);

	# same port for all nodes
	$port = $node->port unless defined($port);

	$node->init;
	# Limit semaphore consumption, since we run several nodes concurrently.
	$node->append_conf('postgresql.conf', 'max_connections = 5');
	$node->start;
	log_ipcs();

	return $node;
}
my $gnat = init_start('gnat');
my $flea = init_start('flea');

# Upon postmaster death, postmaster children exit automatically.
$gnat->kill9();
log_ipcs();
# flea ignores the shm key gnat abandoned.
$flea->restart();
log_ipcs();
# gnat recycles its former shm key.
poll_start($gnat);
log_ipcs();

# After clean shutdown, the nodes swap shm keys.
$gnat->stop();
$flea->restart();
log_ipcs();
$gnat->start();
log_ipcs();
101
# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
# Use a regress.c function to emulate the responsiveness of a backend working
# through a CPU-intensive task.
my $regress_shlib = TestLib::perl2host($ENV{REGRESS_SHLIB});
my $create_wait_pid_sql = <<EOSQL;
CREATE FUNCTION wait_pid(int)
   RETURNS void
   AS '$regress_shlib'
   LANGUAGE C STRICT;
EOSQL
$gnat->safe_psql('postgres', $create_wait_pid_sql);

# Launch a psql client in the background that runs the slow query on gnat.
my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
my ($stdout, $stderr);
my @slow_psql_cmd = (
	'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
	'-c', $slow_query);
my $slow_client = IPC::Run::start(
	\@slow_psql_cmd,
	'<', \undef,
	'>', \$stdout,
	'2>', \$stderr,
	IPC::Run::timeout(900));    # five times the poll_query_until timeout

# Wait until the slow query shows up in pg_stat_activity, then note its pid.
my $slow_query_visible_sql =
  "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'";
ok($gnat->poll_query_until('postgres', $slow_query_visible_sql, '1'),
	'slow query started');
my $slow_pid = $gnat->safe_psql('postgres',
	"SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");

# Kill the postmaster and remove its pid file, leaving the backend running.
$gnat->kill9();
unlink($gnat->data_dir . '/postmaster.pid');
$gnat->rotate_logfile();
log_ipcs();
# Reject ordinary startup.  Retry for the same reasons poll_start() does.
my $pre_existing_msg = qr/pre-existing shared memory block/;
{
	# Retry every 0.1s for at least 180s.  Stop early once the node starts
	# or its log shows the expected complaint.
	my $max_attempts = 180 * 10;
	for my $attempt (1 .. $max_attempts)
	{
		last if $gnat->start(fail_ok => 1);
		last if slurp_file($gnat->logfile) =~ $pre_existing_msg;
		usleep(100_000);
	}
}
like(slurp_file($gnat->logfile),
	$pre_existing_msg, 'detected live backend via shared memory');

# Reject single-user startup.
my $single_stderr;
my @single_user_cmd =
  ('postgres', '--single', '-D', $gnat->data_dir, 'template1');
my $single_user_ran =
  run_log(\@single_user_cmd, '<', \undef, '2>', \$single_stderr);
ok(!$single_user_ran, 'live query blocks --single');
print STDERR $single_stderr;
like($single_stderr, $pre_existing_msg,
	'single-user mode detected live backend via shared memory');
log_ipcs();
# Fail to reject startup if shm key N has become available and we crash while
# using key N+1.  This is unwanted, but expected.
$flea->stop();    # release first key
is($gnat->start(fail_ok => 1), 1, 'key turnover fools only sysv_shmem.c');
$gnat->stop();     # release first key
$flea->start();    # grab first key

# cleanup
TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
$slow_client->finish();    # client has detected backend termination
log_ipcs();
poll_start($gnat);         # recycle second key

# Shut everything down, including the port holder when one was created.
$gnat->stop();
$flea->stop();
if ($port_holder)
{
	$port_holder->stop();
}
log_ipcs();
178
179
# Start $node, retrying on failure.  We may need retries to start a new
# postmaster.  Causes:
# - kernel is slow to deliver SIGKILL
# - postmaster parent is slow to waitpid()
# - postmaster child is slow to exit in response to SIGQUIT
# - postmaster child is slow to exit after postmaster death
#
# Makes up to 1800 attempts with fail_ok set, 0.1 second apart, then one
# final attempt without fail_ok.  Returns 1 as soon as a start succeeds, and
# 0 if the final attempt returns false.
sub poll_start
{
	my $node = shift;

	my $retry_budget = 180 * 10;

	for my $try (1 .. $retry_budget)
	{
		return 1 if $node->start(fail_ok => 1);

		# Wait 0.1 second before retrying.
		usleep(100_000);
	}

	# No success within 180 seconds.  Try one last time without fail_ok, which
	# will BAIL_OUT unless it succeeds.
	return $node->start ? 1 : 0;
}
207