1package RewindTest;
2
3# Test driver for pg_rewind. Each test consists of a cycle where a new cluster
4# is first created with initdb, and a streaming replication standby is set up
5# to follow the master. Then the master is shut down and the standby is
6# promoted, and finally pg_rewind is used to rewind the old master, using the
7# standby as the source.
8#
9# To run a test, the test script (in t/ subdirectory) calls the functions
10# in this module. These functions should be called in this sequence:
11#
12# 1. setup_cluster - creates a PostgreSQL cluster that runs as the master
13#
14# 2. start_master - starts the master server
15#
16# 3. create_standby - runs pg_basebackup to initialize a standby server, and
17#    sets it up to follow the master.
18#
19# 4. promote_standby - runs "pg_ctl promote" to promote the standby server.
20# The old master keeps running.
21#
22# 5. run_pg_rewind - stops the old master (if it's still running) and runs
23# pg_rewind to synchronize it with the now-promoted standby server.
24#
25# 6. clean_rewind_test - stops both servers used in the test, if they're
26# still running.
27#
# The test script can use the helper functions master_psql and standby_psql
# to run psql against the master and standby servers, respectively. The
# test script can also use the $node_master and $node_standby global
# variables, which hold the node objects for the master and standby servers.
# Connection strings and data directories are available through the
# connstr() and data_dir() methods of those objects.
34
35use strict;
36use warnings;
37
38use Config;
39use Exporter 'import';
40use File::Copy;
41use File::Path qw(rmtree);
42use IPC::Run qw(run);
43use PostgresNode;
44use TestLib;
45use Test::More;
46
# Symbols exported into the calling test script: the two node handles,
# the psql/query helpers, and the functions that drive each test step.
our @EXPORT = qw(
  $node_master
  $node_standby

  master_psql
  standby_psql
  check_query

  setup_cluster
  start_master
  create_standby
  promote_standby
  run_pg_rewind
  clean_rewind_test
);

# Our nodes.  $node_master is assigned by setup_cluster and $node_standby by
# create_standby; both stay undefined until those functions have run.
our $node_master;
our $node_standby;
66
# Run one SQL command against the master's 'postgres' database with psql.
# The command is launched through system_or_bail, which decides what happens
# when psql fails.
sub master_psql
{
	my ($cmd) = @_;

	my @psql_argv = (
		'psql', '-q', '--no-psqlrc',
		'-d', $node_master->connstr('postgres'),
		'-c', "$cmd");

	system_or_bail(@psql_argv);
}
74
# Run one SQL command against the standby's 'postgres' database with psql.
# The command is launched through system_or_bail, which decides what happens
# when psql fails.
sub standby_psql
{
	my ($cmd) = @_;

	my @psql_argv = (
		'psql', '-q', '--no-psqlrc',
		'-d', $node_standby->connstr('postgres'),
		'-c', "$cmd");

	system_or_bail(@psql_argv);
}
82
# Run $query on the master and compare psql's standard output with
# $expected_stdout.  Exactly one test result, labelled with $test_name, is
# reported per call: a failure if psql did not run successfully, a failure
# if it wrote anything to stderr, or otherwise the outcome of the output
# comparison.
sub check_query
{
	my ($query, $expected_stdout, $test_name) = @_;

	# Ask psql for the bare result only, with no formatting around it.
	my @psql_argv = (
		'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
		$node_master->connstr('postgres'),
		'-c', $query);

	my ($stdout, $stderr);
	my $ran_ok = run \@psql_argv, '>', \$stdout, '2>', \$stderr;

	# The exit status and stderr are deliberately not checked with separate
	# ok() calls, so that each check_query call counts as a single test.
	return fail("$test_name: psql exit code") unless $ran_ok;

	if ($stderr ne '')
	{
		diag $stderr;
		return fail("$test_name: psql no stderr");
	}

	# On msys, turn CRLF line endings into plain newlines before comparing.
	if ($Config{osname} eq 'msys')
	{
		$stdout =~ s/\r\n/\n/g;
	}
	return is($stdout, $expected_stdout, "$test_name: query result matches");
}
114
# Create the node object for the master and initialize its cluster with
# streaming replication allowed.  The server is not started here; see
# start_master.
#
# NOTE(review): an earlier comment here said data checksums are mandatory,
# but no checksum option is passed to init() -- confirm what init()
# configures for allows_streaming before relying on that.
sub setup_cluster
{
	my %init_options = (allows_streaming => 1);

	$node_master = get_new_node('master');
	return $node_master->init(%init_options);
}
122
# Start the master server created by setup_cluster.
#
# Once this returns, the test script is expected to run its test-specific
# steps that prepare the master, before setting up the standby.
sub start_master
{
	return $node_master->start();
}
130
# Create the standby node from a fresh backup of the master, configure it to
# stream from the master, and start it.
sub create_standby
{
	my $backup_name = 'my_backup';

	$node_standby = get_new_node('standby');
	$node_master->backup($backup_name);
	$node_standby->init_from_backup($node_master, $backup_name);

	# The application_name set here is the one promote_standby looks up in
	# pg_stat_replication when it waits for the standby to catch up.
	my $master_conninfo   = $node_master->connstr();
	my $recovery_settings = qq(
primary_conninfo='$master_conninfo application_name=rewind_standby'
standby_mode=on
recovery_target_timeline='latest'
);
	$node_standby->append_conf("recovery.conf", $recovery_settings);

	# The standby may still have WAL to apply before it matches the master
	# when this returns.  That is acceptable because no test looks at the
	# standby before it has been promoted.
	$node_standby->start;
}
151
# Wait for the standby to catch up with the master, promote it, and force a
# checkpoint on it.  The old master is left running.
#
# The test script is expected to have run its test-specific steps that
# belong after the standby was started before calling this.
sub promote_standby
{
	# Hold off until the master reports that the standby has received and
	# written all WAL; dies if the polling gives up first.
	my $standby_caught_up_sql =
"SELECT pg_current_wal_lsn() = write_lsn FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
	die "Timed out while waiting for standby to receive and write WAL"
	  unless $node_master->poll_query_until('postgres',
		$standby_caught_up_sql);

	# After promotion, any new data the test writes to the master puts the
	# master out-of-sync with the standby.
	$node_standby->promote;

	# pg_rewind reads the control file to learn which timeline the server is
	# on.  That file is not updated right at promotion, only at the next
	# checkpoint, and in remote mode the remaining test steps can finish
	# before the standby has checkpointed on its own.  So request a
	# checkpoint explicitly.
	standby_psql("checkpoint");
}
175
# Stop the old master, rewind it with pg_rewind using the promoted standby
# as the source, and start it again as a standby of the promoted node.
#
# $test_mode selects the pg_rewind source:
#   "local"  - the standby is stopped and its data directory is used
#   "remote" - pg_rewind connects to the running standby
# Any other value dies with "Incorrect test mode specified".
#
# Also dies if the master's postgresql.conf cannot be saved before the
# rewind or put back afterwards.  The pg_rewind run itself is reported as a
# test result through command_ok.
sub run_pg_rewind
{
	my $test_mode       = shift;
	my $master_pgdata   = $node_master->data_dir;
	my $standby_pgdata  = $node_standby->data_dir;
	my $standby_connstr = $node_standby->connstr('postgres');
	my $tmp_folder      = TestLib::tempdir;

	# Stop the master and be ready to perform the rewind
	$node_master->stop;

	# At this point, the rewind processing is ready to run.
	# We now have a very simple scenario with a few diverged WAL records.
	# The real testing begins now, branching into the scenarios that
	# pg_rewind supports.

	# Keep a temporary copy of the master's postgresql.conf, or it would be
	# overwritten during the rewind.  A failed copy must not go unnoticed:
	# the restore step below depends on this file existing.
	copy(
		"$master_pgdata/postgresql.conf",
		"$tmp_folder/master-postgresql.conf.tmp")
	  or die
	  "could not save $master_pgdata/postgresql.conf to $tmp_folder: $!";

	# Now run pg_rewind
	if ($test_mode eq "local")
	{

		# Do rewind using a local pgdata as source.  The standby is stopped
		# first, since its data directory is read directly.
		$node_standby->stop;
		command_ok(
			[   'pg_rewind',
				"--debug",
				"--source-pgdata=$standby_pgdata",
				"--target-pgdata=$master_pgdata" ],
			'pg_rewind local');
	}
	elsif ($test_mode eq "remote")
	{

		# Do rewind using a remote connection as source
		command_ok(
			[   'pg_rewind',       "--debug",
				"--source-server", $standby_connstr,
				"--target-pgdata=$master_pgdata" ],
			'pg_rewind remote');
	}
	else
	{

		# Cannot come here normally
		die("Incorrect test mode specified");
	}

	# Now move back postgresql.conf with old settings.  If this fails the
	# master would start with the wrong configuration, so stop here.
	move(
		"$tmp_folder/master-postgresql.conf.tmp",
		"$master_pgdata/postgresql.conf")
	  or die "could not restore $master_pgdata/postgresql.conf: $!";

	# Plug-in rewound node to the now-promoted standby node
	my $port_standby = $node_standby->port;
	$node_master->append_conf(
		'recovery.conf', qq(
primary_conninfo='port=$port_standby'
standby_mode=on
recovery_target_timeline='latest'
));

	# Restart the master to check that rewind went correctly
	$node_master->start;

	#### Now run the test-specific parts to check the result
}
248
# Clean up after the test by tearing down whichever of the two nodes has
# been created.  A node that was never set up is skipped.
sub clean_rewind_test
{
	if (defined $node_master)
	{
		$node_master->teardown_node;
	}
	if (defined $node_standby)
	{
		$node_standby->teardown_node;
	}
}
255
2561;
257