# pg_rewind/t/RewindTest.pm

package RewindTest;

# Test driver for pg_rewind. Each test consists of a cycle where a new cluster
# is first created with initdb, and a streaming replication standby is set up
# to follow the master. Then the master is shut down and the standby is
# promoted, and finally pg_rewind is used to rewind the old master, using the
# standby as the source.
#
# To run a test, the test script (in t/ subdirectory) calls the functions
# in this module. These functions should be called in this sequence:
#
# 1. setup_cluster - creates a PostgreSQL cluster that runs as the master
#
# 2. start_master - starts the master server
#
# 3. create_standby - runs pg_basebackup to initialize a standby server, and
#    sets it up to follow the master.
#
# 4. promote_standby - runs "pg_ctl promote" to promote the standby server.
#    The old master keeps running.
#
# 5. run_pg_rewind - stops the old master (if it's still running) and runs
#    pg_rewind to synchronize it with the now-promoted standby server.
#
# 6. clean_rewind_test - stops both servers used in the test, if they're
#    still running.
#
# The test script can use the helper functions master_psql and standby_psql
# to run psql against the master and standby servers, respectively. The
# test script can also use the exported $node_master and $node_standby
# global variables, which hold the node objects (created with get_new_node)
# for the master and standby servers. Connection strings and data directory
# paths are obtained from them with their connstr and data_dir methods.

use strict;
use warnings;

use Config;
use Exporter 'import';
use File::Copy;
use File::Path qw(rmtree);
use IPC::Run qw(run);
use PostgresNode;
use TestLib;
use Test::More;

our @EXPORT = qw(
  $node_master
  $node_standby

  master_psql
  standby_psql
  check_query

  setup_cluster
  start_master
  create_standby
  promote_standby
  run_pg_rewind
  clean_rewind_test
);

# Our nodes.
our $node_master;
our $node_standby;

# Run one SQL command on the master with psql, connecting to the 'postgres'
# database.  The command line is handed to system_or_bail (from TestLib),
# which presumably aborts the test run if psql fails.
sub master_psql
{
	my ($sql) = @_;

	my @psql_cmd = (
		'psql', '-q', '--no-psqlrc',
		'-d', $node_master->connstr('postgres'),
		'-c', "$sql");

	system_or_bail(@psql_cmd);
}

# Run one SQL command on the standby with psql, connecting to the 'postgres'
# database.  The command line is handed to system_or_bail (from TestLib),
# which presumably aborts the test run if psql fails.
sub standby_psql
{
	my ($sql) = @_;

	my @psql_cmd = (
		'psql', '-q', '--no-psqlrc',
		'-d', $node_standby->connstr('postgres'),
		'-c', "$sql");

	system_or_bail(@psql_cmd);
}

# Run a query against the master with psql, and check that the output matches
# what's expected.
#
# Arguments:
#   $query           - SQL text passed to psql with -c
#   $expected_stdout - exact text psql is expected to print (psql is run
#                      with -A -t, i.e. unaligned, tuples-only output)
#   $test_name       - prefix for the reported test description
#
# Exactly one test result is reported per call: a failure if psql did not
# run successfully, a failure if psql wrote anything to stderr, otherwise the
# comparison of stdout against $expected_stdout.
sub check_query
{
	# Attribute any failure to the line in the test script that called us,
	# not to a line inside this helper.
	local $Test::Builder::Level = $Test::Builder::Level + 1;

	my ($query, $expected_stdout, $test_name) = @_;
	my ($stdout, $stderr);

	# we want just the output, no formatting
	my $result = run [
		'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
		$node_master->connstr('postgres'),
		'-c', $query ],
	  '>', \$stdout, '2>', \$stderr;

	# We don't use ok() for the exit code and stderr, because we want this
	# check to be just a single test.
	if (!$result)
	{
		# Show whatever psql complained about; without it a failed run
		# gives no hint as to what went wrong.
		diag $stderr if defined $stderr && $stderr ne '';
		fail("$test_name: psql exit code");
	}
	elsif ($stderr ne '')
	{
		diag $stderr;
		fail("$test_name: psql no stderr");
	}
	else
	{
		# On msys, normalize CRLF line endings before comparing.
		$stdout =~ s/\r\n/\n/g if $Config{osname} eq 'msys';
		is($stdout, $expected_stdout, "$test_name: query result matches");
	}
}

# Create and initialize the node that will act as the master, storing it in
# $node_master.  The server is not started here.
sub setup_cluster
{
	# NOTE(review): the earlier comment here said data checksums are
	# mandatory, but no checksum option is passed below.  Presumably the
	# allows_streaming setup provides what pg_rewind requires -- confirm
	# against PostgresNode's init.
	my %init_opts = (allows_streaming => 1);

	$node_master = get_new_node('master');
	$node_master->init(%init_opts);
}

# Start the master server held in $node_master.
sub start_master
{
	$node_master->start();

	#### After this returns, the test script runs its test-specific steps
	# to populate the master before the standby is set up.
}

# Create the standby node from a fresh backup of the master, configure it to
# stream from the master, and start it.  The new node is stored in
# $node_standby.
sub create_standby
{
	my $backup_name = 'my_backup';

	$node_standby = get_new_node('standby');
	$node_master->backup($backup_name);
	$node_standby->init_from_backup($node_master, $backup_name);

	# Recovery settings: connect to the master under the application_name
	# that promote_standby looks for in pg_stat_replication, and follow the
	# latest timeline.
	my $master_conninfo = $node_master->connstr();
	my $recovery_settings = join(
		"\n",
		'',
		"primary_conninfo='$master_conninfo application_name=rewind_standby'",
		'standby_mode=on',
		"recovery_target_timeline='latest'",
		'');
	$node_standby->append_conf("recovery.conf", $recovery_settings);

	# No need to wait for the standby to catch up with the primary here:
	# no test examines the standby before promotion, so it may still have
	# WAL left to apply when this returns.
	$node_standby->start();
}

# Wait until the standby has received and written all of the master's WAL,
# promote it, and force a checkpoint on it.
sub promote_standby
{
	#### Test-specific steps that need a running standby are run by the
	# test script before calling this function.

	# Poll the master until it reports that the standby's write position
	# has reached the master's current WAL position.
	my $caught_up_sql =
	    "SELECT pg_current_wal_lsn() = write_lsn"
	  . " FROM pg_stat_replication"
	  . " WHERE application_name = 'rewind_standby';";
	unless ($node_master->poll_query_until('postgres', $caught_up_sql))
	{
		die "Timed out while waiting for standby to receive and write WAL";
	}

	# Promote the standby.  Data the test script inserts on the master
	# after this point puts the master out-of-sync with the standby.
	$node_standby->promote();

	# pg_rewind reads the control file to find out which timeline the
	# server is on, and the control file is only updated at the first
	# checkpoint after promotion, not at promotion itself.  In remote mode
	# the remaining test steps can finish so quickly that pg_rewind would
	# run before that checkpoint has happened, so force one now.
	standby_psql('checkpoint');
}

# Rewind the old master so that it can follow the promoted standby, then
# start it again as a standby of that node.
#
# $test_mode selects the source pg_rewind reads from:
#   "local"  - the standby is stopped and its data directory is the source
#   "remote" - the running standby is the source, reached through a
#              connection string
# Any other value is a fatal error, raised before any server is touched.
#
# Dies if the master's postgresql.conf cannot be saved before the rewind or
# restored after it.
sub run_pg_rewind
{
	my $test_mode       = shift;
	my $master_pgdata   = $node_master->data_dir;
	my $standby_pgdata  = $node_standby->data_dir;
	my $standby_connstr = $node_standby->connstr('postgres');
	my $tmp_folder      = TestLib::tempdir;

	# Reject an unknown mode up front, rather than after the master has
	# already been stopped.
	die("Incorrect test mode specified")
	  unless defined $test_mode
	  && ($test_mode eq "local" || $test_mode eq "remote");

	# Stop the master and be ready to perform the rewind
	$node_master->stop;

	# At this point, the rewind processing is ready to run.
	# We now have a very simple scenario with a few diverged WAL records.
	# The real testing begins now, with a bifurcation of the possible
	# scenarios that pg_rewind supports.

	# Keep a temporary postgresql.conf for master node or it would be
	# overwritten during the rewind.
	my $master_conf = "$master_pgdata/postgresql.conf";
	my $saved_conf  = "$tmp_folder/master-postgresql.conf.tmp";
	copy($master_conf, $saved_conf)
	  or die "could not copy \"$master_conf\" to \"$saved_conf\": $!";

	# Now run pg_rewind
	if ($test_mode eq "local")
	{

		# Do rewind using a local pgdata as source.  The standby is
		# stopped first, so its data directory is read while no server is
		# running on it.
		$node_standby->stop;
		command_ok(
			[   'pg_rewind',
				"--debug",
				"--source-pgdata=$standby_pgdata",
				"--target-pgdata=$master_pgdata" ],
			'pg_rewind local');
	}
	else
	{

		# Do rewind using a remote connection as source
		command_ok(
			[   'pg_rewind',       "--debug",
				"--source-server", $standby_connstr,
				"--target-pgdata=$master_pgdata" ],
			'pg_rewind remote');
	}

	# Now move back postgresql.conf with old settings
	move($saved_conf, $master_conf)
	  or die "could not move \"$saved_conf\" to \"$master_conf\": $!";

	# Plug-in rewound node to the now-promoted standby node
	my $port_standby = $node_standby->port;
	$node_master->append_conf(
		'recovery.conf', qq(
primary_conninfo='port=$port_standby'
standby_mode=on
recovery_target_timeline='latest'
));

	# Restart the master to check that rewind went correctly
	$node_master->start;

	#### Now run the test-specific parts to check the result
}

# Clean up after the test: tear down the master and the standby, skipping
# whichever of the two was never created.
sub clean_rewind_test
{
	if (defined $node_master)
	{
		$node_master->teardown_node;
	}
	if (defined $node_standby)
	{
		$node_standby->teardown_node;
	}
}

1;