1#
2# Tests related to WAL archiving and recovery.
3#
4use strict;
5use warnings;
6use PostgresNode;
7use TestLib;
8use Test::More tests => 13;
9use Config;
10
# Create the primary node with WAL archiving and streaming enabled, and
# with autovacuum switched off in postgresql.conf before it is started.
my $primary = get_new_node('master');
$primary->init(has_archiving => 1, allows_streaming => 1);
$primary->append_conf('postgresql.conf', 'autovacuum = off');
$primary->start();

# Data directory of the primary, used below to locate archive status files.
my $primary_data = $primary->data_dir();
18
# Install, temporarily, an archive_command that is certain to fail while
# archiving stays enabled.  A command that does not exist is not usable
# here, as the archiver process would then just exit without reporting
# the failure to pg_stat_archiver.  A plain "false" is not usable either,
# being unportable on Windows.  The portable answer is a copy command,
# which is known to work, fed with an original path that is incorrect.
my $copy_program = $TestLib::windows_os ? 'copy' : 'cp';
my $failing_archive_command =
  qq{$copy_program "%p_does_not_exist" "%f_does_not_exist"};
$primary->safe_psql(
	'postgres', qq{
    ALTER SYSTEM SET archive_command TO '$failing_archive_command';
    SELECT pg_reload_conf();
});
35
# Record the WAL segment currently in use, then switch to a new segment.
# The archive status files of the recorded segment are used to track the
# activity of the archiver.
my $segment_name_1 = $primary->safe_psql('postgres',
	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_1       = 'pg_wal/archive_status/' . $segment_name_1;
my $segment_path_1_ready = $segment_path_1 . '.ready';
my $segment_path_1_done  = $segment_path_1 . '.done';
$primary->safe_psql(
	'postgres', q{
	CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
	SELECT pg_switch_wal();
	CHECKPOINT;
});

# Block until pg_stat_archiver reports at least one failed attempt.
my $failure_seen = $primary->poll_query_until('postgres',
	q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't');
die "Timed out while waiting for archiving to fail" unless $failure_seen;

# With the archive command failing, the switched segment has to be marked
# as .ready and must not be marked as .done.
my $failed_ready_file = "$primary_data/$segment_path_1_ready";
my $failed_done_file  = "$primary_data/$segment_path_1_done";
ok( -f $failed_ready_file,
	".ready file exists for WAL segment $segment_name_1 waiting to be archived"
);
ok( !-f $failed_done_file,
	".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
);

# Nothing has been archived yet, and the last failure recorded is for the
# segment that was switched out.
my $failure_stats = $primary->safe_psql(
	'postgres', q{
		SELECT archived_count, last_failed_wal
		FROM pg_stat_archiver
	});
is( $failure_stats,
	"0|$segment_name_1",
	"pg_stat_archiver failed to archive $segment_name_1");
68
# Stop the primary in immediate mode, i.e. crash it.  The check done after
# the restart verifies that non-archived WAL segments are not removed.
$primary->stop('immediate');

# Take the base backup while the node is stopped and while archive_command
# still fails, which the standby tests built from this backup depend on.
# Restoring it partially checks the recovery behavior for a backup taken
# using a snapshot with no pg_start/stop_backup: the recovered standby
# should first enter crash recovery, then switch to regular archive
# recovery.
$primary->backup_fs_cold('backup');

$primary->start();
my $ready_after_crash = "$primary_data/$segment_path_1_ready";
ok( -f $ready_after_crash,
	".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
);
86
# Remove the failing archive_command override so that WAL archiving works
# again, then wait until one segment has been reported as archived.
$primary->safe_psql(
	'postgres', q{
	ALTER SYSTEM RESET archive_command;
	SELECT pg_reload_conf();
});

my $first_archived = $primary->poll_query_until('postgres',
	q{SELECT archived_count FROM pg_stat_archiver}, '1');
die "Timed out while waiting for archiving to finish"
  unless $first_archived;

# On success the .ready status file is gone and a .done file is present.
my $archived_ready_file = "$primary_data/$segment_path_1_ready";
my $archived_done_file  = "$primary_data/$segment_path_1_done";
ok(!-f $archived_ready_file,
	".ready file for archived WAL segment $segment_name_1 removed");

ok(-f $archived_done_file,
	".done file for archived WAL segment $segment_name_1 exists");

# The archiver statistics have to name the same segment.
my $last_archived = $primary->safe_psql('postgres',
	q{ SELECT last_archived_wal FROM pg_stat_archiver });
is( $last_archived,
	$segment_name_1,
	"archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
);
109
# Create some WAL activity and a new checkpoint so as the next standby can
# create a restartpoint.  As this standby starts in crash recovery because
# of the cold backup taken previously, it needs a clean restartpoint to deal
# with existing status files.
my $segment_name_2 = $primary->safe_psql('postgres',
	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_2       = "pg_wal/archive_status/$segment_name_2";
my $segment_path_2_ready = "$segment_path_2.ready";
my $segment_path_2_done  = "$segment_path_2.done";
$primary->safe_psql('postgres',
	q{INSERT INTO mine SELECT generate_series(10,20) AS x;});

# Switch to a new segment and keep the LSN returned by pg_switch_wal().
# The standbys created below wait until they have replayed up to this
# point before their status files are checked; without that wait, the
# checks could run before the segment has been restored from the archives.
my $primary_lsn =
  $primary->safe_psql('postgres', q{SELECT pg_switch_wal()});
$primary->safe_psql('postgres', q{CHECKPOINT});

$primary->poll_query_until('postgres',
	q{ SELECT last_archived_wal FROM pg_stat_archiver },
	$segment_name_2)
  or die "Timed out while waiting for archiving to finish";

# Test standby with archive_mode = on.
my $standby1 = get_new_node('standby');
$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
$standby1->append_conf('postgresql.conf', "archive_mode = on");
my $standby1_data = $standby1->data_dir;
$standby1->start;

# Wait for the replay of the segment switch done previously, ensuring
# that all the segments needed have been restored from the archives.
$standby1->poll_query_until('postgres',
	qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0},
	't')
  or die "Timed out while waiting for WAL replay on standby1";

$standby1->safe_psql('postgres', q{CHECKPOINT});

# Recovery with archive_mode=on should not create .ready files.
# Note that this segment did not exist in the backup.
ok( !-f "$standby1_data/$segment_path_2_ready",
	".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby"
);

# Test recovery with archive_mode = always, which should always keep
# .ready files if archiving is enabled, though here we want the archive
# command to fail to persist the .ready files.  Note that this node
# has inherited the archive command of the previous cold backup that
# will cause archiving failures.
my $standby2 = get_new_node('standby2');
$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
$standby2->append_conf('postgresql.conf', 'archive_mode = always');
my $standby2_data = $standby2->data_dir;
$standby2->start;

# As for the first standby, wait for the replay of the segment switch
# before looking at any status file.
$standby2->poll_query_until('postgres',
	qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0},
	't')
  or die "Timed out while waiting for WAL replay on standby2";

$standby2->safe_psql('postgres', q{CHECKPOINT});

ok( -f "$standby2_data/$segment_path_1_ready",
	".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
);

# Reset statistics of the archiver for the next checks.
$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});

# Now crash the cluster to check that recovery step does not
# remove non-archived WAL segments on a standby where archiving
# is enabled.
$standby2->stop('immediate');
$standby2->start;

ok( -f "$standby2_data/$segment_path_1_ready",
	"WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
);

# Allow WAL archiving again, and wait for the segments to be archived.
$standby2->safe_psql(
	'postgres', q{
	ALTER SYSTEM RESET archive_command;
	SELECT pg_reload_conf();
});
$standby2->poll_query_until('postgres',
	q{SELECT last_archived_wal FROM pg_stat_archiver},
	$segment_name_2)
  or die "Timed out while waiting for archiving to finish";

# The statistics were reset above, so only the two segments archived by
# this standby are expected to be counted.
is( $standby2->safe_psql(
		'postgres', q{SELECT archived_count FROM pg_stat_archiver}),
	'2',
	"correct number of WAL segments archived from standby");

ok( !-f "$standby2_data/$segment_path_1_ready"
	  && !-f "$standby2_data/$segment_path_2_ready",
	".ready files removed after archive success with archive_mode=always on standby"
);

ok( -f "$standby2_data/$segment_path_1_done"
	  && -f "$standby2_data/$segment_path_2_done",
	".done files created after archive success with archive_mode=always on standby"
);
200