#
# Tests related to WAL archiving and recovery.
#
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More tests => 13;
use Config;

# Set up the primary with archiving and streaming enabled, turn autovacuum
# off in postgresql.conf, then start it and remember its data directory.
my $primary = get_new_node('master');
$primary->init(has_archiving => 1, allows_streaming => 1);
$primary->append_conf('postgresql.conf', 'autovacuum = off');
$primary->start;
my $primary_data = $primary->data_dir;

# Install, for now, an archive_command that is guaranteed to fail while
# archiving itself stays enabled.  A command that does not exist cannot be
# used: the archiver process would just exit without reporting the failure
# to pg_stat_archiver.  A plain "false" cannot be used either, as it is not
# portable to Windows.  The portable choice is a copy command that is known
# to work but is given a source path that does not exist.
my $copy_program = $TestLib::windows_os ? 'copy' : 'cp';
my $failing_archive_command =
  qq{$copy_program "%p_does_not_exist" "%f_does_not_exist"};
my $break_archiving_sql = qq{
    ALTER SYSTEM SET archive_command TO '$failing_archive_command';
    SELECT pg_reload_conf();
};
$primary->safe_psql('postgres', $break_archiving_sql);

# Remember the WAL segment currently in use, then switch to a new one.
# The status files of the remembered segment are what the following checks
# use to track the activity of the archiver.
my $segment_name_1 = $primary->safe_psql('postgres',
    q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_1 = 'pg_wal/archive_status/' . $segment_name_1;
my ($segment_path_1_ready, $segment_path_1_done) =
  map { "$segment_path_1.$_" } qw(ready done);
$primary->safe_psql('postgres', q{
    CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
    SELECT pg_switch_wal();
    CHECKPOINT;
});

# Wait for an archive failure.
my $archive_failed = $primary->poll_query_until('postgres',
    q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't');
die "Timed out while waiting for archiving to fail" unless $archive_failed;

# With the archive command failing, the switched-out segment must still be
# flagged as ready for archiving and must not be flagged as done.
my $primary_ready_1 = "$primary_data/$segment_path_1_ready";
my $primary_done_1  = "$primary_data/$segment_path_1_done";
ok(-f $primary_ready_1,
    ".ready file exists for WAL segment $segment_name_1 waiting to be archived"
);
ok(!-f $primary_done_1,
    ".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
);

# The archiver statistics must show no success and report the segment as
# the last failure.
my $archiver_stats = $primary->safe_psql('postgres', q{
    SELECT archived_count, last_failed_wal
    FROM pg_stat_archiver
});
is($archiver_stats, "0|$segment_name_1",
    "pg_stat_archiver failed to archive $segment_name_1");

# Crash the cluster for the next test, which checks that WAL segments not
# yet archived are not removed.
$primary->stop('immediate');

# The standby tests further down partially check recovery from a backup
# taken as a snapshot, without pg_start/stop_backup.  A standby restored
# from such a backup should first enter crash recovery and then switch to
# regular archive recovery.  The base backup is taken at this point, while
# archive_command still fails, because the standby tests rely on that.
$primary->backup_fs_cold('backup');

$primary->start;
ok(-f $primary_ready_1,
    ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
);

# Allow WAL archiving again and wait for a success.
$primary->safe_psql(
    'postgres', q{
    ALTER SYSTEM RESET archive_command;
    SELECT pg_reload_conf();
});

$primary->poll_query_until('postgres',
    q{SELECT archived_count FROM pg_stat_archiver}, '1')
  or die "Timed out while waiting for archiving to finish";

ok(!-f "$primary_data/$segment_path_1_ready",
    ".ready file for archived WAL segment $segment_name_1 removed");

ok(-f "$primary_data/$segment_path_1_done",
    ".done file for archived WAL segment $segment_name_1 exists");

is( $primary->safe_psql(
        'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }),
    $segment_name_1,
    "archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
);

# Create some WAL activity and a new checkpoint so as the next standby can
# create a restartpoint.  As this standby starts in crash recovery because
# of the cold backup taken previously, it needs a clean restartpoint to deal
# with existing status files.
my $segment_name_2 = $primary->safe_psql('postgres',
    q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_2       = "pg_wal/archive_status/$segment_name_2";
my $segment_path_2_ready = "$segment_path_2.ready";
my $segment_path_2_done  = "$segment_path_2.done";
$primary->safe_psql(
    'postgres', q{
    INSERT INTO mine SELECT generate_series(10,20) AS x;
});

# Switch to a new segment on its own so that the LSN returned by
# pg_switch_wal() can be captured.  The standbys below wait until they have
# replayed up to this LSN before their status files are inspected.
my $primary_lsn = $primary->safe_psql(
    'postgres', q{
    SELECT pg_switch_wal();
});

$primary->safe_psql(
    'postgres', q{
    CHECKPOINT;
});

$primary->poll_query_until('postgres',
    q{ SELECT last_archived_wal FROM pg_stat_archiver },
    $segment_name_2)
  or die "Timed out while waiting for archiving to finish";

# Test standby with archive_mode = on.
my $standby1 = get_new_node('standby');
$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
$standby1->append_conf('postgresql.conf', "archive_mode = on");
my $standby1_data = $standby1->data_dir;
$standby1->start;

# Wait for the replay of the segment switch done on the primary.  Without
# this, the checks below could run before the second segment has been
# restored from the archives, and the absence of its .ready file would not
# prove anything.
$standby1->poll_query_until('postgres',
    qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0},
    't')
  or die "Timed out while waiting for WAL replay on standby1";

$standby1->safe_psql('postgres', q{CHECKPOINT});

# Recovery with archive_mode=on should not create .ready files.
# Note that this segment did not exist in the backup.
ok( !-f "$standby1_data/$segment_path_2_ready",
    ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby"
);

# Test recovery with archive_mode = always, which should always keep
# .ready files if archiving is enabled, though here we want the archive
# command to fail to persist the .ready files.  Note that this node
# has inherited the archive command of the previous cold backup that
# will cause archiving failures.
my $standby2 = get_new_node('standby2');
$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
$standby2->append_conf('postgresql.conf', 'archive_mode = always');
my $standby2_data = $standby2->data_dir;
$standby2->start;

# As on the first standby, wait for the replay of the segment switch so
# that every segment needed by the following checks has been restored.
$standby2->poll_query_until('postgres',
    qq{SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0},
    't')
  or die "Timed out while waiting for WAL replay on standby2";

$standby2->safe_psql('postgres', q{CHECKPOINT});

ok( -f "$standby2_data/$segment_path_1_ready",
    ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
);

# Reset statistics of the archiver for the next checks.
$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});

# Now crash the cluster to check that recovery step does not
# remove non-archived WAL segments on a standby where archiving
# is enabled.
$standby2->stop('immediate');
$standby2->start;

ok( -f "$standby2_data/$segment_path_1_ready",
    "WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
);

# Allow WAL archiving again, and wait for the segments to be archived.
$standby2->safe_psql('postgres', q{
    ALTER SYSTEM RESET archive_command;
    SELECT pg_reload_conf();
});
my $standby2_caught_up = $standby2->poll_query_until('postgres',
    q{SELECT last_archived_wal FROM pg_stat_archiver},
    $segment_name_2);
die "Timed out while waiting for archiving to finish"
  unless $standby2_caught_up;

# Both tracked segments should have been archived by the standby.
my $standby2_archived_count = $standby2->safe_psql('postgres',
    q{SELECT archived_count FROM pg_stat_archiver});
is($standby2_archived_count, '2',
    "correct number of WAL segments archived from standby");

# None of the .ready files may be left behind...
my @leftover_ready_files =
  grep { -f "$standby2_data/$_" }
  ($segment_path_1_ready, $segment_path_2_ready);
ok(!@leftover_ready_files,
    ".ready files removed after archive success with archive_mode=always on standby"
);

# ... and both .done files must be present.
my @present_done_files =
  grep { -f "$standby2_data/$_" }
  ($segment_path_1_done, $segment_path_2_done);
ok(@present_done_files == 2,
    ".done files created after archive success with archive_mode=always on standby"
);