16f793812SPavel Zakharov#!/usr/bin/ksh -p
26f793812SPavel Zakharov
36f793812SPavel Zakharov#
46f793812SPavel Zakharov# This file and its contents are supplied under the terms of the
56f793812SPavel Zakharov# Common Development and Distribution License ("CDDL"), version 1.0.
66f793812SPavel Zakharov# You may only use this file in accordance with the terms of version
76f793812SPavel Zakharov# 1.0 of the CDDL.
86f793812SPavel Zakharov#
96f793812SPavel Zakharov# A full copy of the text of the CDDL should have accompanied this
106f793812SPavel Zakharov# source.  A copy of the CDDL is also available via the Internet at
116f793812SPavel Zakharov# http://www.illumos.org/license/CDDL.
126f793812SPavel Zakharov#
136f793812SPavel Zakharov
146f793812SPavel Zakharov#
156f793812SPavel Zakharov# Copyright (c) 2016 by Delphix. All rights reserved.
166f793812SPavel Zakharov#
176f793812SPavel Zakharov
186f793812SPavel Zakharov. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
196f793812SPavel Zakharov
206f793812SPavel Zakharov#
216f793812SPavel Zakharov# DESCRIPTION:
226f793812SPavel Zakharov#	It should be possible to rewind a pool beyond a device replacement.
236f793812SPavel Zakharov#
246f793812SPavel Zakharov# STRATEGY:
256f793812SPavel Zakharov#	1. Create a pool.
266f793812SPavel Zakharov#	2. Generate files and remember their md5sum.
276f793812SPavel Zakharov#	3. Sync a few times and note last synced txg.
286f793812SPavel Zakharov#	4. Take a snapshot to make sure old blocks are not overwritten.
296f793812SPavel Zakharov#	5. Initiate device replacement and export the pool. Special care must
306f793812SPavel Zakharov#	   be taken so that resilvering doesn't complete before the export.
316f793812SPavel Zakharov#	6. Test 1: Rewind pool to noted txg and then verify data checksums.
326f793812SPavel Zakharov#	   Import it read-only so that we do not overwrite blocks in later txgs.
336f793812SPavel Zakharov#	7. Re-import pool at latest txg and let the replacement finish.
#	8. Export the pool and remove the new device - we shouldn't need it.
356f793812SPavel Zakharov#	9. Test 2: Rewind pool to noted txg and then verify data checksums.
366f793812SPavel Zakharov#
376f793812SPavel Zakharov# STRATEGY TO SLOW DOWN RESILVERING:
#	1. Reduce zfs_txg_timeout, which controls how long we can resilver
#	   during each sync.
#	2. Add data to pool
#	3. Re-import the pool so that data isn't cached
#	4. Set zfs_scan_suspend_progress so the resilver cannot make progress
#	5. Trigger the resilvering
#	6. Confirm the pool is still replacing
#	7. Export the pool
#	8. Clear zfs_scan_suspend_progress
476f793812SPavel Zakharov#
486f793812SPavel Zakharov# DISCLAIMER:
496f793812SPavel Zakharov#	This test can fail since nothing guarantees that old MOS blocks aren't
506f793812SPavel Zakharov#	overwritten. Snapshots protect datasets and data files but not the MOS.
516f793812SPavel Zakharov#	sync_some_data_a_few_times interleaves file data and MOS data for a few
526f793812SPavel Zakharov#	txgs, thus increasing the odds that some txgs will have their MOS data
536f793812SPavel Zakharov#	left untouched.
546f793812SPavel Zakharov#
556f793812SPavel Zakharov
# Restrict this test to the "global" environment.
verify_runnable "global"

# Saved value of zfs_txg_timeout. Stays empty until the current value is
# read further down, just before the test lowers the timeout.
ZFS_TXG_TIMEOUT=""
596f793812SPavel Zakharov
#
# Exit handler: undo the global state this test changes, then run the
# shared cleanup routine.
#
function custom_cleanup
{
	# Restore zfs_txg_timeout only if its original value was saved;
	# ZFS_TXG_TIMEOUT stays empty until that point. The variable must be
	# expanded here - testing the bare name is always true.
	if [[ -n $ZFS_TXG_TIMEOUT ]]; then
		log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT
	fi
	log_must rm -rf $BACKUP_DEVICE_DIR
	# Let scans make progress again in case the test stopped while
	# zfs_scan_suspend_progress was still set.
	log_must set_tunable32 zfs_scan_suspend_progress 0
	cleanup
}
696f793812SPavel Zakharov
# Register custom_cleanup as this test's exit handler.
log_onexit custom_cleanup
716f793812SPavel Zakharov
#
# Replace a device in a freshly created pool and verify that the pool can
# be rewound to a txg that predates the replacement, both while the
# replacement is still in progress and after it has finished.
#
# $1 - vdev specification passed to 'zpool create'; also the configuration
#      expected after each rewind
# $2 - device to replace
# $3 - device to replace it with
# $4 - configuration expected once the replacement has finished
# $5 - accepted for compatibility with existing callers; not used
#
function test_replace_vdev
{
	typeset poolcreate="$1"
	typeset replacevdev="$2"
	typeset replaceby="$3"
	typeset poolfinalstate="$4"

	log_note "$0: pool '$poolcreate', replace $replacevdev by $replaceby."

	log_must zpool create $TESTPOOL1 $poolcreate

	# generate data and checksum it
	log_must generate_data $TESTPOOL1 $MD5FILE

	# add more data so that resilver takes longer
	log_must write_some_data $TESTPOOL1

	# Syncing a few times while writing new data increases the odds that
	# MOS metadata for some of the txgs will survive.
	log_must sync_some_data_a_few_times $TESTPOOL1
	# Remember the txg to rewind to before anything else changes the pool.
	typeset txg
	txg=$(get_last_txg_synced $TESTPOOL1)
	log_must zfs snapshot -r $TESTPOOL1@snap1

	# This should not free original data.
	log_must overwrite_data $TESTPOOL1 ""

	log_must zpool export $TESTPOOL1
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1

	# Ensure resilvering doesn't complete.
	log_must set_tunable32 zfs_scan_suspend_progress 1
	log_must zpool replace $TESTPOOL1 $replacevdev $replaceby

	# Confirm pool is still replacing
	log_must pool_is_replacing $TESTPOOL1
	log_must zpool export $TESTPOOL1
	# The pool is exported mid-replacement; scans may progress again.
	log_must set_tunable32 zfs_scan_suspend_progress 0

	############################################################
	# Test 1: rewind while device is resilvering.
	# Import read only to avoid overwriting more recent blocks.
	############################################################
	log_must zpool import -d $DEVICE_DIR -o readonly=on -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_md5sums $MD5FILE

	log_must zpool export $TESTPOOL1

	# Import pool at latest txg to finish the resilvering
	log_must zpool import -d $DEVICE_DIR $TESTPOOL1
	log_must overwrite_data $TESTPOOL1 ""
	log_must wait_for_pool_config $TESTPOOL1 "$poolfinalstate"
	log_must zpool export $TESTPOOL1

	# Move out the new device
	log_must mv $replaceby $BACKUP_DEVICE_DIR/

	############################################################
	# Test 2: rewind after device has been replaced.
	# Import read-write since we won't need the pool anymore.
	############################################################
	log_must zpool import -d $DEVICE_DIR -T $txg $TESTPOOL1
	log_must check_pool_config $TESTPOOL1 "$poolcreate"

	log_must verify_data_md5sums $MD5FILE

	# Cleanup
	log_must zpool destroy $TESTPOOL1
	# Restore the device we moved out
	log_must mv "$BACKUP_DEVICE_DIR/$(basename $replaceby)" $DEVICE_DIR/
	# Fast way to clear vdev labels
	log_must zpool create -f $TESTPOOL2 $VDEV0 $VDEV1 $VDEV2 $VDEV3 $VDEV4
	log_must zpool destroy $TESTPOOL2

	log_note ""
}
1516f793812SPavel Zakharov
# -p keeps a directory left behind by an interrupted run from failing the test.
log_must mkdir -p $BACKUP_DEVICE_DIR
# Make the devices bigger to reduce chances of overwriting MOS metadata.
increase_device_sizes $(( FILE_SIZE * 4 ))

# We set zfs_txg_timeout to 1 to reduce resilvering time at each sync.
# The current value is saved first so that it can be restored afterwards.
ZFS_TXG_TIMEOUT=$(get_zfs_txg_timeout)
log_must set_zfs_txg_timeout 1

# Each call passes: the initial pool layout, the device to replace, the
# replacement device, the layout expected after the replacement, and a list
# of the initial layout's devices.
test_replace_vdev "$VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"$VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1"

test_replace_vdev "mirror $VDEV0 $VDEV1" \
	"$VDEV1" "$VDEV2" \
	"mirror $VDEV0 $VDEV2" \
	"$VDEV0 $VDEV1"

test_replace_vdev "raidz $VDEV0 $VDEV1 $VDEV2" \
	"$VDEV1" "$VDEV3" \
	"raidz $VDEV0 $VDEV3 $VDEV2" \
	"$VDEV0 $VDEV1 $VDEV2"

# Put the original timeout back now that all scenarios have run.
log_must set_zfs_txg_timeout $ZFS_TXG_TIMEOUT

log_pass "zpool import rewind after device replacement passed."
178