1#!/bin/ksh -p
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
20#
21
22. $STF_SUITE/include/libtest.shlib
23. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
24
25#
26# DESCRIPTION:
27# Verify dRAID resilver to traditional and distributed spares for
28# a variety of pool configurations and pool states.
29#
30# STRATEGY:
31# 1. For resilvers:
32#    a. Create a semi-random dRAID pool configuration which can
33#       sustain 1 failure and has 5 distributed spares.
34#    b. Fill the pool with data
#    c. Systematically fault and replace vdevs in the pools with
#       spares to test resilvering in common pool states.
37#    d. Scrub the pool to verify no data was lost
38#    e. Verify the contents of files in the pool
39#
40
41log_assert "Verify dRAID resilver"
42
#
# Exit handler for this test: put the REBUILD_SCRUB_ENABLED tunable back
# to 1 (the test body sets it to 0) and then run the shared cleanup
# routine.
#
function cleanup_tunable
{
	# Value the tunable is restored to when the test finishes.
	typeset scrub_enabled_default=1

	log_must set_tunable32 REBUILD_SCRUB_ENABLED "$scrub_enabled_default"

	cleanup
}
48
# Register the exit handler before anything below can end the test, so
# the tunable is restored and cleanup runs on every exit path.
log_onexit cleanup_tunable

# Skip the test entirely on kmemleak-enabled kernels.
is_kmemleak && \
	log_unsupported "Test case runs slowly when kmemleak is enabled"

#
# Turn off the scrub that follows a sequential resilver: the resilver
# by itself must be able to reconstruct the data, without a scrub
# repairing anything afterwards.
#
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
60
#
# Check the pool after a replacement has completed.
#
# $1 - name of the pool to check
#
# Runs verify_pool on the pool and then requires the "scan" line of the
# pool status to report both "repaired 0B" and "with 0 errors". Every
# step is wrapped in log_must.
#
function verify_resilver_result
{
	typeset pool=$1

	log_must verify_pool "$pool"
	log_must check_pool_status "$pool" "scan" "repaired 0B"
	log_must check_pool_status "$pool" "scan" "with 0 errors"
}

# Run the whole scenario twice: once per resilver type.
for replace_mode in "healing" "sequential"; do

	#
	# Extra option for "zpool replace": -s for a sequential resilver,
	# nothing for a healing resilver. $flags is deliberately expanded
	# unquoted below so that the empty value adds no argument at all.
	#
	if [[ "$replace_mode" = "sequential" ]]; then
		flags="-s"
	else
		flags=""
	fi

	# Single parity, 10 children, 5 distributed spares, random data width.
	parity=1
	spares=5
	data=$(random_int_between 1 4)
	children=10
	draid="draid${parity}:${data}d:${children}c:${spares}s"

	setup_test_env $TESTPOOL $draid $children

	#
	# Perform a variety of replacements to normal and distributed spares
	# for a variety of different vdev configurations to exercise different
	# resilver code paths. The final configuration is expected to be
	# (the data width "1d" and the basedir suffix are examples only; the
	# data width is chosen at random above):
	#
	# NAME                                  STATE     READ WRITE CKSUM
	# testpool                              DEGRADED     0     0     0
	#   draid1:1d:10c:5s-0                  DEGRADED     0     0     0
	#     /var/tmp/basedir.28683/new_vdev0  ONLINE       0     0     0
	#     /var/tmp/basedir.28683/new_vdev1  ONLINE       0     0     0
	#     spare-2                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev2    FAULTED      0     0     0
	#       draid1-0-3                      ONLINE       0     0     0
	#     spare-3                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev3    FAULTED      0     0     0
	#       draid1-0-4                      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev4      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev5      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev6      ONLINE       0     0     0
	#     draid1-0-0                        ONLINE       0     0     0
	#     spare-8                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev8    FAULTED      0     0     0
	#       draid1-0-1                      ONLINE       0     0     0
	#     spare-9                           ONLINE       0     0     0
	#       /var/tmp/basedir.28683/vdev9    ONLINE       0     0     0
	#       draid1-0-2                      ONLINE       0     0     0
	# spares
	#   draid1-0-0                          INUSE     currently in use
	#   draid1-0-1                          INUSE     currently in use
	#   draid1-0-2                          INUSE     currently in use
	#   draid1-0-3                          INUSE     currently in use
	#   draid1-0-4                          INUSE     currently in use
	#

	# Distributed spare which replaces original online device
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev7 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev7 draid1-0-0
	log_must zpool detach $TESTPOOL $BASEDIR/vdev7
	log_must check_vdev_state $TESTPOOL draid1-0-0 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-0 "INUSE"
	verify_resilver_result $TESTPOOL

	# Distributed spare in mirror with original device faulted
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev8 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev8 draid1-0-1
	log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-1 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-1 "INUSE"
	verify_resilver_result $TESTPOOL

	# Distributed spare in mirror with original device still online
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev9 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev9 draid1-0-2
	log_must check_vdev_state $TESTPOOL spare-9 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-2 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-2 "INUSE"
	verify_resilver_result $TESTPOOL

	# Normal faulted device replacement
	new_vdev0="$BASEDIR/new_vdev0"
	log_must truncate -s $MINVDEVSIZE $new_vdev0
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev0
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0
	log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE"
	verify_resilver_result $TESTPOOL

	# Distributed spare faulted device replacement
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev2 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev2 draid1-0-3
	log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-3 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-3 "INUSE"
	verify_resilver_result $TESTPOOL

	# Normal online device replacement
	new_vdev1="$BASEDIR/new_vdev1"
	log_must truncate -s $MINVDEVSIZE $new_vdev1
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1
	log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE"
	verify_resilver_result $TESTPOOL

	#
	# Distributed spare online device replacement (then fault).
	# Confirm the precondition first, as the other online-device
	# scenarios do: vdev3 must still be ONLINE when the spare is attached.
	#
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 draid1-0-4
	log_must check_vdev_state $TESTPOOL spare-3 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-4 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-4 "INUSE"
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED"
	log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED"
	verify_resilver_result $TESTPOOL

	# Verify the original data is valid
	log_must is_data_valid $TESTPOOL
	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	# Tear the pool down so the next resilver type starts from scratch.
	cleanup
done

log_pass "Verify resilver to dRAID distributed spares"
194