#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2017, 2018 by Delphix. All rights reserved.
#

#
# Helpers that this file calls but does not define (log_must,
# log_mustnot, poolexists, wait_for_removal, vdevs_in_pool, ...) are
# expected to come from the libraries sourced here.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/removal/removal.kshlib

#
# In general all the tests related to the pool checkpoint can
# be divided into two categories. Tests that verify features
# provided by the checkpoint (e.g. checkpoint_rewind) and tests
# that stress-test the checkpoint (e.g. checkpoint_big_rewind).
#
# For the first group we don't really care about the size of
# the pool or the individual file sizes within the filesystems.
# This is why these tests run directly on pools that use a
# "real disk vdev" (meaning not a file based one). These tests
# use the $TESTPOOL pool that is created on top of $TESTDISK.
# This pool is referred to as the "test pool" and thus all
# the tests of this group use the testpool-related functions of
# this file (not the nested_pools ones).
#
# For the second group we generally try to bring the pool to its
# limits by increasing fragmentation, filling all allocatable
# space, attempting to use vdevs that the checkpoint spacemap
# cannot represent, etc. For these tests we need to control
# almost all parameters of the pool and the vdevs that back it
# so we create them based on file-based vdevs that we carefully
# create within the $TESTPOOL pool. So most of these tests, in
# order to create this nested pool structure, generally start
# like this:
# 1] We create the test pool ($TESTPOOL).
# 2] We create a filesystem and we populate it with files of
#    some predetermined size.
# 3] We use those files as vdevs for the pool that the test
#    will use ($NESTEDPOOL).
# 4] Go on and let the test run and operate on $NESTEDPOOL.
#

#
# These disks are used to back $TESTPOOL: the first and (when present)
# the second entry of the whitespace-separated $DISKS list.
#
# awk is used instead of "cut -d' ' -f2" because cut prints the whole
# line when it contains no delimiter, which made EXTRATESTDISK equal to
# TESTDISK whenever $DISKS listed a single disk. With awk a missing
# second disk yields an empty EXTRATESTDISK.
#
TESTDISK="$(echo $DISKS | awk '{print $1}')"
EXTRATESTDISK="$(echo $DISKS | awk '{print $2}')"

#
# Datasets that the tests create inside $TESTPOOL.
#
FS0="$TESTPOOL/$TESTFS"
FS1="$TESTPOOL/$TESTFS1"
FS2="$TESTPOOL/$TESTFS2"

#
# One file per dataset. The paths assume that every dataset is
# mounted at /<dataset name>.
#
FS0FILE="/$FS0/$TESTFILE0"
FS1FILE="/$FS1/$TESTFILE1"
FS2FILE="/$FS2/$TESTFILE2"

#
# The following are created within $TESTPOOL and
# will be used to back $NESTEDPOOL
#
# DISKFS      - dataset that holds the file-based vdevs
# FILEDISKDIR - directory of that dataset (assumes it is mounted at
#               /<dataset name>)
# FILEDISK1/2 - the two backing files
# FILEDISKS   - space-separated list of the backing files; it is
#               meant to be expanded unquoted
#
DISKFS="$TESTPOOL/disks"
FILEDISKDIR="/$DISKFS"
FILEDISK1="$FILEDISKDIR/dsk1"
FILEDISK2="$FILEDISKDIR/dsk2"
FILEDISKS="$FILEDISK1 $FILEDISK2"

#
# $NESTEDPOOL related variables: the pool name, the datasets created
# in it and one file per dataset (the paths assume that every dataset
# is mounted at /<dataset name>).
#
NESTEDPOOL="nestedpool"
NESTEDFS0="$NESTEDPOOL/$TESTFS"
NESTEDFS1="$NESTEDPOOL/$TESTFS1"
NESTEDFS2="$NESTEDPOOL/$TESTFS2"
NESTEDFS0FILE="/$NESTEDFS0/$TESTFILE0"
NESTEDFS1FILE="/$NESTEDFS1/$TESTFILE1"
NESTEDFS2FILE="/$NESTEDFS2/$TESTFILE2"

#
# In the tests that stress-test the pool (second category
# mentioned above), there are some that need to bring
# fragmentation to high percentages in a relatively short
# period of time. In order to do that we set the following
# parameters:
#
# * We use two disks of 1G each, to create a pool of size 2G.
#   The point is that 2G is neither small nor large, and we
#   also want to have 2 disks to introduce indirect vdevs in
#   our setup.
# * We enable compression and set the record size of all
#   filesystems to 8K. The point of compression is to
#   ensure that we are not filling up the whole pool (that's
#   what checkpoint_capacity is for), and the specific
#   record size is set to match the block size of randwritecomp
#   which is used to increase fragmentation by writing on
#   files.
# * We always have 2 big files present of 512M each, which
#   should account for 40%~50% capacity by the end of each
#   test with fragmentation around 50~60%.
# * On each file we attempt to do enough random writes to
#   touch every offset twice on average.
#
# Note that the number of random writes per file is based
# on the following calculation:
#
# ((512M / 8K) * 3) * 2 = 393216, rounded to ~400000
#
# Given that the file is 512M and one write is 8K, we would
# need (512M / 8K) writes to go through the whole file.
# Assuming though that each write has a compression ratio of
# 3, then we want 3 times that to cover the same amount of
# space. Finally, we multiply that by 2 since our goal is to
# touch each offset twice on average.
#
# Examples of those tests are checkpoint_big_rewind and
# checkpoint_discard_busy.
#
# FILEDISKSIZE is not referenced anywhere in this file; presumably
# it is consumed by the tests that source it (TODO confirm).
# DISKSIZE is the size given to each file that backs $NESTEDPOOL.
#
FILEDISKSIZE="1g"
DISKSIZE="1g"
BIGFILESIZE="512M"
RANDOMWRITES="400000"


#
# Create $NESTEDPOOL on top of two files of $DISKSIZE each that live
# in a dedicated dataset ($DISKFS) of $TESTPOOL.
#
# Assumes setup_test_pool has been called beforehand.
#
function setup_nested_pool
{
	log_must zfs create $DISKFS

	log_must truncate -s $DISKSIZE $FILEDISK1
	log_must truncate -s $DISKSIZE $FILEDISK2

	# $FILEDISKS is a space-separated list and must stay unquoted.
	log_must zpool create -O sync=disabled $NESTEDPOOL $FILEDISKS
}

#
# Create $TESTPOOL on $TESTDISK with sync=disabled set on its root
# dataset.
#
function setup_test_pool
{
	log_must zpool create -O sync=disabled $TESTPOOL "$TESTDISK"
}

#
# Create $TESTPOOL first and then $NESTEDPOOL inside of it.
#
function setup_nested_pools
{
	setup_test_pool
	setup_nested_pool
}

#
# Destroy $NESTEDPOOL if it exists and remove the files backing it.
#
function cleanup_nested_pool
{
	if poolexists $NESTEDPOOL; then
		log_must zpool destroy $NESTEDPOOL
	fi

	# -f: the files may already be gone; $FILEDISKS is a
	# space-separated list and must stay unquoted.
	log_must rm -f $FILEDISKS
}

#
# Destroy $TESTPOOL if it exists and clear the labels of every disk
# listed in $DISKS.
#
function cleanup_test_pool
{
	# Keep the loop variable local so that a caller's own $disk
	# is not overwritten.
	typeset disk

	if poolexists $TESTPOOL; then
		log_must zpool destroy $TESTPOOL
	fi

	#
	# We always clear the labels of all disks
	# between tests so imports from zpool or
	# zdb do not get confused with leftover
	# data from old pools.
	#
	# labelclear is not wrapped in log_must, so a failure for a
	# given disk does not fail the cleanup.
	#
	for disk in $DISKS; do
		zpool labelclear -f $disk
	done
}

#
# Tear down $NESTEDPOOL first and then $TESTPOOL, which hosts the
# files backing it.
#
function cleanup_nested_pools
{
	cleanup_nested_pool
	cleanup_test_pool
}

#
# Remove and re-add each vdev to ensure that data is
# moved between disks and indirect mappings are created
#
function introduce_indirection
{
	# Keep the loop variable local so that a caller's own $disk
	# is not overwritten.
	typeset disk

	# $FILEDISKS is a space-separated string (not an array), so it
	# is expanded unquoted like everywhere else in this file.
	for disk in $FILEDISKS; do
		log_must zpool remove $NESTEDPOOL $disk
		log_must wait_for_removal $NESTEDPOOL
		log_mustnot vdevs_in_pool $NESTEDPOOL $disk
		log_must zpool add $NESTEDPOOL $disk
	done
}

#
# Contents written to the per-dataset test files. FILECONTENTS* are
# written by the populate_* functions, NEWFILECONTENTS* by the
# *_change_state_after_checkpoint functions.
#
FILECONTENTS0="Can't wait to be checkpointed!"
FILECONTENTS1="Can't wait to be checkpointed too!"
NEWFILECONTENTS0="I survived after the checkpoint!"
NEWFILECONTENTS2="I was born after the checkpoint!"

#
# Create $FS0 and $FS1 (lz4 compression, 8k records) in $TESTPOOL and
# write the pre-checkpoint contents to one file in each of them.
#
function populate_test_pool
{
	log_must zfs create -o compression=lz4 -o recordsize=8k $FS0
	log_must zfs create -o compression=lz4 -o recordsize=8k $FS1

	# The writes go through log_must so that a failed write is
	# reported here instead of surfacing later as a confusing
	# verification failure. The single quotes defer expansion to
	# eval, which keeps contents and path intact.
	log_must eval 'echo "$FILECONTENTS0" > "$FS0FILE"'
	log_must eval 'echo "$FILECONTENTS1" > "$FS1FILE"'
}

#
# Create $NESTEDFS0 and $NESTEDFS1 (lz4 compression, 8k records) in
# $NESTEDPOOL and write the pre-checkpoint contents to one file in
# each of them.
#
function populate_nested_pool
{
	log_must zfs create -o compression=lz4 -o recordsize=8k $NESTEDFS0
	log_must zfs create -o compression=lz4 -o recordsize=8k $NESTEDFS1

	# The writes go through log_must so that a failed write is
	# reported here. The single quotes defer expansion to eval,
	# which keeps contents and path intact.
	log_must eval 'echo "$FILECONTENTS0" > "$NESTEDFS0FILE"'
	log_must eval 'echo "$FILECONTENTS1" > "$NESTEDFS1FILE"'
}

#
# Verify that $TESTPOOL looks the way populate_test_pool left it:
# $FS0 and $FS1 exist with their original file contents, while $FS2
# and the post-checkpoint contents of $FS0FILE are absent.
#
# $1 - optional; when it is "ro-check" the zdb pass is skipped.
#
function test_verify_pre_checkpoint_state
{
	typeset mode="$1"

	log_must zfs list $FS0
	log_must zfs list $FS1
	log_must [ "$(<"$FS0FILE")" = "$FILECONTENTS0" ]
	log_must [ "$(<"$FS1FILE")" = "$FILECONTENTS1" ]

	#
	# If we've opened the checkpointed state of the
	# pool as read-only without rewinding on-disk we
	# can't really use zdb on it.
	#
	if [[ "$mode" != "ro-check" ]] ; then
		log_must zdb $TESTPOOL
	fi

	#
	# Ensure post-checkpoint state is not present
	#
	log_mustnot zfs list $FS2
	log_mustnot [ "$(<"$FS0FILE")" = "$NEWFILECONTENTS0" ]
}

#
# Verify that $NESTEDPOOL looks the way populate_nested_pool left it:
# $NESTEDFS0 and $NESTEDFS1 exist with their original file contents,
# while $NESTEDFS2 and the post-checkpoint contents of $NESTEDFS0FILE
# are absent.
#
# $1 - optional; when it is "ro-check" the zdb pass is skipped.
#
function nested_verify_pre_checkpoint_state
{
	typeset mode="$1"

	log_must zfs list $NESTEDFS0
	log_must zfs list $NESTEDFS1
	log_must [ "$(<"$NESTEDFS0FILE")" = "$FILECONTENTS0" ]
	log_must [ "$(<"$NESTEDFS1FILE")" = "$FILECONTENTS1" ]

	#
	# If we've opened the checkpointed state of the
	# pool as read-only without rewinding on-disk we
	# can't really use zdb on it.
	#
	if [[ "$mode" != "ro-check" ]] ; then
		log_must zdb $NESTEDPOOL
	fi

	#
	# Ensure post-checkpoint state is not present
	#
	log_mustnot zfs list $NESTEDFS2
	log_mustnot [ "$(<"$NESTEDFS0FILE")" = "$NEWFILECONTENTS0" ]
}

#
# Diverge $TESTPOOL from its pre-checkpoint state: destroy $FS1,
# create $FS2 (lz4 compression, 8k records), overwrite $FS0FILE and
# create $FS2FILE with the post-checkpoint contents.
#
function test_change_state_after_checkpoint
{
	log_must zfs destroy $FS1
	log_must zfs create -o compression=lz4 -o recordsize=8k $FS2

	# The writes go through log_must so that a failed write is
	# reported here. The single quotes defer expansion to eval,
	# which keeps contents and path intact.
	log_must eval 'echo "$NEWFILECONTENTS0" > "$FS0FILE"'
	log_must eval 'echo "$NEWFILECONTENTS2" > "$FS2FILE"'
}

#
# Diverge $NESTEDPOOL from its pre-checkpoint state: destroy
# $NESTEDFS1, create $NESTEDFS2 (lz4 compression, 8k records),
# overwrite $NESTEDFS0FILE and create $NESTEDFS2FILE with the
# post-checkpoint contents.
#
function nested_change_state_after_checkpoint
{
	log_must zfs destroy $NESTEDFS1
	log_must zfs create -o compression=lz4 -o recordsize=8k $NESTEDFS2

	# The writes go through log_must so that a failed write is
	# reported here. The single quotes defer expansion to eval,
	# which keeps contents and path intact.
	log_must eval 'echo "$NEWFILECONTENTS0" > "$NESTEDFS0FILE"'
	log_must eval 'echo "$NEWFILECONTENTS2" > "$NESTEDFS2FILE"'
}

#
# Verify that $TESTPOOL looks the way
# test_change_state_after_checkpoint left it: $FS0 and $FS2 exist
# with the post-checkpoint file contents, zdb succeeds on the pool,
# and $FS1 as well as the old contents of $FS0FILE are gone.
#
function test_verify_post_checkpoint_state
{
	log_must zfs list $FS0
	log_must zfs list $FS2
	log_must [ "$(<"$FS0FILE")" = "$NEWFILECONTENTS0" ]
	log_must [ "$(<"$FS2FILE")" = "$NEWFILECONTENTS2" ]

	log_must zdb $TESTPOOL

	#
	# Ensure pre-checkpointed state that was removed post-checkpoint
	# is not present
	#
	log_mustnot zfs list $FS1
	log_mustnot [ "$(<"$FS0FILE")" = "$FILECONTENTS0" ]
}

#
# Populate $NESTEDPOOL, replace the two test files with files of
# $BIGFILESIZE and perform $RANDOMWRITES random writes on each of
# them to raise fragmentation before a checkpoint is taken.
#
function fragment_before_checkpoint
{
	populate_nested_pool
	log_must mkfile -n $BIGFILESIZE $NESTEDFS0FILE
	log_must mkfile -n $BIGFILESIZE $NESTEDFS1FILE
	log_must randwritecomp $NESTEDFS0FILE $RANDOMWRITES
	log_must randwritecomp $NESTEDFS1FILE $RANDOMWRITES

	#
	# Display fragmentation on test log
	#
	log_must zpool list -v
}

#
# Change and fragment $NESTEDPOOL after a checkpoint was taken
# (destroy $NESTEDFS1, create $NESTEDFS2 with a big file, do random
# writes on both remaining big files) and then verify the exported
# pool with zdb.
#
function fragment_after_checkpoint_and_verify
{
	log_must zfs destroy $NESTEDFS1
	log_must zfs create -o compression=lz4 -o recordsize=8k $NESTEDFS2
	log_must mkfile -n $BIGFILESIZE $NESTEDFS2FILE
	log_must randwritecomp $NESTEDFS0FILE $RANDOMWRITES
	log_must randwritecomp $NESTEDFS2FILE $RANDOMWRITES

	#
	# Display fragmentation on test log
	#
	log_must zpool list -v

	#
	# Typically we would just run zdb at this point and things
	# would be fine. Unfortunately, if there is still any
	# background I/O in the pool the zdb command can fail with
	# checksum errors temporarily.
	#
	# Export the pool when running zdb so the pool is idle and
	# the verification results are consistent.
	#
	log_must zpool export $NESTEDPOOL
	log_must zdb -e -p $FILEDISKDIR $NESTEDPOOL
	log_must zdb -e -p $FILEDISKDIR -kc $NESTEDPOOL
	log_must zpool import -d $FILEDISKDIR $NESTEDPOOL
}

#
# Block until the output of "zpool status" for the given pool no
# longer contains a line with "checkpoint:", polling every 5 seconds.
#
# $1 - name of the pool to watch
#
# NOTE: there is no timeout; the function returns only once the
# "checkpoint:" line has disappeared (or zpool status prints nothing).
#
function wait_discard_finish
{
	typeset pool="$1"

	while zpool status $pool | grep "checkpoint:" > /dev/null; do
		sleep 5
	done
}

#
# Wait for the checkpoint discard of $TESTPOOL to finish.
#
function test_wait_discard_finish
{
	wait_discard_finish $TESTPOOL
}

#
# Wait for the checkpoint discard of $NESTEDPOOL to finish.
#
function nested_wait_discard_finish
{
	wait_discard_finish $NESTEDPOOL
}

#
# Creating the setup for the second group of tests mentioned in the
# block comment at the top of this file can take some time as we are
# doing random writes to raise capacity and fragmentation before
# taking the checkpoint. Thus we create this setup once and save the
# disks of the nested pool in a temporary directory where we can
# reuse it for each test that requires that setup.
#
SAVEDPOOLDIR="${TEST_BASE_DIR}/ckpoint_saved_pool"

#
# Build the populated and fragmented nested pool once and save a copy
# of its backing files in $SAVEDPOOLDIR for later reuse by
# setup_nested_pool_state.
#
function test_group_premake_nested_pools
{
	setup_nested_pools

	#
	# Populate and fragment the pool.
	#
	fragment_before_checkpoint

	#
	# Export and save the pool for other tests.
	#
	log_must zpool export $NESTEDPOOL
	log_must mkdir $SAVEDPOOLDIR
	log_must cp $FILEDISKS $SAVEDPOOLDIR

	#
	# Reimport pool to be destroyed by
	# cleanup_nested_pools function
	#
	log_must zpool import -d $FILEDISKDIR $NESTEDPOOL
}

#
# Remove the saved copy of the nested pool's backing files.
#
function test_group_destroy_saved_pool
{
	log_must rm -rf $SAVEDPOOLDIR
}

#
# Recreate nested pool setup from saved pool: create $TESTPOOL and
# $DISKFS, copy the saved backing files from $SAVEDPOOLDIR into
# $FILEDISKDIR and import $NESTEDPOOL from there.
#
function setup_nested_pool_state
{
	setup_test_pool

	log_must zfs create $DISKFS

	# The glob must stay unquoted so that it expands to the saved files.
	log_must cp $SAVEDPOOLDIR/* $FILEDISKDIR

	log_must zpool import -d $FILEDISKDIR $NESTEDPOOL
}
419