1#!/bin/ksh -p
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
24# Copyright (c) 2021 by Delphix. All rights reserved.
25#
26
27# DESCRIPTION:
#	Verify that new errors after a pool scrub are not considered a duplicate
29#
30# STRATEGY:
31#	1. Create a raidz pool with a file
32#	2. Inject garbage into one of the vdevs
33#	3. Scrub the pool
34#	4. Observe the checksum error counts
35#	5. Repeat inject and pool scrub
36#	6. Verify that second pass also produces similar errors (i.e. not
37#	   treated as a duplicate)
38#
39
. $STF_SUITE/include/libtest.shlib

verify_runnable "both"

# Pool name and sizing knobs for the test.
POOL="test_pool"
FILESIZE=15M
DAMAGEBLKS=10

# Everything the test creates on disk lives under $TEST_BASE_DIR.
MOUNTDIR="${TEST_BASE_DIR}/mount"
FILEPATH="${MOUNTDIR}/target"
SUPPLY="${TEST_BASE_DIR}/supply"
VDEV1="${TEST_BASE_DIR}/vfile1"
VDEV2="${TEST_BASE_DIR}/vfile2"
VDEV3="${TEST_BASE_DIR}/vfile3"

# Program that damage_and_repair runs and greps for "checksum".
EREPORTS="${STF_SUITE}/tests/functional/cli_root/zpool_events/ereports"

# Tunable values as they were before the test touches them; the OLD_*
# values are written back by cleanup, RETAIN_MAX is only logged.
OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND)
RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX)
OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
59
#
# Undo everything the test changed or created.
#
# Restores the CHECKSUM_EVENTS_PER_SECOND and ZEVENT_LEN_MAX tunables to the
# values saved in $OLD_CHECKSUMS / $OLD_LEN_MAX, clears pending zevents,
# exports $POOL if it still exists, and removes the vdev backing files, the
# garbage supply file and the mountpoint directory.
#
function cleanup
{
	log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS
	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX

	zpool events -c
	if poolexists $POOL ; then
		zpool export $POOL
	fi

	# $SUPPLY is created by the test alongside the vdev files, so it has
	# to go as well or it is left behind in $TEST_BASE_DIR.
	log_must rm -f "$VDEV1" "$VDEV2" "$VDEV3" "$SUPPLY"

	# Best effort: the directory may not exist if setup failed early, and
	# rmdir refuses to remove it if something is still mounted/left there.
	if [[ -d $MOUNTDIR ]]; then
		rmdir "$MOUNTDIR"
	fi
}
71
#
# Run one damage/scrub cycle against $VDEV1.
#
# Clears the error state of $VDEV1 and any pending zevents, overwrites
# $DAMAGEBLKS 1M blocks of $VDEV1 (starting 4M into the file) with the
# contents of $SUPPLY, then scrubs $POOL and waits for the scrub to finish.
#
# $1 - pass number, only used to label the log output
#
# Fails the test when 'zpool status' shows no repair summary or reports
# that the scrub repaired "0B", since the pass then proves nothing.
#
function damage_and_repair
{
	typeset pass=$1
	typeset repaired

	log_must zpool clear $POOL $VDEV1
	log_must zpool events -c

	log_note "injecting damage to $VDEV1"
	log_must dd conv=notrunc if=$SUPPLY of=$VDEV1 bs=1M seek=4 count=$DAMAGEBLKS
	log_must zpool scrub $POOL
	log_must zpool wait -t scrub $POOL
	log_note "pass $pass observed $($EREPORTS | grep -c checksum) checksum ereports"

	# Fourth field of the "scan: scrub repaired <amount> in ..." line.
	repaired=$(zpool status $POOL | awk '/scan: scrub repaired/ { print $4 }')
	if [[ -z $repaired ]]; then
		# Without this an unparsable status would be logged as a
		# successful repair of "" bytes.
		log_fail "INVALID TEST -- no scrub repair summary in zpool status"
	elif [[ $repaired == "0B" ]]; then
		log_fail "INVALID TEST -- expected scrub to repair some blocks"
	else
		log_note "$repaired repaired during scrub"
	fi
}
90
#
# Print the fifth column of the 'zpool status -p' line for $VDEV1 in $POOL,
# which this test uses as the vdev's checksum error count.
#
# The vdev is selected by comparing the first column with $VDEV1 exactly
# rather than by pattern: a regex/substring match on the path would also
# pick up names such as "${VDEV1}0" and produce more than one number.
# Prints nothing if no line matches.
#
function checksum_error_count
{
	zpool status -p "$POOL" |
	    awk -v vdev="$VDEV1" '$1 == vdev { print $5; exit }'
}
95
#
# Test body: damage and scrub the pool twice and require that the second
# pass reports roughly as many checksum errors on $VDEV1 as the first.
#
assertion="Damage to recently repaired blocks should be reported/counted"
log_assert "$assertion"
log_note "zevent retain max setting: $RETAIN_MAX"

log_onexit cleanup

# Set our threshold high to avoid dropping events.  These are checked with
# log_must (as cleanup does when restoring them) because dropped events
# would make the error counts below meaningless.
log_must set_tunable64 ZEVENT_LEN_MAX 20000
log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000

# Initialize resources for the test
log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
log_must dd if=/dev/urandom of=$SUPPLY bs=1M count=$DAMAGEBLKS
log_must mkdir -p $MOUNTDIR
log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL raidz $VDEV1 $VDEV2 $VDEV3
log_must zfs set compression=off recordsize=16k $POOL
# create a file full of zeros
log_must mkfile -v $FILESIZE $FILEPATH
sync_pool $POOL

# run once and observe the checksum errors
damage_and_repair 1
errcnt=$(checksum_error_count)
log_note "$errcnt errors observed"

# Without a usable, non-zero baseline the comparison below would succeed
# no matter what the second pass reports.
if [[ -z $errcnt || $errcnt == *[!0-9]* ]] || (( errcnt == 0 )); then
	log_fail "INVALID TEST -- expected checksum errors on $VDEV1 after first scrub"
fi

# set expectation of at least 75% of what we observed in first pass,
# but never less than one error so a silent second pass cannot succeed
(( expected = (errcnt * 75) / 100 ))
if (( expected < 1 )); then
	expected=1
fi

# run again and we should observe new checksum errors
damage_and_repair 2
errcnt=$(checksum_error_count)

log_must zpool destroy $POOL

# An empty count is treated as zero errors.
if (( ${errcnt:-0} < expected )); then
	log_fail "FAILED -- expecting at least $expected checksum errors but only observed $errcnt"
else
	log_note "observed $errcnt new checksum errors after a scrub"
	log_pass "$assertion"
fi
135
136