1#!/bin/ksh -p
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or https://opensource.org/licenses/CDDL-1.0.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
24#
25
26# DESCRIPTION:
#	Verify that the number of IO and checksum events matches the error
#	counters in zpool status.
29#
30# STRATEGY:
31#	1. Create a mirror, raidz, or draid pool
32#	2. Inject read/write IO errors or checksum errors
#	3. Verify that the number of errors in zpool status matches the
#	   corresponding number of error events.
35#	4. Repeat for all combinations of mirror/raidz/draid and io/checksum
36#	   errors.
37#
38
39. $STF_SUITE/include/libtest.shlib
40
verify_runnable "both"

# Pool under test, its mountpoint, and the three files used as its vdevs.
POOL=error_pool
MOUNTDIR="${TEST_BASE_DIR}/mount"
VDEV1="${TEST_BASE_DIR}/file1"
VDEV2="${TEST_BASE_DIR}/file2"
VDEV3="${TEST_BASE_DIR}/file3"

# Size in bytes (20 MiB) of the data file written into the pool.
FILESIZE=$((20 << 20))

# Current tunable values, captured so that cleanup can restore them.
OLD_CHECKSUMS="$(get_tunable CHECKSUM_EVENTS_PER_SECOND)"
OLD_LEN_MAX="$(get_tunable ZEVENT_LEN_MAX)"
51
#
# Exit handler: restore the saved tunables, clear injected faults and
# pending events, destroy the test pool if it still exists, and remove
# the vdev files and the mount directory.
#
function cleanup
{
	# Put the event tunables back to the values saved at startup.
	log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS
	log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX

	# Remove all injection handlers and flush the event queue.
	log_must zinject -c all
	log_must zpool events -c

	# The pool only exists if a test was interrupted part-way through.
	poolexists $POOL && log_must destroy_pool $POOL

	log_must rm -fd $VDEV1 $VDEV2 $VDEV3 $MOUNTDIR
}
64
log_assert "Check that the number of zpool errors match the number of events"

log_onexit cleanup

# Set our thresholds high so we never ratelimit or drop events.  These are
# checked with log_must: if a threshold silently failed to change, events
# could be ratelimited or dropped and the test would later fail with a
# misleading error/event count mismatch.
log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000
log_must set_tunable64 ZEVENT_LEN_MAX 20000

# Create the backing files for the vdevs and the pool's mountpoint.
log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3
log_must mkdir -p $MOUNTDIR
75
# Run an error test on a specific type of pool and verify that the error
# counter 'zpool status' reports for $VDEV1 equals the number of matching
# events reported by 'zpool events'.
#
# $1: pool - mirror, raidz, draid
# $2: test type - corrupt (checksum error), io
# $3: read, write
#
# Calls log_fail when no errors or events were seen, when the two counts
# differ, or when the test type is not recognized.
function do_test
{
	typeset pooltype=$1
	typeset err=$2
	typeset rw=$3
	# Keep all working state local so a previous invocation can never
	# leak stale counts into this one.
	typeset out events val str col

	log_note "Testing $err $rw on $pooltype"
	log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL \
	    $pooltype $VDEV1 $VDEV2 $VDEV3
	log_must zpool events -c
	log_must zfs set compression=off $POOL

	# For read tests the file is written before injection begins.
	if [[ "$rw" == "read" ]] ; then
		log_must mkfile $FILESIZE $MOUNTDIR/file
	fi

	log_must zinject -d $VDEV1 -e $err -T $rw -f 100 $POOL

	# Generate the IO that the injection handler will hit: new writes
	# for write tests, a scrub of the existing file for read tests.
	if [[ "$rw" == "write" ]] ; then
		log_must mkfile $FILESIZE $MOUNTDIR/file
		sync_pool $POOL
	else
		log_must zpool scrub $POOL
		wait_scrubbed $POOL
	fi

	log_must zinject -c all

	# Wait for the pool to settle down and finish resilvering (if
	# necessary).  We want the errors to stop incrementing before we
	# check the error and event counts.
	while is_pool_resilvering $POOL ; do
		sleep 1
	done

	# Pick the event pattern and the 'zpool status -p' column
	# (NAME STATE READ WRITE CKSUM) that belong to this test.
	case "$err" in
	corrupt)
		str="checksum"
		col=5
		events=$(zpool events | grep -c checksum)
		;;
	io)
		# grep -c reports 0 when nothing matches; counting lines
		# of a captured (possibly empty) string would report 1.
		events=$(zpool events | grep -c io)
		if [[ "$rw" == "read" ]] ; then
			str="read IO"
			col=3
		else
			str="write IO"
			col=4
		fi
		;;
	*)
		log_fail "Unknown error type '$err'"
		;;
	esac

	out="$(zpool status -p $POOL | grep "$VDEV1")"
	val=$(echo "$out" | awk -v col="$col" '{print $col}')

	# [[ ]] short-circuits, so the numeric comparisons are only
	# evaluated once the values are known to be non-empty.
	if [[ -z "$val" || "$val" -eq 0 || \
	    -z "$events" || "$events" -eq 0 ]] ; then
		log_fail "Didn't see any errors or events ($val/$events)"
	fi

	if [[ "$val" -ne "$events" ]] ; then
		log_fail "$val $pooltype $str errors != $events events"
	else
		log_note "$val $pooltype $str errors == $events events"
	fi

	log_must zpool destroy $POOL
}
145
# Exercise every error scenario against mirror, raidz, and draid pools.
# Each scenario is an "<error type> <read|write>" pair; it is deliberately
# left unquoted below so it splits into do_test's second and third arguments.
for pooltype in mirror raidz draid; do
	for scenario in "corrupt read" "io read" "io write"; do
		do_test $pooltype $scenario
	done
done

log_pass "The number of errors matched the number of events"
154