1#!/bin/ksh -p
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or https://opensource.org/licenses/CDDL-1.0.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright (c) 2023, Klara Inc.
25#
26
27# DESCRIPTION:
#	Verify that delay events from multiple vdevs do not degrade any vdev
29#
30# STRATEGY:
#	1. Create a pool with a 4 disk raidz vdev
32#	2. Inject slow io errors
33#	3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
34#
35
36. $STF_SUITE/include/libtest.shlib
37
# Pool name, the directory the test data lives in, and the file read back
# while slow I/O is injected.
TESTPOOL="slow_io_pool"
TESTDIR="${TEST_BASE_DIR}/zed_slow_io"
FILEPATH="${TESTDIR}/slow_io.testfile"

# File-backed vdevs; the $$ (shell PID) suffix makes the names unique per run.
VDEV1="${TEST_BASE_DIR}/vdevfile1.$$"
VDEV2="${TEST_BASE_DIR}/vdevfile2.$$"
VDEV3="${TEST_BASE_DIR}/vdevfile3.$$"
VDEV4="${TEST_BASE_DIR}/vdevfile4.$$"
VDEVS="${VDEV1} ${VDEV2} ${VDEV3} ${VDEV4}"

# Snapshot the slow I/O tunables before the test changes them.
OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)

verify_runnable "both"
51
#
# Exit handler: clear all zinject handlers, tear down the pool if it is
# still around, stop ZED, delete the vdev files and restore both slow I/O
# tunables to the values saved in OLD_SLOW_IO / OLD_SLOW_IO_EVENTS.
#
function cleanup
{
	typeset zedlog="$ZEDLET_DIR/zed.log"

	log_must zinject -c all

	# The test body destroys the pool on its normal paths, so a pool that
	# is still present means something went wrong: dump some diagnostics
	# before removing it.
	if poolexists $TESTPOOL; then
		log_note "$(zpool status -s $TESTPOOL)"
		echo "=================== zed log search ==================="
		grep "Diagnosis Engine" $zedlog
		destroy_pool $TESTPOOL
	fi

	log_must zed_stop
	log_must rm -f $VDEVS

	# Put the tunables back the way they were found.
	log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
}
69
#
# Arm slow I/O detection and inject artificial latency on every vdev.
#
# For each path in $VDEVS this sets the per-vdev slow_io_n and slow_io_t
# properties, then changes the ZIO_SLOW_IO_MS and SLOW_IO_EVENTS_PER_SECOND
# tunables, and finally adds a zinject delay handler to each vdev.
#
# Globals read: VDEVS, TESTPOOL
#
function start_slow_io
{
	# Keep the loop variable local; without typeset it would be created
	# in (or clobber a variable of) the global scope.
	typeset vdev

	for vdev in $VDEVS
	do
		log_must zpool set slow_io_n=4 $TESTPOOL $vdev
		log_must zpool set slow_io_t=60 $TESTPOOL $vdev
	done
	# Checked like every other step so a failed sync is not silently lost.
	log_must zpool sync

	log_must set_tunable64 ZIO_SLOW_IO_MS 10
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000

	for vdev in $VDEVS
	do
		# -D takes latency:lanes (see zinject(8)).
		log_must zinject -d $vdev -D10:1 $TESTPOOL
	done
	log_must zpool sync
}
88
#
# Stop generating slow I/O: restore both tunables to the values saved in
# OLD_SLOW_IO / OLD_SLOW_IO_EVENTS, then clear all zinject handlers.
#
function stop_slow_io
{
	typeset saved_ms="$OLD_SLOW_IO"
	typeset saved_rate="$OLD_SLOW_IO_EVENTS"

	log_must set_tunable64 ZIO_SLOW_IO_MS $saved_ms
	log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $saved_rate

	# "-c all" removes every registered handler.
	log_must zinject -c all
}
96
#
# Drive slow I/O on every vdev of a raidz pool and check how ZED reacts.
#
#   1. Create the pool from the file vdevs and write a 20M file.
#   2. Read the file back while delays are injected on all vdevs.
#   3. Poll (up to 60 tries, 1s apart) for at least 50 delay ereports; if
#      they never show up, log a note, destroy the pool and return without
#      judging the result.
#   4. Poll (up to 75 tries, 1s apart) for a "retiring case" entry in the
#      ZED log; fail if none appears or if any "zpool_vdev_degrade" entry
#      was logged.
#
# Globals read: VDEVS, TESTPOOL, FILEPATH, ZEDLET_DIR
#
function multiple_slow_vdevs_test
{
	typeset -i min_events=50	# delay ereports needed to judge ZED
	typeset -i waited=0		# polling iterations (~seconds) so far
	typeset -i events=0
	typeset -i skips=0
	typeset -i degrades=0
	typeset zedlog="$ZEDLET_DIR/zed.log"

	log_must truncate -s 1G $VDEVS
	default_raidz_setup_noexit $VDEVS

	log_must zpool events -c
	log_must zfs set compression=off $TESTPOOL
	log_must zfs set primarycache=none $TESTPOOL
	log_must zfs set recordsize=4K $TESTPOOL

	log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=20
	log_must zpool sync

	#
	# Read the file with slow io injected on the disks
	# This will cause multiple errors on each disk to trip ZED SERD
	#
	#   pool: slow_io_pool
	#  state: ONLINE
	# config:
	#
	#         NAME                           STATE  READ WRITE CKSUM  SLOW
	#         slow_io_pool                   ONLINE    0     0     0     -
	#           raidz1-0                     ONLINE    0     0     0     -
	#             /var/tmp/vdevfile1.499278  ONLINE    0     0     0   113
	#             /var/tmp/vdevfile2.499278  ONLINE    0     0     0   109
	#             /var/tmp/vdevfile3.499278  ONLINE    0     0     0    96
	#             /var/tmp/vdevfile4.499278  ONLINE    0     0     0   109
	#
	start_slow_io
	# dd's status is not checked here; the event count below decides
	# whether enough slow I/O was generated.
	dd if=$FILEPATH of=/dev/null bs=1M count=20 2>/dev/null
	stop_slow_io

	# count events available for processing
	while (( waited < 60 )); do
		# Every dot is escaped so only the exact event class matches.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		(( events >= min_events )) && break
		waited=$((waited + 1))
		sleep 1
	done
	log_note "$events delay events found"
	if (( events < min_events )); then
		log_note "bailing: not enough events to complete the test"
		destroy_pool $TESTPOOL
		return
	fi

	#
	# give slow ZED a chance to process the delay events
	#
	waited=0
	while (( waited < 75 )); do
		skips=$(grep -c "retiring case" "$zedlog")
		(( skips > 0 )) && break
		waited=$((waited + 1))
		sleep 1
	done

	log_note "$skips degrade skips in ZED log after $waited seconds"
	(( skips > 0 )) || log_fail "expecting to see skips"

	degrades=$(grep -c "zpool_vdev_degrade" "$zedlog")
	log_note "$degrades vdev degrades in ZED log"
	(( degrades == 0 )) || \
		log_fail "expecting no degrade events, found $degrades"

	destroy_pool $TESTPOOL
}
169
# The same message is reported when the assertion is registered and when
# the test passes, so keep it in one place.
ZED_SLOW_IO_ASSERTION="Test ZED slow io across multiple vdevs"

log_assert "$ZED_SLOW_IO_ASSERTION"
log_onexit cleanup

# Drain ZED events before starting the daemon, then run the test body.
log_must zed_events_drain
log_must zed_start
multiple_slow_vdevs_test

log_pass "$ZED_SLOW_IO_ASSERTION"
178