1#!/bin/ksh -p
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22#
23# Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
24# Copyright (c) 2019 by Delphix. All rights reserved.
25#
26
27. $STF_SUITE/include/libtest.shlib
28. $STF_SUITE/tests/functional/fault/fault.cfg
29
30#
31# DESCRIPTION:
32# Testing Fault Management Agent ZED Logic - Automated Auto-Online Test.
33#
34# STRATEGY:
# 1. Create a pool
# 2. Export the pool
# 3. Offline a disk
# 4. Import the pool with the disk missing
# 5. Online the disk
# 6. ZED polls for an event change so that the onlined disk is automatically
#    added back to the pool.
42#
# Creates a raidz1 zpool using persistent disk path names
# (i.e. not /dev/sdc).
#
# If loop devices are used, then a scsi_debug device is added to the pool;
# otherwise just an sd device is used as the auto-online device.
# Auto-online matches by devid.
49#
# Declare the contexts this test is allowed to run in.
verify_runnable "both"

# The test needs physical devices; report it as unsupported otherwise.
is_physical_device $DISKS || \
    log_unsupported "Unsupported disks for this test."
55
#
# Exit handler registered with log_onexit.
#
# Re-inserts any non-loop test disk that is currently missing, destroys
# $TESTPOOL and unloads the scsi_debug module.
#
# Globals read: DISK1, DISK2, DISK3, TESTPOOL, and - when set by the main
# loop - offline_disk and host.
#
function cleanup
{
	typeset disk scsi_host

	# Replace any disk that may have been removed at failure time.
	for disk in $DISK1 $DISK2 $DISK3; do
		# Skip loop devices and devices that currently exist.
		is_loop_device $disk && continue
		is_real_device $disk && continue

		scsi_host=$(get_scsi_host $disk)

		# The main loop records the SCSI host *before* it removes a
		# disk. If the lookup yields nothing for the disk that loop
		# took offline, reuse the recorded value so insert_disk is not
		# handed an empty host.
		# NOTE(review): assumes get_scsi_host prints nothing for a
		# removed device - confirm against its implementation.
		if [[ -z $scsi_host && $disk == "$offline_disk" ]]; then
			scsi_host=$host
		fi

		insert_disk $disk $scsi_host
	done
	destroy_pool $TESTPOOL
	unload_scsi_debug
}
70
log_assert "Testing automated auto-online FMA test"

log_onexit cleanup

# Default: each of the three test disks is taken through the
# offline/online cycle.
autoonline_disks="$DISK1 $DISK2 $DISK3"

# With loop devices a scsi_debug device is loaded instead and becomes the
# only auto-online candidate; its persistent name is kept for pool creation.
if is_loop_device $DISK1; then
	load_scsi_debug $SDSIZE $SDHOSTS $SDTGTS $SDLUNS '512b'
	SDDEVICE=$(get_debug_device)
	SDDEVICE_ID=$(get_persistent_disk_name $SDDEVICE)
	autoonline_disks="$SDDEVICE"
fi
84
# Wipe any existing labels from the three by-id device paths. The command is
# deliberately not wrapped in log_must, so a failure here does not fail the
# test.
for i in 0 1 2; do
	zpool labelclear -f "/dev/disk/by-id/${devs_id[i]}"
done
90
# Pick the raidz1 member list first, then create the pool in one place.
typeset pool_vdevs
if is_loop_device $DISK1; then
	# One scsi_debug device plus the 3 loop devices.
	pool_vdevs="$SDDEVICE_ID $DISK1 $DISK2 $DISK3"
elif ( is_real_device $DISK1 || is_mpath_device $DISK1 ); then
	# Persistent names for sd devices.
	pool_vdevs="${devs_id[0]} ${devs_id[1]} ${devs_id[2]}"
else
	log_fail "Disks are not supported for this test"
fi
log_must zpool create -f $TESTPOOL raidz1 $pool_vdevs

# Put some data in the pool.
log_must mkfile $FSIZE /$TESTPOOL/data
105
#
# For every candidate disk: export the pool, remove the disk, import the
# pool degraded, re-insert the disk and wait for a resilver_finish event,
# then verify that the pool is online again.
#
for offline_disk in $autoonline_disks
do
	log_must zpool export -F $TESTPOOL

	# Capture the SCSI host while the disk is still present; it is
	# passed to insert_disk after the disk has been removed.
	host=$(get_scsi_host $offline_disk)

	# Offline disk
	remove_disk $offline_disk

	# Reimport pool with drive missing
	log_must zpool import $TESTPOOL
	if ! check_state $TESTPOOL "" "degraded"; then
		log_fail "$TESTPOOL is not degraded"
	fi

	# Clear zpool events so that only events generated from here on
	# are seen by the polling loop below.
	log_must zpool events -c

	# Online disk
	insert_disk $offline_disk $host

	log_note "Delay for ZED auto-online"
	typeset -i timeout=0
	while true; do
		# Give up after MAXTIMEOUT one-second polls.
		if ((timeout == $MAXTIMEOUT)); then
			log_fail "Timeout occurred"
		fi
		((timeout++))

		sleep 1

		# The event class is a literal string, so match it with -F
		# (its dots are not regex wildcards). grep -F replaces the
		# obsolescent egrep, and -q keeps the match silent.
		if zpool events $TESTPOOL | \
		    grep -qF "sysevent.fs.zfs.resilver_finish"; then
			log_note "Auto-online of $offline_disk is complete"
			sleep 1
			break
		fi
	done

	# Validate auto-online was successful
	if ! check_state $TESTPOOL "" "online"; then
		log_fail "$TESTPOOL is not back online"
	fi
	sleep 2
done
log_must zpool destroy $TESTPOOL

log_pass "Auto-online test successful"
156