xref: /minix/minix/tests/testrelpol.sh (revision 9f988b79)
1#!/bin/sh
2
3# Idea:
4# Iterate over all the /proc/service entries, and
5#	for each restatability policy call the policy test function if it is
6#	supported. No accounting of failed / successful test is done, as a
7#	failed test can currently provoke cascading effects, so instead we
8#	fail the test as a whole on the first failure found. Live update tests
9#	are currently policy-agnostic.
10#
11# If arguments are given, use them instead of all entries found in
12# /proc/service. Full paths have to be provided on the command line, like
13#   /usr/tests/minix/testrelpol /proc/service/vfs
14# to test vfs recovery only.
15#
16# Supported policies have to be in the POLICIES variable, and define a test
17# function.
18#
19# Known limitations:
20#	 - Currently not all recovery policies are tested
21#	 - Running this test under X11 hangs the X server
22#	 - Live update tests do not test rollback situations
23#
24# To add a new policy, you have to do the following:
25#	1. Add the policy into the active policies array by:
26#	POLICIES="${POLICIES} <policyname>"
27#
28#	2. define the following shell function:
29#	pol_<policyname>() {}
30#	 - it will receive the following parameters:
31#	   + service filename as $1	: the full path to the proc entry
32#	   + label as $2		: the service label
33#	 - which prints 'ok' on success, 'not ok' on failure.
34
35# Currently known policies:
36# 			/*	user	| endpoint	*/
37#	POL_RESET,	/* visible	|  change	*/
38#	POL_RESTART	/* transparent	| preserved	*/
39
40#######################################################################
41# Utility functions & global state initializations
42#######################################################################
# Space-separated list of the policies under test; every policy section
# of this file appends its own name to it.
POLICIES=

# Number of one-second polls done while waiting for a service, so that a
# single test takes at most 10 seconds.
MAX_RETRY=7

# How many times should a multicomponent live update be retried after
# bad luck?
MAX_MULTI_LU_RETRY=3
46
# get_value(key, filename, noerror)
#
# Print the value stored under a key of a /proc/service entry, that is the
# text found between the first and the second ':' of each matching line
# (the blank following the ':' is part of the output).
#	+ key as $1		: grep pattern selecting the line
#	+ filename as $2	: the full path to the proc entry
#	+ noerror as $3		: when non-empty, do not report an
#				  unreadable entry on stderr
# Returns non-zero when the entry cannot be read (grep exit status 2). A
# key which matches nothing prints an empty line.
get_value() {
	local value
	local result

	# The diagnostics of grep are dropped on purpose, an unreadable entry
	# is reported below in our own words.
	value=$(grep -- "$1" "$2" 2>/dev/null)
	result=$?

	if [ "${result}" -eq 2 ]; then
		if [ -z "$3" ]; then
			echo "Error: service $2 down" >&2
		fi
		return 1
	fi

	# Quoted, so that the matched text is neither word-split nor
	# glob-expanded before being cut.
	printf '%s\n' "${value}" | cut -d: -f2
}
62
# wait_for_service(filename, restarts)
#
# Poll a /proc/service entry until its restart counter differs from a
# previously read value.
#	+ filename as $1	: the full path to the proc entry
#	+ restarts as $2	: the restart counter to compare against
# Returns 0 as soon as the counter differs, 1 when it is still identical
# (or could not be read) after MAX_RETRY polls.
wait_for_service() {
	local retry
	local value
	retry=0

	# Arbitrary timeout, found by counting the number of mice crossing
	# the hallway.
	sleep 2
	while [ "${retry}" -lt "${MAX_RETRY}" ]; do
		sleep 1
		retry=$((${retry} + 1))
		# The service might momentarily disappear from the list, hence
		# the 'noerror' flag and the check for an empty value.
		value=$(get_value restarts "$1" noerror)
		if [ -n "${value}" ] && [ "${value}" -ne "$2" ]; then
			return 0
		fi
	done
	return 1
}
82
83#######################################################################
84# Service management routines
85#######################################################################
# prepare_service(filename, label)
#
# Clone the service when its flags contain 'r' but not 'R'.
# NOTE(review): presumably 'r' marks a service which can be replicated and
# 'R' one which already has a replica -- confirm against RS.
#	+ filename as $1	: the full path to the proc entry
#	+ label as $2		: the service label
# Returns 0 when nothing was done, 1 when the service was cloned; in the
# latter case the caller is expected to call cleanup_service afterwards.
prepare_service() {
	local label service flags

	service=$1
	label=$2

	flags=$(get_value flags "${service}")
	case ${flags} in
	*R*)	return 0 ;;
	*r*)	;;
	*)	return 0 ;;
	esac

	service clone "${label}"
	return 1
}
99
# cleanup_service(label)
#
# Undo the clone made by prepare_service.
#	+ label as $1		: the service label
# Returns the exit status of 'service unclone'.
cleanup_service() {
	local label

	label=$1

	# Quoted so that the label always reaches service(8) as one word.
	service unclone "${label}"
}
107
108#######################################################################
109# POLICY: restart
110#######################################################################
# pol_restart(filename, label)
#
# Run 'service fi' on the service (presumably a fault injection -- confirm
# against service(8)), wait for its restart counter to change, then check
# that the counter went up while the endpoint stayed the same.
#	+ filename as $1	: the full path to the proc entry
#	+ label as $2		: the service label
# Prints 'ok' on success, 'not ok' on failure.
POLICIES="${POLICIES} restart"
pol_restart() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	service fi "${label}"
	if ! wait_for_service "${service}" "${restarts_pre}"; then
		echo not ok
		return
	fi

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	# Two separate tests rather than the obsolescent '-a' operator; a
	# value which could not be read makes the comparison fail.
	if [ "${restarts_post}" -gt "${restarts_pre}" ] &&
	   [ "${endpoint_post}" -eq "${endpoint_pre}" ]; then
		echo ok
	else
		echo not ok
	fi
}
141
142#######################################################################
143# POLICY: reset
144#######################################################################
# pol_reset(filename, label)
#
# Run 'service fi' on the service (presumably a fault injection -- confirm
# against service(8)), wait for its restart counter to change, then check
# that the counter went up and that the endpoint changed.
#	+ filename as $1	: the full path to the proc entry
#	+ label as $2		: the service label
# Prints 'ok' on success, 'not ok' on failure.
POLICIES="${POLICIES} reset"
pol_reset() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	service fi "${label}"
	if ! wait_for_service "${service}" "${restarts_pre}"; then
		echo not ok
		return
	fi

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	# This policy doesn't guarantee the endpoint to be kept, but there
	# is a slight chance that it will actually stay the same, and fail
	# the test.
	if [ "${restarts_post}" -gt "${restarts_pre}" ] &&
	   [ "${endpoint_post}" -ne "${endpoint_pre}" ]; then
		echo ok
	else
		echo not ok
	fi
}
178
179#######################################################################
180# Live update tests
181#######################################################################
# lu_test_one(label, prog, result)
#
# Run a single 'service update' and compare its exit status with the
# expected one.
#	+ label as $1		: the service label
#	+ prog as $2		: the program argument given to 'update'
#	+ result as $3		: the expected exit status
# The lu_opts, lu_maxtime and lu_state variables may be set by the caller
# to override the default options, maximum time (3HZ) and state (1).
# Returns 1 when the exit status differs from the expected one, 0 otherwise.
lu_test_one() {
	local label prog result status

	label=$1
	prog=$2
	result=$3

	# Anything the caller did not provide falls back to its default.
	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}

	# lu_opts is left unquoted on purpose: it has to vanish from the
	# command line when it is empty.
	service ${lu_opts} update ${prog} -label ${label} \
		-maxtime ${lu_maxtime} -state ${lu_state}
	status=$?

	[ ${status} -ne ${result} ] && return 1
	return 0
}
198
# lu_test(filename, label)
#
# Run the single-component live update scenarios on a service, then check
# that neither its restart counter nor its endpoint changed.
#	+ filename as $1	: the full path to the proc entry
#	+ label as $2		: the service label
# Prints 'ok' on success and 'not ok' when the counters changed. Prints
# nothing and returns non-zero as soon as one scenario does not end with
# the expected status.
lu_test() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	lu_test_one "${label}" self 0 || return

	# Test live update "prepare only", except for pm, rs, vfs and vm.
	# The label is matched as a whole, not as a substring of the list.
	case ${label} in
	pm|rs|vfs|vm)
		;;
	*)
		lu_opts="-o" lu_test_one "${label}" self 0 || return
		;;
	esac

	# Test live update initialization crash
	lu_opts="-x" lu_test_one "${label}" self 200 || return

	if [ "x${label}" != "xrs" ]; then
		# Test live update initialization failure
		lu_opts="-y" lu_test_one "${label}" self 78 || return

		# Test live update initialization timeout
		lu_maxtime="1HZ" lu_opts="-z" lu_test_one "${label}" self 4 || return
	fi

	# Test live update from SEF_LU_STATE_EVAL state
	lu_maxtime="1HZ" lu_state="5" lu_test_one "${label}" self 4 || return

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	# Make sure endpoint and restarts are preserved
	if [ "${restarts_post}" -eq "${restarts_pre}" ] &&
	   [ "${endpoint_post}" -eq "${endpoint_pre}" ]; then
		echo ok
	else
		echo not ok
	fi
}
248
# multi_lu_test_one(expected, once_index, label...)
#
# Schedule a live update for every given label, run them all at once with
# 'service sysctl upd_run' and compare its exit status with the expected
# one.
#	+ expected as $1	: the expected exit status of the run
#	+ once_index as $2	: 1-based position of the label which gets
#				  the lu_*_once settings; 0 for none
#	+ labels as $3...	: the service labels
# The lu_opts, lu_maxtime and lu_state variables may be set by the caller,
# as well as their lu_*_once variants which default to the former.
# Returns 0 when the run ended with the expected status, non-zero
# otherwise.
multi_lu_test_one() {
	local expected=$1
	local once_index=$2
	shift 2
	local labels="$*"
	local ret=1
	local retry=0
	local index result label

	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}
	lu_opts_once=${lu_opts_once:-$lu_opts}
	lu_maxtime_once=${lu_maxtime_once:-$lu_maxtime}
	lu_state_once=${lu_state_once:-$lu_state}

	while [ "${ret}" -eq 1 ] && [ "${retry}" -lt "${MAX_MULTI_LU_RETRY}" ]; do
		index=0
		for label in ${labels}; do
			index=$((${index} + 1))

			# The option variables are left unquoted on purpose:
			# they have to vanish when they are empty.
			if [ "${index}" -eq "${once_index}" ]; then
				service ${lu_opts_once} -q update self \
					-label "${label}" \
					-maxtime "${lu_maxtime_once}" \
					-state "${lu_state_once}" || ret=2
			else
				service ${lu_opts} -q update self \
					-label "${label}" \
					-maxtime "${lu_maxtime}" \
					-state "${lu_state}" || ret=2
			fi
		done
		service sysctl upd_run
		result=$?

		# We may experience transient failures as a result of services
		# trying to talk to each other while being prepared for the
		# live update.  In that case we get result code 4.  If that is
		# not the result code we expected, try again for a limited
		# number of times.
		# NOTE(review): an expected result resets ret to 0 even when
		# scheduling one of the updates failed above (ret=2) --
		# confirm that this is intended.
		if [ "${result}" -eq "${expected}" ]; then
			ret=0
		elif [ "${result}" -ne 4 ]; then
			break
		fi
		retry=$((${retry} + 1))
	done

	return ${ret}
}
305
# multi_lu_test(label...)
#
# Run the multicomponent live update scenarios on the given labels.
#	+ labels as $1...	: the service labels
# Returns 0 when every scenario ended with the expected status, 1 as soon
# as one of them did not.
multi_lu_test() {
	local y_result z_result
	local have_rs=0
	local labels="$*"
	local label

	# Some of the results depend on whether RS is part of the live update.
	for label in ${labels}; do
		if [ "x${label}" = "xrs" ]; then
			have_rs=1
		fi
	done

	if [ "${have_rs}" -eq 1 ]; then
		y_result=200
		z_result=200
	else
		y_result=78
		z_result=4
	fi

	# Arguments are: expected status, position of the label which gets
	# the lu_*_once settings, then the labels themselves.
	multi_lu_test_one 0 0 ${labels} || return 1
	lu_opts_once="-x" multi_lu_test_one 200 2 ${labels} || return 1
	lu_opts_once="-y" multi_lu_test_one ${y_result} 3 ${labels} || return 1
	lu_maxtime_once="1HZ" lu_opts_once="-z" multi_lu_test_one ${z_result} 2 ${labels} || return 1
	lu_maxtime_once="1HZ" lu_state_once="5" multi_lu_test_one 4 3 ${labels} || return 1

	return 0
}
337
# multi_lu_test_wrapper(label...)
#
# Announce a multicomponent live update test, run it on the given labels
# and report its failure, if any.
# Returns 0 on success, 1 on failure.
multi_lu_test_wrapper() {
	echo "# testing $* :: multicomponent live update+rollback"
	multi_lu_test "$@" && return 0

	echo "not ok # failed multicomponent live update+rollback"
	return 1
}
347
348#######################################################################
349# main()
350#######################################################################
# main(filename...)
#
# Run the policy tests on the given /proc/service entries or, without
# arguments, on all of them. In the latter case the single-component and
# multicomponent live update tests are run as well.
#	+ filenames as $1...	: full paths to the proc entries (optional)
# Never returns: exits with 0 when everything passed, with 1 on the first
# failure or when an X server is running.
main() {
	local services service label pol
	local service_policies service_flags
	local cleanup result multi_lu_labels

	# If there is a running X server, refuse to run at all.
	if ps -ef | grep -v grep | grep -q /usr/X11R7/bin/X; then
		echo "# This test can't be run while a Xserver is running"
		echo "not ok # A Xserver is running"
		exit 1
	fi

	if [ $# -eq 0 ]; then
		services=$(echo /proc/service/*)
	else
		services="$*"
	fi

	for service in ${services}; do
		label=$(basename "${service}")
		service_policies=$(grep -- policies "${service}" | cut -d: -f2)
		for pol in ${service_policies}; do
			# Check if the supported policy is under test. The
			# name is matched as a whole word, so that a policy
			# which merely is part of a tested one is skipped.
			case " ${POLICIES} " in
			*" ${pol} "*)	;;
			*)	continue ;;
			esac

			echo "# testing ${label} :: ${pol}"
			cleanup=0
			# A non-zero status means that the service was cloned
			# and has to be uncloned once the test is done.
			prepare_service "${service}" "${label}" || cleanup=1
			result=$(pol_${pol} "${service}" "${label}")
			if [ "x${result}" != "xok" ]; then
				echo "not ok # failed ${label}, ${pol}"
				exit 1
			fi
			if [ "${cleanup}" -eq 1 ]; then
				cleanup_service "${label}"
			fi
		done
	done
	if [ $# -gt 0 ]; then
		echo "ok # partial test for $* successful"
		exit 0
	fi

	multi_lu_labels=""
	for service in ${services}; do
		label=$(basename "${service}")
		service_policies=$(grep -- policies "${service}" | cut -d: -f2)

		# Only the services which list at least one policy are
		# live updated.
		case ${service_policies} in
		*[a-zA-Z]*)	;;
		*)	continue ;;
		esac

		echo "# testing ${label} :: live update+rollback"
		result=$(lu_test "${service}" "${label}")
		if [ "x${result}" != "xok" ]; then
			echo "not ok # failed ${label}, live update+rollback"
			exit 1
		fi

		# rs and vm are added explicitly to the multicomponent
		# tests below.
		case ${label} in
		rs|vm)	continue ;;
		esac

		# Only the services whose flags contain 's' take part in
		# the multicomponent tests.
		service_flags=$(get_value flags "${service}")
		case ${service_flags} in
		*s*)	multi_lu_labels="${multi_lu_labels} ${label}" ;;
		esac
	done

	multi_lu_test_wrapper ${multi_lu_labels} || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} rs || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm rs || exit 1

	echo ok
	exit 0
}
434
435main "$@"
436