#!/bin/sh

# Idea:
# Iterate over all the /proc/service entries, and
# for each restartability policy call the policy test function if it is
# supported. No accounting of failed / successful tests is done, as a
# failed test can currently provoke cascading effects, so instead we
# fail the test as a whole on the first failure found. Live update tests
# are currently policy-agnostic.
#
# If arguments are given, use these instead of all entries found in
# /proc/service. Full paths have to be provided on the command line, like
#   /usr/tests/minix/testrelpol /proc/service/vfs
# to test vfs recovery only.
#
# Supported policies have to be in the POLICIES variable, and define a test
# function.
#
# Known limitations:
#  - Currently not all recovery policies are tested
#  - Running this test under X11 hangs the X server
#  - Live update tests do not test rollback situations
#
# To add a new policy, you have to do the following:
#  1. Add the policy into the active policies list by:
#       POLICIES="${POLICIES} <policyname>"
#
#  2. Define the following shell function:
#       pol_<policyname>() {}
#     - it will receive the following parameters:
#       + service filename as $1 : the full path to the proc entry
#       + label as $2            : the service label
#     - which prints 'ok' on success, 'not ok' on failure.

# Currently known policies:
#                /*    user     | endpoint  */
#   POL_RESET,   /*   visible   |  change   */
#   POL_RESTART  /* transparent | preserved */

#######################################################################
# Utility functions & global state initializations
#######################################################################
POLICIES=""
MAX_RETRY=7		# so that a single test takes at most 10 seconds
MAX_MULTI_LU_RETRY=3	# how many times should we retry after bad luck?

# get_value(key, filename, noerror)
#
# Print the second ':'-separated field of the line(s) of <filename>
# matching <key>. When grep itself fails (exit status 2, e.g. the file
# cannot be read), print an error on stderr instead, unless <noerror> is
# a non-empty string.
get_value() {
	local value
	local result

	value=$(grep "$1" "$2" 2>/dev/null)
	result=$?

	if test $result -ne 2
	then
		# $value is deliberately left unquoted: word splitting
		# collapses the matched text onto a single line before cut.
		echo $value | cut -d: -f2
	else
		test -z "$3" && echo "Error: service $2 down" >&2
	fi
}

# wait_for_service(filename, restarts_pre)
#
# Poll the proc entry <filename> once per second, at most MAX_RETRY times,
# until its 'restarts' value differs from <restarts_pre>.
# Returns 0 as soon as the value changed, 1 on timeout.
wait_for_service() {
	local retry
	local value
	retry=0

	# Arbitrary timeout, found by counting the number of mice crossing
	# the hallway.
	sleep 2
	while test ${retry} -lt ${MAX_RETRY}
	do
		sleep 1
		retry=$((${retry} + 1))
		# The service might momentarily disappear from the list.
		value=$(get_value restarts "$1" noerror)
		# $value is unquoted on purpose: it may carry padding from
		# the proc file, which word splitting strips.
		test -n "$value" && test $value -ne $2 && return 0
	done
	return 1
}

#######################################################################
# Service management routines
#######################################################################

# prepare_service(filename, label)
#
# Clone the service <label> when the 'flags' value of its proc entry
# contains 'r' but not 'R'.
# Returns 0 when nothing was done, 1 when the service was cloned (the
# caller is then expected to call cleanup_service afterwards).
prepare_service() {
	local label service flags

	service=$1
	label=$2

	flags=$(get_value flags "${service}")
	echo $flags | grep -q 'r' || return 0
	echo $flags | grep -q 'R' && return 0

	service clone "${label}"
	return 1
}

# cleanup_service(label)
#
# Undo the clone made by prepare_service for <label>.
cleanup_service() {
	local label

	label=$1

	service unclone "${label}"
}

#######################################################################
# POLICY: restart
#######################################################################
POLICIES="${POLICIES} restart"

# pol_restart(filename, label)
#
# Inject a fault into <label> and wait for its restart counter to change.
# Prints 'ok' when the counter increased and the endpoint is unchanged,
# 'not ok' otherwise (including when the wait times out).
pol_restart() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	service fi "${label}"
	# Numeric values are passed/compared unquoted so that any padding
	# left over from the proc file is stripped by word splitting.
	if ! wait_for_service "${service}" ${restarts_pre}
	then
		echo not ok
		return
	fi

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	if [ ${restarts_post} -gt ${restarts_pre} ] &&
	   [ ${endpoint_post} -eq ${endpoint_pre} ]
	then
		echo ok
	else
		echo not ok
	fi
}

#######################################################################
# POLICY: reset
#######################################################################
POLICIES="${POLICIES} reset"

# pol_reset(filename, label)
#
# Inject a fault into <label> and wait for its restart counter to change.
# Prints 'ok' when the counter increased and the endpoint changed,
# 'not ok' otherwise (including when the wait times out).
pol_reset() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	service fi "${label}"
	# Numeric values are passed/compared unquoted so that any padding
	# left over from the proc file is stripped by word splitting.
	if ! wait_for_service "${service}" ${restarts_pre}
	then
		echo not ok
		return
	fi

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	# This policy doesn't guarantee the endpoint to be kept, but there
	# is a slight chance that it will actually stay the same, and fail
	# the test.
	if [ ${restarts_post} -gt ${restarts_pre} ] &&
	   [ ${endpoint_post} -ne ${endpoint_pre} ]
	then
		echo ok
	else
		echo not ok
	fi
}

#######################################################################
# Live update tests
#######################################################################

# lu_test_one(label, prog, result)
#
# Run one live update of <label> to <prog> and compare the exit status of
# the 'service' command with the expected <result>.
# Tunables are read from the variables lu_opts (default: empty),
# lu_maxtime (default: 3HZ) and lu_state (default: 1); callers override
# them for a single call with 'var=value lu_test_one ...'.
# Returns 0 when the status matches, 1 otherwise.
lu_test_one() {
	local label=$1
	local prog=$2
	local result=$3
	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}

	# ${lu_opts} stays unquoted: when empty it must expand to nothing.
	service ${lu_opts} update "${prog}" -label "${label}" \
	    -maxtime "${lu_maxtime}" -state "${lu_state}"
	if [ $? -ne $result ]
	then
		return 1
	else
		return 0
	fi
}

# lu_test(filename, label)
#
# Run the single-component live update scenarios against <label>.
# Prints 'ok' when every scenario returned the expected status and both
# the restart counter and the endpoint are unchanged afterwards, 'not ok'
# when they changed, and nothing at all when a scenario failed (the
# caller treats anything but 'ok' as a failure).
lu_test() {
	local label service
	local endpoint_pre endpoint_post
	local restarts_pre restarts_post

	service=$1
	label=$2

	restarts_pre=$(get_value restarts "${service}")
	endpoint_pre=$(get_value endpoint "${service}")

	lu_test_one "${label}" self 0 || return

	# Test live update "prepare only"
	case "${label}" in
	pm|rs|vfs|vm)
		;;
	*)
		lu_opts="-o" lu_test_one "${label}" self 0 || return
		;;
	esac

	# Test live update initialization crash
	lu_opts="-x" lu_test_one "${label}" self 200 || return

	# The next two scenarios are not run against rs.
	case "${label}" in
	rs)
		;;
	*)
		# Test live update initialization failure
		lu_opts="-y" lu_test_one "${label}" self 78 || return

		# Test live update initialization timeout
		lu_maxtime="1HZ" lu_opts="-z" lu_test_one "${label}" self 4 || return
		;;
	esac

	# Test live update from SEF_LU_STATE_EVAL state
	lu_maxtime="1HZ" lu_state="5" lu_test_one "${label}" self 4 || return

	restarts_post=$(get_value restarts "${service}")
	endpoint_post=$(get_value endpoint "${service}")

	# Make sure endpoint and restarts are preserved
	if [ ${restarts_post} -eq ${restarts_pre} ] &&
	   [ ${endpoint_post} -eq ${endpoint_pre} ]
	then
		echo ok
	else
		echo not ok
	fi
}

# multi_lu_test_one(expected, once_index, label...)
#
# Queue a live update for every <label>, then trigger the whole batch with
# 'service sysctl upd_run' and compare its exit status with <expected>.
# The label at 1-based position <once_index> is queued with the
# lu_*_once settings, every other label with the plain lu_* settings
# (an index of 0 therefore selects no label).
# Returns 0 when the expected status was obtained, 1 when it was not, and
# 2 when queueing an update failed and the batch did not yield <expected>.
#
# NOTE(review): the per-call overrides rely on 'var=value function ...'
# assignments not outliving the call; confirm for the target shell.
multi_lu_test_one() {
	local expected=$1
	local once_index=$2
	shift 2
	local labels="$*"
	local ret=1
	local retry=0
	local index result label

	lu_opts=${lu_opts:-}
	lu_maxtime=${lu_maxtime:-3HZ}
	lu_state=${lu_state:-1}
	lu_opts_once=${lu_opts_once:-$lu_opts}
	lu_maxtime_once=${lu_maxtime_once:-$lu_maxtime}
	lu_state_once=${lu_state_once:-$lu_state}

	while [ $ret -eq 1 ] && [ $retry -lt ${MAX_MULTI_LU_RETRY} ]
	do
		index=0
		for label in ${labels}
		do
			index=$((${index} + 1))

			if [ $index -eq $once_index ]
			then
				service ${lu_opts_once} -q update self \
				    -label "${label}" \
				    -maxtime "${lu_maxtime_once}" \
				    -state "${lu_state_once}" || ret=2
			else
				service ${lu_opts} -q update self \
				    -label "${label}" \
				    -maxtime "${lu_maxtime}" \
				    -state "${lu_state}" || ret=2
			fi
		done
		service sysctl upd_run
		result=$?

		# We may experience transient failures as a result of services
		# trying to talk to each other while being prepared for the
		# live update. In that case we get result code 4. If that is
		# not the result code we expected, try again for a limited
		# number of times.
		if [ $result -eq $expected ]
		then
			ret=0
		elif [ $result -ne 4 ]
		then
			break
		fi
		retry=$((${retry} + 1))
	done

	return $ret
}

# multi_lu_test(label...)
#
# Run the multicomponent live update scenarios against the given labels.
# Returns 0 when all scenarios passed, 1 on the first failing one.
multi_lu_test() {
	local y_result z_result
	local have_rs=0
	local labels="$*"
	local label

	# Some of the results depend on whether RS is part of the live update.
	for label in ${labels}
	do
		if [ "x$label" = "xrs" ]
		then
			have_rs=1
		fi
	done

	if [ $have_rs -eq 1 ]
	then
		y_result=200
		z_result=200
	else
		y_result=78
		z_result=4
	fi

	multi_lu_test_one 0 0 ${labels} || return 1
	lu_opts_once="-x" multi_lu_test_one 200 2 ${labels} || return 1
	lu_opts_once="-y" multi_lu_test_one ${y_result} 3 ${labels} || return 1
	lu_maxtime_once="1HZ" lu_opts_once="-z" multi_lu_test_one ${z_result} 2 ${labels} || return 1
	lu_maxtime_once="1HZ" lu_state_once="5" multi_lu_test_one 4 3 ${labels} || return 1

	return 0
}

# multi_lu_test_wrapper(label...)
#
# Announce and run multi_lu_test for the given labels, printing a
# 'not ok' line on failure. Returns 0 on success, 1 on failure.
multi_lu_test_wrapper() {
	echo "# testing $* :: multicomponent live update+rollback"
	if ! multi_lu_test "$@"
	then
		echo "not ok # failed multicomponent live update+rollback"
		return 1
	fi
	return 0
}

#######################################################################
# main()
#######################################################################

# main([filename...])
#
# Without arguments: run the policy tests, the per-service live update
# tests and the multicomponent live update tests for every entry of
# /proc/service. With arguments: run only the policy tests for the given
# proc entries. Exits 0 on success and 1 on the first failure.
main() {
	local services service label pol service_policies
	local result cleanup service_flags multi_lu_labels

	# Refuse to run at all while an X server is running (see the known
	# limitations at the top of this file).
	if ps -ef | grep -v grep | grep -q /usr/X11R7/bin/X
	then
		echo "# This test can't be run while a Xserver is running"
		echo "not ok # A Xserver is running"
		exit 1
	fi

	if [ $# -eq 0 ]
	then
		services=$(echo /proc/service/*)
	else
		services="$*"
	fi

	for service in ${services}
	do
		label=$(basename "${service}")
		service_policies=$(grep policies "${service}" | cut -d: -f2)
		for pol in ${service_policies}
		do
			# Only test policies listed in POLICIES. Match whole
			# words, so that a policy whose name merely is a
			# substring of a supported one is not picked up.
			case " ${POLICIES} " in
			*" ${pol} "*)
				;;
			*)
				continue
				;;
			esac

			echo "# testing ${label} :: ${pol}"
			cleanup=0
			prepare_service "${service}" "${label}" || cleanup=1
			result=$(pol_${pol} "${service}" "${label}")
			if [ "x${result}" != "xok" ]
			then
				echo "not ok # failed ${label}, ${pol}"
				exit 1
			fi
			if [ $cleanup -eq 1 ]
			then
				cleanup_service "${label}"
			fi
		done
	done
	if [ $# -gt 0 ]
	then
		echo "ok # partial test for $* successful"
		exit 0
	fi

	multi_lu_labels=""
	for service in ${services}
	do
		label=$(basename "${service}")
		service_policies=$(grep policies "${service}" | cut -d: -f2)

		# Only services advertising at least one policy are tested.
		echo "${service_policies}" | grep -q "[a-zA-Z]" || continue

		echo "# testing ${label} :: live update+rollback"
		result=$(lu_test "${service}" "${label}")
		if [ "x${result}" != "xok" ]
		then
			echo "not ok # failed ${label}, live update+rollback"
			exit 1
		fi

		# rs and vm are appended explicitly to the multicomponent
		# runs below, so never collect them here.
		case "${label}" in
		rs|vm)
			continue
			;;
		esac

		service_flags=$(get_value flags "${service}")
		if echo $service_flags | grep -q 's'
		then
			multi_lu_labels="${multi_lu_labels} ${label}"
		fi
	done

	# ${multi_lu_labels} is unquoted on purpose: one argument per label.
	multi_lu_test_wrapper ${multi_lu_labels} || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} rs || exit 1
	multi_lu_test_wrapper ${multi_lu_labels} vm rs || exit 1

	echo ok
	exit 0
}

main "$@"