1#!/bin/bash 2# 3# this shell script provides commands to the common diag system. It enables 4# test scripts to wait for certain conditions and initiate certain actions. 5# needs support in config file. 6# NOTE: this file should be included with ". diag.sh", as it otherwise is 7# not always able to convey back states to the upper-level test driver 8# begun 2009-05-27 by rgerhards 9# This file is part of the rsyslog project, released under GPLv3 10# 11# This file can be customized to environment specifics via environment 12# variables: 13# SUDO either blank, or the command to use for sudo (usually "sudo"). 14# This env var is sometimes used to increase the chance that we 15# get extra information, e.g. when trying to analyze running 16# processes. Bottom line: if sudo is possible and you are OK with 17# the testbench utilizing this, use 18# export SUDO=sudo 19# to activate the extra functionality. 20# RS_SORTCMD Sort command to use (must support $RS_SORT_NUMERIC_OPT option). If unset, 21# "sort" is used. E.g. Solaris needs "gsort" 22# RS_SORT_NUMERIC_OPT option to use for numerical sort, If unset "-g" is used. 23# RS_CMPCMD cmp command to use. If unset, "cmd" is used. 24# E.g. Solaris needs "gcmp" 25# RS_HEADCMD head command to use. If unset, "head" is used. 26# E.g. Solaris needs "ghead" 27# RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT 28# global input timeout shutdown, default 60000 (60) sec. This should 29# only be set specifically if there is good need to do so, e.g. if 30# a test needs to timeout. 31# USE_VALGRIND if set to "YES", the test will be run under valgrind control, with 32# "default" settings (memcheck including leak check, termination on error). 33# This permits to have valgrind and non-valgrind versions of the same test 34# without the need to write it twice: just have a 2-liner -vg.sh which 35# does: 36# export USE_VALGRIND="YES" 37# source original-test.sh 38# sample can be seen in imjournal-basic[.vg].sh 39# You may also use USE_VALGRIND="YES-NOLEAK" to request valgrind without 40# leakcheck (this sometimes is needed). 41# ABORT_ALL_ON_TEST_FAIL 42# if set to "YES" and one test fails, all others are not executed but skipped. 43# This is useful in long-running CI jobs where we are happy with seeing the 44# first failure (to save time). 45# 46# 47# EXIT STATES 48# 0 - ok 49# 1 - FAIL 50# 77 - SKIP 51# 100 - Testbench failure 52export TB_ERR_TIMEOUT=101 53# 177 - internal state: test failed, but in a way that makes us strongly believe 54# this is caused by environment. This will lead to exit 77 (SKIP), but report 55# the failure if failure reporting is active 56 57# environment variables: 58# USE_AUTO_DEBUG "on" --> enables automatic debugging, anything else 59# turns it off 60 61# diag system internal environment variables 62# these variables are for use by test scripts - they CANNOT be 63# overridden by the user 64# TCPFLOOD_EXTRA_OPTS enables to set extra options for tcpflood, usually 65# used in tests that have a common driver where it 66# is too hard to set these options otherwise 67# CONFIG 68export ZOOPIDFILE="$(pwd)/zookeeper.pid" 69 70#valgrind="valgrind --malloc-fill=ff --free-fill=fe --log-fd=1" 71 72# **** use the line below for very hard to find leaks! ***** 73#valgrind="valgrind --leak-check=full --show-leak-kinds=all --malloc-fill=ff --free-fill=fe --log-fd=1" 74 75#valgrind="valgrind --tool=drd --log-fd=1" 76#valgrind="valgrind --tool=helgrind --log-fd=1 --suppressions=$srcdir/linux_localtime_r.supp --gen-suppressions=all" 77#valgrind="valgrind --tool=exp-ptrcheck --log-fd=1" 78#set -o xtrace 79#export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction nostdout" 80#export RSYSLOG_DEBUGLOG="log" 81TB_TIMEOUT_STARTSTOP=400 # timeout for start/stop rsyslogd in tenths (!) of a second 400 => 40 sec 82# note that 40sec for the startup should be sufficient even on very slow machines. we changed this from 2min on 2017-12-12 83TB_TEST_TIMEOUT=90 # number of seconds after which test checks timeout (eg. waits) 84TB_TEST_MAX_RUNTIME=${TEST_MAX_RUNTIME:-580} # maximum runtime in seconds for a test; 85 # default TEST_MAX_RUNTIME e.g. for long-running tests or special 86 # testbench use. Testbench will abort test 87 # after that time (iff it has a chance to, not strictly enforced) 88 # Note: 580 is slightly below the rsyslog-ci required max non-stdout writing timeout 89 # This is usually at 600 (10 minutes) and processes will be force-terminated if they 90 # go over it. This is especially bad because we do not receive notifications in this 91 # case. 92export RSYSLOG_DEBUG_TIMEOUTS_TO_STDERR="on" # we want to know when we loose messages due to timeouts 93if [ "$TESTTOOL_DIR" == "" ]; then 94 export TESTTOOL_DIR="${srcdir:-.}" 95fi 96 97# newer functionality is preferably introduced via bash functions 98# rgerhards, 2018-07-03 99rsyslog_testbench_test_url_access() { 100 local missing_requirements= 101 if ! hash curl 2>/dev/null ; then 102 missing_requirements="'curl' is missing in PATH; Make sure you have cURL installed! Skipping test ..." 103 fi 104 105 if [ -n "${missing_requirements}" ]; then 106 printf '%s\n' "${missing_requirements}" 107 exit 77 108 fi 109 110 local http_endpoint="$1" 111 if ! curl --fail --max-time 30 "${http_endpoint}" 1>/dev/null 2>&1; then 112 echo "HTTP endpoint '${http_endpoint}' is not reachable. Skipping test ..." 113 exit 77 114 else 115 echo "HTTP endpoint '${http_endpoint}' is reachable! Starting test ..." 116 fi 117} 118 119# function to skip a test on a specific platform 120# $1 is what we check in uname, $2 (optional) is a reason message 121skip_platform() { 122 if [ "$(uname)" == "$1" ]; then 123 printf 'platform is "%s" - test does not work under "%s"\n' "$(uname)" "$1" 124 if [ "$2" != "" ]; then 125 printf 'reason: %s\n' "$2" 126 fi 127 exit 77 128 fi 129 130} 131 132 133# a consistent format to output testbench timestamps 134tb_timestamp() { 135 printf '%s[%s] ' "$(date +%H:%M:%S)" "$(( $(date +%s) - TB_STARTTEST ))" 136} 137 138# override the test timeout, but only if the new value is higher 139# than the previous one. This is necessary for slow test systems 140# $1 is timeout in seconds 141override_test_timeout() { 142 if [ "${1:=0}" == "" ]; then 143 printf 'FAIL: invalid testbench call, override_test_timeout needs value\n' 144 error_exit 100 145 fi 146 if [ "$1" -gt "$TB_TEST_TIMEOUT" ]; then 147 TB_TEST_TIMEOUT=$1 148 printf 'info: TB_TEST_TIMEOUT increased to %s\n' "$TB_TEST_TIMEOUT" 149 fi 150} 151 152# set special tests status. States ($1) are: 153# unreliable -- as the name says, test does not work reliably; $2 must be github issue URL 154# depending on CI configuration, "unreliable" tests are skipped and not failed 155# or not executed at all. Test reports may also be amended to github issue. 156test_status() { 157 if [ "$1" == "unreliable" ]; then 158 if [ "$2" == "" ]; then 159 printf 'TESTBENCH_ERROR: github issue URL must be given\n' 160 error_exit 100 161 fi 162 export TEST_STATUS="$1" 163 export TEST_GITHUB_ISSUE="$2" 164 else 165 printf 'TESTBENCH_ERROR: test_status "%s" unknown\n' "$1" 166 error_exit 100 167 fi 168} 169 170 171setvar_RS_HOSTNAME() { 172 printf '### Obtaining HOSTNAME (prerequisite, not actual test) ###\n' 173 generate_conf "" 174 add_conf 'module(load="../plugins/imtcp/.libs/imtcp") 175input(type="imtcp" port="0" listenPortFileName="'$RSYSLOG_DYNNAME'.tcpflood_port") 176 177$template hostname,"%hostname%" 178local0.* ./'${RSYSLOG_DYNNAME}'.HOSTNAME;hostname 179' 180 rm -f "${RSYSLOG_DYNNAME}.HOSTNAME" 181 startup "" 182 tcpflood -m1 -M "\"<128>\"" 183 shutdown_when_empty 184 wait_shutdown "" 185 export RS_HOSTNAME="$(cat ${RSYSLOG_DYNNAME}.HOSTNAME)" 186 rm -f "${RSYSLOG_DYNNAME}.HOSTNAME" 187 echo HOSTNAME is: $RS_HOSTNAME 188} 189 190 191# begin a new testconfig 192# 2018-09-07: Incremented inputs.timeout.shutdown to 60000 because kafka tests may not be 193# finished under stress otherwise 194# $1 is the instance id, if given 195generate_conf() { 196 if [ "$RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT" == "" ]; then 197 RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT="10000" 198 fi 199 if [ "$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT" == "" ]; then 200 RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT="60000" 201 fi 202 if [ "$RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN" == "" ]; then 203 RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN="20000" 204 fi 205 if [ "$RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE" == "" ]; then 206 RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE="20000" 207 fi 208 export TCPFLOOD_PORT="$(get_free_port)" 209 if [ "$1" == "" ]; then 210 export TESTCONF_NM="${RSYSLOG_DYNNAME}_" # this basename is also used by instance 2! 211 export RSYSLOG_OUT_LOG="${RSYSLOG_DYNNAME}.out.log" 212 export RSYSLOG2_OUT_LOG="${RSYSLOG_DYNNAME}_2.out.log" 213 export RSYSLOG_PIDBASE="${RSYSLOG_DYNNAME}:" # also used by instance 2! 214 mkdir $RSYSLOG_DYNNAME.spool 215 fi 216 echo 'module(load="../plugins/imdiag/.libs/imdiag") 217global(inputs.timeout.shutdown="'$RSTB_GLOBAL_INPUT_SHUTDOWN_TIMEOUT'" 218 default.action.queue.timeoutshutdown="'$RSTB_ACTION_DEFAULT_Q_TO_SHUTDOWN'" 219 default.action.queue.timeoutEnqueue="'$RSTB_ACTION_DEFAULT_Q_TO_ENQUEUE'") 220# use legacy-style for the following settings so that we can override if needed 221$MainmsgQueueTimeoutEnqueue 20000 222$MainmsgQueueTimeoutShutdown '$RSTB_GLOBAL_QUEUE_SHUTDOWN_TIMEOUT' 223$IMDiagListenPortFileName '$RSYSLOG_DYNNAME.imdiag$1.port' 224$IMDiagServerRun 0 225$IMDiagAbortTimeout '$TB_TEST_MAX_RUNTIME' 226 227:syslogtag, contains, "rsyslogd" ./'${RSYSLOG_DYNNAME}$1'.started 228###### end of testbench instrumentation part, test conf follows:' > ${TESTCONF_NM}$1.conf 229} 230 231# add more data to config file. Note: generate_conf must have been called 232# $1 is config fragment, $2 the instance id, if given 233add_conf() { 234 printf '%s' "$1" >> ${TESTCONF_NM}$2.conf 235} 236 237 238rst_msleep() { 239 $TESTTOOL_DIR/msleep $1 240} 241 242 243# compare file to expected exact content 244# $1 is file to compare, default $RSYSLOG_OUT_LOG 245cmp_exact() { 246 filename=${1:-"$RSYSLOG_OUT_LOG"} 247 if [ "$filename" == "" ]; then 248 printf 'Testbench ERROR, cmp_exact() does not have a filename at ALL!\n' 249 error_exit 100 250 fi 251 if [ "$EXPECTED" == "" ]; then 252 printf 'Testbench ERROR, cmp_exact() needs to have env var EXPECTED set!\n' 253 error_exit 100 254 fi 255 printf '%s\n' "$EXPECTED" | cmp - "$filename" 256 if [ $? -ne 0 ]; then 257 printf 'invalid response generated\n' 258 printf '################# %s is:\n' "$filename" 259 cat -n $filename 260 printf '################# EXPECTED was:\n' 261 cat -n <<< "$EXPECTED" 262 printf '\n#################### diff is:\n' 263 diff - "$filename" <<< "$EXPECTED" 264 error_exit 1 265 fi; 266} 267 268# code common to all startup...() functions 269startup_common() { 270 instance= 271 if [ "$1" == "2" ]; then 272 CONF_FILE="${TESTCONF_NM}2.conf" 273 instance=2 274 elif [ "$1" == "" ] || [ "$1" == "1" ]; then 275 CONF_FILE="${TESTCONF_NM}.conf" 276 else 277 CONF_FILE="$srcdir/testsuites/$1" 278 instance=$2 279 fi 280 # we need to remove the imdiag port file as there are some 281 # tests that start multiple times. These may get the old port 282 # number if the file still exists AND timing is bad so that 283 # imdiag does not generate the port file quickly enough on 284 # startup. 285 rm -f $RSYSLOG_DYNNAME.imdiag$instance.port 286 if [ ! -f $CONF_FILE ]; then 287 echo "ERROR: config file '$CONF_FILE' not found!" 288 error_exit 1 289 fi 290 echo config $CONF_FILE is: 291 cat -n $CONF_FILE 292} 293 294# wait for appearance of a specific pid file, given as $1 295wait_startup_pid() { 296 if [ "$1" == "" ]; then 297 echo "FAIL: testbench bug: wait_startup_called without \$1" 298 error_exit 100 299 fi 300 while test ! -f $1; do 301 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 302 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 303 printf '%s ABORT! Timeout waiting on startup (pid file %s)\n' "$(tb_timestamp)" "$1" 304 ls -l "$1" 305 ps -fp $(cat "$1") 306 error_exit 1 307 fi 308 done 309 printf '%s %s found, pid %s\n' "$(tb_timestamp)" "$1" "$(cat $1)" 310} 311 312# special version of wait_startup_pid() for rsyslog startup 313wait_rsyslog_startup_pid() { 314 wait_startup_pid $RSYSLOG_PIDBASE$1.pid 315} 316 317# wait for startup of an arbitrary process 318# $1 - pid file name 319# $2 - startup file name (optional, only checked if given) 320wait_process_startup() { 321 wait_startup_pid $1.pid 322 i=0 323 if [ "$2" != "" ]; then 324 while test ! -f "$2"; do 325 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 326 ps -p $(cat $1.pid) &> /dev/null 327 if [ $? -ne 0 ] 328 then 329 echo "ABORT! pid in $1 no longer active during startup!" 330 error_exit 1 331 fi 332 (( i++ )) 333 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 334 printf '%s ABORT! Timeout waiting on file %s\n' "$(tb_timestamp)" "$2" 335 error_exit 1 336 fi 337 done 338 printf '%s %s seen, associated pid %s\n' "$(tb_timestamp)" "$2" "$(cat $1)" 339 fi 340} 341 342 343# wait for the pid in $1 to terminate, abort on timeout 344wait_pid_termination() { 345 out_pid="$1" 346 if [[ "$out_pid" == "" ]]; then 347 printf 'TESTBENCH error: pidfile name not specified in wait_pid_termination\n' 348 error_exit 100 349 fi 350 terminated=0 351 while [[ $terminated -eq 0 ]]; do 352 ps -p $out_pid &> /dev/null 353 if [[ $? != 0 ]]; then 354 terminated=1 355 fi 356 $TESTTOOL_DIR/msleep 100 357 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 358 printf '%s ABORT! Timeout waiting on shutdown (pid %s)\n' "$(tb_timestamp)" $out_pid 359 ps -fp $out_pid 360 printf 'Instance is possibly still running and may need manual cleanup.\n' 361 error_exit 1 362 fi 363 done 364 unset terminated 365 unset out_pid 366} 367 368# wait for file $1 to exist AND be non-empty 369# $1 : file to wait for 370# $2 (optional): error message to show if timeout occurs 371wait_file_exists() { 372 echo waiting for file $1 373 i=0 374 while true; do 375 if [ -f $1 ] && [ "$(cat $1 2> /dev/null)" != "" ]; then 376 break 377 fi 378 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 379 ((i++)) 380 if test $i -gt $TB_TIMEOUT_STARTSTOP; then 381 echo "ABORT! Timeout waiting for file $1" 382 ls -l $1 383 if [ "$2" != "" ]; then 384 echo "$2" 385 fi 386 error_exit 1 387 fi 388 done 389} 390 391# kafka special wait function: we wait for the output file in order 392# to ensure Kafka/Zookeeper is actually ready to go. This is NOT 393# a generic check function and must only used with those kafka tests 394# that actually need it. 395kafka_wait_group_coordinator() { 396echo We are waiting for kafka/zookeeper being ready to deliver messages 397wait_file_exists $RSYSLOG_OUT_LOG " 398 399Non-existence of $RSYSLOG_OUT_LOG can be caused 400by a problem inside zookeeper. If debug output in the receiver is enabled, one 401may see this message: 402 403\"GroupCoordinator response error: Broker: Group coordinator not available\" 404 405In this case you may want to do a web search and/or have a look at 406 https://github.com/edenhill/librdkafka/issues/799 407 408The question, of course, is if there is nevertheless a problem in imkafka. 409Usually, the wait we do inside the testbench is sufficient to handle all 410Zookeeper/Kafka startup. So if the issue reoccurs, it is suggested to enable 411debug output in the receiver and check for actual problems. 412" 413} 414 415# check if kafka itself failed. $1 is the message file name. 416kafka_check_broken_broker() { 417 failed=0 418 if grep "Broker transport failure" < "$1" ; then 419 failed=1 420 fi 421 if grep "broker connections are down" < "$1" ; then 422 failed=1 423 fi 424 if [ $failed -eq 1 ]; then 425 printf '\n\nenvironment-induced test error - kafka broker failed - skipping test\n' 426 printf 'content of %s:\n' "$1" 427 cat -n "$1" 428 error_exit 177 429 fi 430} 431 432# inject messages via kafkacat tool (for imkafka tests) 433# $1 == "--wait" means wait for rsyslog to receive TESTMESSAGES lines in RSYSLOG_OUT_LOG 434# $TESTMESSAGES contains number of messages to inject 435# $RANDTOPIC contains topic to produce to 436injectmsg_kafkacat() { 437 if [ "$1" == "--wait" ]; then 438 wait="YES" 439 shift 440 fi 441 if [ "$TESTMESSAGES" == "" ]; then 442 printf 'TESTBENCH ERROR: TESTMESSAGES env var not set!\n' 443 error_exit 1 444 fi 445 MAXATONCE=25000 # how many msgs should kafkacat send? - hint: current version errs out above ~70000 446 i=1 447 while (( i<=TESTMESSAGES )); do 448 currmsgs=0 449 while ((i <= $TESTMESSAGES && currmsgs != MAXATONCE)); do 450 printf ' msgnum:%8.8d\n' $i; 451 i=$((i + 1)) 452 currmsgs=$((currmsgs+1)) 453 done > "$RSYSLOG_DYNNAME.kafkacat.in" 454 set -e 455 kafkacat -P -b localhost:29092 -t $RANDTOPIC <"$RSYSLOG_DYNNAME.kafkacat.in" 2>&1 | tee >$RSYSLOG_DYNNAME.kafkacat.log 456 set +e 457 printf 'kafkacat injected %d msgs so far\n' $((i - 1)) 458 kafka_check_broken_broker $RSYSLOG_DYNNAME.kafkacat.log 459 check_not_present "ERROR" $RSYSLOG_DYNNAME.kafkacat.log 460 cat $RSYSLOG_DYNNAME.kafkacat.log 461 done 462 463 if [ "$wait" == "YES" ]; then 464 wait_seq_check "$@" 465 fi 466} 467 468 469# wait for rsyslogd startup ($1 is the instance) 470wait_startup() { 471 wait_rsyslog_startup_pid $1 472 while test ! -f ${RSYSLOG_DYNNAME}$1.started; do 473 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 474 ps -p $(cat $RSYSLOG_PIDBASE$1.pid) &> /dev/null 475 if [ $? -ne 0 ] 476 then 477 echo "ABORT! rsyslog pid no longer active during startup!" 478 error_exit 1 stacktrace 479 fi 480 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 481 printf '%s ABORT! Timeout waiting startup file %s\n' "$(tb_timestamp)" "${RSYSLOG_DYNNAME}.started" 482 error_exit 1 483 fi 484 done 485 echo "$(tb_timestamp) rsyslogd$1 startup msg seen, pid " $(cat $RSYSLOG_PIDBASE$1.pid) 486 wait_file_exists $RSYSLOG_DYNNAME.imdiag$1.port 487 eval export IMDIAG_PORT$1=$(cat $RSYSLOG_DYNNAME.imdiag$1.port) 488 eval PORT=$IMDIAG_PORT$1 489 echo "imdiag$1 port: $PORT" 490 if [ "$PORT" == "" ]; then 491 echo "TESTBENCH ERROR: imdiag port not found!" 492 ls -l $RSYSLOG_DYNNAME* 493 exit 100 494 fi 495} 496 497# reassign ports after rsyslog startup; must be called from all 498# functions that startup rsyslog 499reassign_ports() { 500 if grep -q 'listenPortFileName="'$RSYSLOG_DYNNAME'\.tcpflood_port"' $CONF_FILE; then 501 assign_tcpflood_port $RSYSLOG_DYNNAME.tcpflood_port 502 fi 503 if grep -q '$InputTCPServerListenPortFile.*\.tcpflood_port' $CONF_FILE; then 504 assign_tcpflood_port $RSYSLOG_DYNNAME.tcpflood_port 505 fi 506} 507 508# start rsyslogd with default params. $1 is the config file name to use 509# returns only after successful startup, $2 is the instance (blank or 2!) 510# RS_REDIR maybe set to redirect rsyslog output 511# env var RSTB_DAEMONIZE" == "YES" means rsyslogd shall daemonize itself; 512# any other value or unset means it does not do that. 513startup() { 514 if [ "$USE_VALGRIND" == "YES" ]; then 515 startup_vg "$1" "$2" 516 return 517 elif [ "$USE_VALGRIND" == "YES-NOLEAK" ]; then 518 startup_vg_noleak "$1" "$2" 519 return 520 fi 521 startup_common "$1" "$2" 522 if [ "$RSTB_DAEMONIZE" == "YES" ]; then 523 n_option="" 524 else 525 n_option="-n" 526 fi 527 eval LD_PRELOAD=$RSYSLOG_PRELOAD $valgrind ../tools/rsyslogd -C $n_option -i$RSYSLOG_PIDBASE$instance.pid -M../runtime/.libs:../.libs -f$CONF_FILE $RS_REDIR & 528 wait_startup $instance 529 reassign_ports 530} 531 532 533# assign TCPFLOOD_PORT from port file 534# $1 - port file 535assign_tcpflood_port() { 536 wait_file_exists "$1" 537 export TCPFLOOD_PORT=$(cat "$1") 538 echo "TCPFLOOD_PORT now: $TCPFLOOD_PORT" 539 if [ "$TCPFLOOD_PORT" == "" ]; then 540 echo "TESTBENCH ERROR: TCPFLOOD_PORT not found!" 541 ls -l $RSYSLOG_DYNNAME* 542 exit 100 543 fi 544} 545 546 547# assign TCPFLOOD_PORT2 from port file 548# $1 - port file 549assign_tcpflood_port2() { 550 wait_file_exists "$1" 551 export TCPFLOOD_PORT2=$(cat "$1") 552 echo "TCPFLOOD_PORT2 now: $TCPFLOOD_PORT2" 553 if [ "$TCPFLOOD_PORT2" == "" ]; then 554 echo "TESTBENCH ERROR: TCPFLOOD_PORT2 not found!" 555 ls -l $RSYSLOG_DYNNAME* 556 exit 100 557 fi 558} 559# assign RS_PORT from port file - this is meant as generic way to 560# obtain additional port variables 561# $1 - port file 562assign_rs_port() { 563 wait_file_exists "$1" 564 export RS_PORT=$(cat "$1") 565 echo "RS_PORT now: $RS_PORT" 566 if [ "$RS_PORT" == "" ]; then 567 echo "TESTBENCH ERROR: RS_PORT not found!" 568 ls -l $RSYSLOG_DYNNAME* 569 exit 100 570 fi 571} 572 573# wait for a file to exist, then export it's content to env var 574# intended to be used for very small files, e.g. listenPort files 575# $1 - env var name 576# $2 - port file 577assign_file_content() { 578 wait_file_exists "$2" 579 content=$(cat "$2") 580 if [ "$content" == "" ]; then 581 echo "TESTBENCH ERROR: get_file content had empty file $2" 582 ls -l $RSYSLOG_DYNNAME* 583 exit 100 584 fi 585 eval export $1="$content" 586 printf 'exported: %s=%s\n' $1 "$content" 587} 588 589# same as startup_vg, BUT we do NOT wait on the startup message! 590startup_vg_waitpid_only() { 591 startup_common "$1" "$2" 592 if [ "$RS_TESTBENCH_LEAK_CHECK" == "" ]; then 593 RS_TESTBENCH_LEAK_CHECK=full 594 fi 595 # add --keep-debuginfo=yes for hard to find cases; this cannot be used generally, 596 # because it is only supported by newer versions of valgrind (else CI will fail 597 # on older platforms). 598 LD_PRELOAD=$RSYSLOG_PRELOAD valgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --gen-suppressions=all --log-fd=1 --error-exitcode=10 --malloc-fill=ff --free-fill=fe --leak-check=$RS_TESTBENCH_LEAK_CHECK ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$2.pid -M../runtime/.libs:../.libs -f$CONF_FILE & 599 wait_rsyslog_startup_pid $1 600} 601 602# start rsyslogd with default params under valgrind control. $1 is the config file name to use 603# returns only after successful startup, $2 is the instance (blank or 2!) 604startup_vg() { 605 startup_vg_waitpid_only $1 $2 606 wait_startup $2 607 reassign_ports 608} 609 610# same as startup-vg, except that --leak-check is set to "none". This 611# is meant to be used in cases where we have to deal with libraries (and such 612# that) we don't can influence and where we cannot provide suppressions as 613# they are platform-dependent. In that case, we can't test for leak checks 614# (obviously), but we can check for access violations, what still is useful. 615startup_vg_noleak() { 616 RS_TESTBENCH_LEAK_CHECK=no 617 startup_vg "$@" 618} 619 620# same as startup-vgthread, BUT we do NOT wait on the startup message! 621startup_vgthread_waitpid_only() { 622 startup_common "$1" "$2" 623 valgrind --tool=helgrind $RS_TEST_VALGRIND_EXTRA_OPTS $RS_TESTBENCH_VALGRIND_EXTRA_OPTS --log-fd=1 --error-exitcode=10 --suppressions=$srcdir/linux_localtime_r.supp --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --suppressions=$srcdir/CI/gcov.supp --gen-suppressions=all ../tools/rsyslogd -C -n -i$RSYSLOG_PIDBASE$2.pid -M../runtime/.libs:../.libs -f$CONF_FILE & 624 wait_rsyslog_startup_pid $2 625} 626 627# start rsyslogd with default params under valgrind thread debugger control. 628# $1 is the config file name to use, $2 is the instance (blank or 2!) 629# returns only after successful startup 630startup_vgthread() { 631 startup_vgthread_waitpid_only $1 $2 632 wait_startup $2 633 reassign_ports 634} 635 636 637# inject messages via our inject interface (imdiag) 638# $1 is start message number, env var NUMMESSAGES is number of messages to inject 639injectmsg() { 640 if [ "$3" != "" ] ; then 641 printf 'error: injectmsg only has two arguments, extra arg is %s\n' "$3" 642 fi 643 msgs=${2:-$NUMMESSAGES} 644 echo injecting $msgs messages 645 echo injectmsg "${1:-0}" "$msgs" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 646} 647 648# inject messages in INSTANCE 2 via our inject interface (imdiag) 649injectmsg2() { 650 msgs=${2:-$NUMMESSAGES} 651 echo injecting $2 messages into instance 2 652 echo injectmsg "$1:-0" "$msgs" $3 $4 | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $? 653 # TODO: some return state checking? (does it really make sense here?) 654} 655 656# inject literal payload via our inject interface (imdiag) 657injectmsg_literal() { 658 printf 'injecting msg payload: %s\n' "$1" 659 sed -e 's/^/injectmsg literal /g' <<< "$1" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 660} 661 662# inject literal payload via our inject interface (imdiag) 663injectmsg_file() { 664 printf 'injecting msg payload: %s\n' "$1" 665 sed -e 's/^/injectmsg literal /g' < "$1" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 666} 667 668 669# show the current main queue size. $1 is the instance. 670get_mainqueuesize() { 671 if [ "$1" == "2" ]; then 672 echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $? 673 else 674 echo getmainmsgqueuesize | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 675 fi 676} 677 678# get pid of rsyslog instance $1 679getpid() { 680 printf '%s' "$(cat $RSYSLOG_PIDBASE$1.pid)" 681} 682 683# grep for (partial) content. $1 is the content to check for, $2 the file to check 684# option --check-only just returns success/fail but does not terminate on fail 685# this is meant for checking during queue shutdown processing. 686# option --regex is understood, in which case $1 is a regex 687content_check() { 688 if [ "$1" == "--check-only" ]; then 689 check_only="yes" 690 shift 691 else 692 check_only="no" 693 fi 694 if [ "$1" == "--regex" ]; then 695 grep_opt= 696 shift 697 else 698 grep_opt=-F 699 fi 700 if [ "$1" == "--output-results" ]; then 701 output_results="yes" 702 shift 703 else 704 output_results="no" 705 fi 706 file=${2:-$RSYSLOG_OUT_LOG} 707 if ! grep -q $grep_opt -- "$1" < "${file}"; then 708 if [ "$check_only" == "yes" ]; then 709 printf 'content_check did not yet succeed\n' 710 return 1 711 fi 712 printf '\n============================================================\n' 713 printf 'FILE "%s" content:\n' "$file" 714 cat -n ${file} 715 printf 'FAIL: content_check failed to find "%s"\n' "$1" 716 error_exit 1 717 else 718 if [ "$output_results" == "yes" ]; then 719 # Output GREP results 720 echo "SUCCESS: content_check found results for '$1'\n" 721 grep "$1" "${file}" 722 fi 723 fi 724 if [ "$check_only" == "yes" ]; then 725 return 0 726 fi 727} 728 729 730# grep for (partial) content. this checks the count of the content 731# $1 is the content to check for 732# $2 required count 733# $3 the file to check (if default not used) 734# option --regex is understood, in which case $1 is a regex 735content_count_check() { 736 if [ "$1" == "--regex" ]; then 737 grep_opt= 738 shift 739 else 740 grep_opt=-F 741 fi 742 file=${3:-$RSYSLOG_OUT_LOG} 743 count=$(grep -c $grep_opt -- "$1" <${RSYSLOG_OUT_LOG}) 744 if [ ${count:=0} -ne "$2" ]; then 745 grep -c -F -- "$1" <${RSYSLOG_OUT_LOG} 746 printf '\n============================================================\n' 747 printf 'FILE "%s" content:\n' "$file" 748 cat -n ${file} 749 printf 'FAIL: content count required %d but was %d\n' "$2" $count 750 printf 'FAIL: content_check failed to find "%s"\n' "$1" 751 error_exit 1 752 fi 753} 754 755 756 757# $1 - content to check for 758# $2 - number of times content must appear 759# $3 - timeout (default: 1) 760content_check_with_count() { 761 timeoutend=${3:-1} 762 timecounter=0 763 while [ $timecounter -lt $timeoutend ]; do 764 (( timecounter=timecounter+1 )) 765 count=0 766 if [ -f "${RSYSLOG_OUT_LOG}" ]; then 767 count=$(grep -c -F -- "$1" <${RSYSLOG_OUT_LOG}) 768 fi 769 if [ ${count:=0} -eq $2 ]; then 770 echo content_check_with_count SUCCESS, \"$1\" occured $2 times 771 break 772 else 773 if [ "$timecounter" == "$timeoutend" ]; then 774 shutdown_when_empty "" 775 wait_shutdown "" 776 777 echo "$(tb_timestamp)" content_check_with_count failed, expected \"$1\" to occur $2 times, but found it "$count" times 778 echo file $RSYSLOG_OUT_LOG content is: 779 if [ $(wc -l < "$RSYSLOG_OUT_LOG") -gt 10000 ]; then 780 printf 'truncation, we have %d lines, which is way too much\n' \ 781 $(wc -l < "$RSYSLOG_OUT_LOG") 782 printf 'showing first and last 5000 lines\n' 783 head -n 5000 < "$RSYSLOG_OUT_LOG" 784 print '\n ... CUT ..................................................\n\n' 785 tail -n 5000 < "$RSYSLOG_OUT_LOG" 786 else 787 cat -n "$RSYSLOG_OUT_LOG" 788 fi 789 error_exit 1 790 else 791 printf '%s content_check_with_count, try %d have %d, wait for %d, search for: "%s"\n' \ 792 "$(tb_timestamp)" "$timecounter" "$count" "$2" "$1" 793 $TESTTOOL_DIR/msleep 1000 794 fi 795 fi 796 printf '%s **** content_check_with_count DEBUG (timeout %s, need %s lines):\n' "$(tb_timestamp)" "$3" "$2" # rger: REMOVE ME when problems are fixed 797 if [ -f "${RSYSLOG_OUT_LOG}" ]; then cat -n "$RSYSLOG_OUT_LOG"; fi 798 done 799} 800 801 802custom_content_check() { 803 grep -qF -- "$1" < $2 804 if [ "$?" -ne "0" ]; then 805 echo FAIL: custom_content_check failed to find "'$1'" inside "'$2'" 806 echo "file contents:" 807 cat -n $2 808 error_exit 1 809 fi 810} 811 812# check that given content $1 is not present in file $2 (default: RSYSLOG_OUT_LOG) 813# regular expressions may be used 814check_not_present() { 815 if [ "$2" == "" ]; then 816 file=$RSYSLOG_OUT_LOG 817 else 818 file="$2" 819 fi 820 grep -q -- "$1" < "$file" 821 if [ "$?" -eq "0" ]; then 822 echo FAIL: check_not present found 823 echo $1 824 echo inside file $file of $(wc -l < $file) lines 825 echo samples: 826 cat -n "$file" | grep -- "$1" | head -10 827 error_exit 1 828 fi 829} 830 831 832# check if mainqueue spool files exist, if not abort (we just check .qi). 833check_mainq_spool() { 834 printf 'There must exist some files now:\n' 835 ls -l $RSYSLOG_DYNNAME.spool 836 printf '.qi file:\n' 837 cat $RSYSLOG_DYNNAME.spool/mainq.qi 838 if [ ! -f $RSYSLOG_DYNNAME.spool/mainq.qi ]; then 839 printf 'error: mainq.qi does not exist where expected to do so!\n' 840 error_exit 1 841 fi 842} 843# check that no spool file exists. Abort if they do. 844# This situation must exist after a successful termination of rsyslog 845# where the disk queue has properly been drained and shut down. 846check_spool_empty() { 847 if [ "$(ls $RSYSLOG_DYNNAME.spool/* 2> /dev/null)" != "" ]; then 848 printf 'error: spool files exists where they are not permitted to do so:\n' 849 ls -l $RSYSLOG_DYNNAME.spool/* 850 error_exit 1 851 fi 852} 853 854# general helper for imjournal tests: check that we got hold of the 855# injected test message. This is pretty lengthy as the journal has played 856# "a bit" with us and also seems to sometimes have a heavy latency in 857# forwarding messages. So let's centralize the check code. 858# 859# $TESTMSG must contain the test message 860check_journal_testmsg_received() { 861 printf 'checking that journal indeed contains test message - may take a short while...\n' 862 # search reverse, gets us to our message (much) faster .... if it is there... 863 journalctl -a -r | grep -qF "$TESTMSG" 864 if [ $? -ne 0 ]; then 865 print 'SKIP: cannot read journal - our testmessage not found via journalctl\n' 866 exit 77 867 fi 868 printf 'journal contains test message\n' 869 870 echo "INFO: $(wc -l < $RSYSLOG_OUT_LOG) lines in $RSYSLOG_OUT_LOG" 871 872 grep -qF "$TESTMSG" < $RSYSLOG_OUT_LOG 873 if [ $? -ne 0 ]; then 874 echo "FAIL: $RSYSLOG_OUT_LOG content (tail -n200):" 875 tail -n200 $RSYSLOG_OUT_LOG 876 echo "=======" 877 echo "searching journal for testbench messages:" 878 journalctl -a | grep -qF "TestBenCH-RSYSLog imjournal" 879 echo "=======" 880 echo "NOTE: showing only last 200 lines, may be insufficient on busy systems!" 881 echo "last entries from journal:" 882 journalctl -an 200 883 echo "=======" 884 echo "NOTE: showing only last 200 lines, may be insufficient on busy systems!" 885 echo "However, the test check all of the journal, we are just limiting the output" 886 echo "to 200 lines to not spam CI systems too much." 887 echo "=======" 888 echo "FAIL: imjournal test message could not be found!" 889 echo "Expected message content was:" 890 echo "$TESTMSG" 891 error_exit 1 892 fi; 893} 894 895# wait for main message queue to be empty. $1 is the instance. 896# we run in a loop to ensure rsyslog is *really* finished when a 897# function for the "finished predicate" is defined. This is done 898# by setting env var QUEUE_EMPTY_CHECK_FUNC to the bash 899# function name. 900wait_queueempty() { 901 while [ $(date +%s) -le $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; do 902 if [ "$1" == "2" ]; then 903 echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT2 || error_exit $? 904 else 905 echo WaitMainQueueEmpty | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 906 fi 907 if [ "$QUEUE_EMPTY_CHECK_FUNC" == "" ]; then 908 return 909 else 910 if $QUEUE_EMPTY_CHECK_FUNC ; then 911 return 912 fi 913 fi 914 done 915 error_exit $TB_ERR_TIMEOUT 916} 917 918 919# shut rsyslogd down when main queue is empty. $1 is the instance. 920shutdown_when_empty() { 921 echo Shutting down instance ${1:-1} 922 wait_queueempty $1 923 if [ "$RSYSLOG_PIDBASE" == "" ]; then 924 echo "RSYSLOG_PIDBASE is EMPTY! - bug in test? (instance $1)" 925 error_exit 1 926 fi 927 cp $RSYSLOG_PIDBASE$1.pid $RSYSLOG_PIDBASE$1.pid.save 928 $TESTTOOL_DIR/msleep 500 # wait a bit (think about slow testbench machines!) 929 kill $(cat $RSYSLOG_PIDBASE$1.pid) # note: we do not wait for the actual termination! 930} 931 932# shut rsyslogd down without emptying the queue. $2 is the instance. 933shutdown_immediate() { 934 pidfile=$RSYSLOG_PIDBASE${1:-}.pid 935 cp $pidfile $pidfile.save 936 kill $(cat $pidfile) 937} 938 939 940# actually, we wait for rsyslog.pid to be deleted. 941# $1 is the instance 942wait_shutdown() { 943 if [ "$USE_VALGRIND" == "YES" ] || [ "$USE_VALGRIND" == "YES-NOLEAK" ]; then 944 wait_shutdown_vg "$1" 945 return 946 fi 947 out_pid=$(cat $RSYSLOG_PIDBASE$1.pid.save) 948 printf '%s wait on shutdown of %s\n' "$(tb_timestamp)" "$out_pid" 949 if [[ "$out_pid" == "" ]] 950 then 951 terminated=1 952 else 953 terminated=0 954 fi 955 while [[ $terminated -eq 0 ]]; do 956 ps -p $out_pid &> /dev/null 957 if [[ $? != 0 ]]; then 958 terminated=1 959 fi 960 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 961 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 962 printf '%s wait_shutdown ABORT! Timeout waiting on shutdown (pid %s)\n' "$(tb_timestamp)" $out_pid 963 ps -fp $out_pid 964 echo "Instance is possibly still running and may need" 965 echo "manual cleanup." 966 echo "TRYING TO capture status via gdb from hanging process" 967 $SUDO gdb ../tools/rsyslogd <<< "attach $out_pid 968set pagination off 969inf thr 970thread apply all bt 971quit" 972 echo "trying to kill -9 process" 973 kill -9 $out_pid 974 error_exit 1 975 fi 976 done 977 unset terminated 978 unset out_pid 979 if [ "$(ls core.* 2>/dev/null)" != "" ]; then 980 printf 'ABORT! core file exists (maybe from a parallel run!)\n' 981 pwd 982 ls -l core.* 983 error_exit 1 984 fi 985} 986 987 988# wait for all pending lookup table reloads to complete $1 is the instance. 989await_lookup_table_reload() { 990 if [ "$1" == "2" ]; then 991 echo AwaitLookupTableReload | $TESTTOOL_DIR/diagtalker -pIMDIAG_PORT2 || error_exit $? 992 else 993 echo AwaitLookupTableReload | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 994 fi 995} 996 997# $1 filename, default $RSYSLOG_OUT_LOG 998# $2 expected nbr of lines, default $NUMMESSAGES 999# $3 timeout in seconds 1000# options (need to be specified in THIS ORDER if multiple given): 1001# --delay ms -- if given, delay to use between retries 1002# --abort-on-oversize -- error_exit if more lines than expected are present 1003# --count-function func -- function to call to obtain current count 1004# this permits to override the default predicate and makes 1005# the wait really universally usable. 1006wait_file_lines() { 1007 delay=200 1008 if [ "$1" == "--delay" ]; then 1009 delay="$2" 1010 shift 2 1011 fi 1012 abort_oversize=NO 1013 if [ "$1" == "--abort-on-oversize" ]; then 1014 abort_oversize="YES" 1015 shift 1016 fi 1017 count_function= 1018 if [ "$1" == "--count-function" ]; then 1019 count_function="$2" 1020 shift 2 1021 fi 1022 timeout=${3:-$TB_TEST_TIMEOUT} 1023 timeoutbegin=$(date +%s) 1024 timeoutend=$(( timeoutbegin + timeout )) 1025 # TODO: change this to support odl mode, if needed: timeoutend=${3:-200} 1026 file=${1:-$RSYSLOG_OUT_LOG} 1027 waitlines=${2:-$NUMMESSAGES} 1028 1029 while true ; do 1030 count=0 1031 if [ "$count_function" == "" ]; then 1032 if [ -f "$file" ]; then 1033 if [ "$COUNT_FILE_IS_ZIPPED" == "yes" ]; then 1034 issue_HUP "" 1035 count=$(gunzip < "$file" | wc -l) 1036 else 1037 count=$(wc -l < "$file") 1038 fi 1039 fi 1040 else 1041 count=$($count_function) 1042 fi 1043 if [ ${count} -gt $waitlines ]; then 1044 if [ $abort_oversize == "YES" ] && [ ${count} -gt $waitlines ]; then 1045 printf 'FAIL: wait_file_lines, too many lines, expected %d, current %s, took %s seconds\n' \ 1046 $waitlines $count "$(( $(date +%s) - timeoutbegin ))" 1047 error_exit 1 1048 else 1049 printf 'wait_file_lines success, target %d or more lines, have %d, took %d seconds\n' \ 1050 "$waitlines" $count "$(( $(date +%s) - timeoutbegin ))" 1051 return 1052 fi 1053 fi 1054 if [ ${count} -eq $waitlines ]; then 1055 echo wait_file_lines success, have $waitlines lines, took $(( $(date +%s) - timeoutbegin )) seconds, file "$file" 1056 break 1057 else 1058 if [ $(date +%s) -ge $timeoutend ]; then 1059 echo wait_file_lines failed, expected $waitlines got $count after $timeoutend retries, took $(( $(date +%s) - timeoutbegin )) seconds 1060 error_exit 1 1061 else 1062 echo $(date +%H:%M:%S) wait_file_lines waiting, expected $waitlines, current $count lines 1063 $TESTTOOL_DIR/msleep $delay 1064 fi 1065 fi 1066 done 1067} 1068 1069 1070 1071 1072# wait until seq_check succeeds. This is used to synchronize various 1073# testbench timing-related issues, most importantly rsyslog shutdown 1074# all parameters are passed to seq_check 1075wait_seq_check() { 1076 timeoutend=$(( $(date +%s) + TB_TEST_TIMEOUT )) 1077 if [ "$SEQ_CHECK_FILE" == "" ]; then 1078 filename="$RSYSLOG_OUT_LOG" 1079 else 1080 filename="$SEQ_CHECK_FILE" 1081 fi 1082 1083 while true ; do 1084 if [ "$PRE_SEQ_CHECK_FUNC" != "" ]; then 1085 $PRE_SEQ_CHECK_FUNC 1086 fi 1087 if [ "${filename##.*}" != "gz" ]; then 1088 if [ -f "$filename" ]; then 1089 count=$(wc -l < "$filename") 1090 fi 1091 fi 1092 seq_check --check-only "$@" #&>/dev/null 1093 ret=$? 1094 if [ $ret == 0 ]; then 1095 printf 'wait_seq_check success (%d lines)\n' "$count" 1096 break 1097 else 1098 if [ $(date +%s) -ge $timeoutend ]; then 1099 printf 'wait_seq_check failed, no success before timeout\n' 1100 error_exit 1 1101 else 1102 printf 'wait_seq_check waiting (%d lines)\n' $count 1103 $TESTTOOL_DIR/msleep 500 1104 fi 1105 fi 1106 done 1107 unset count 1108} 1109 1110 1111# wait until some content appears in the specified file. 1112# This is used to synchronize various 1113# testbench timing-related issues, most importantly rsyslog shutdown 1114# all parameters are passed to seq_check 1115# $1 - content to search for 1116# $2 - file to check 1117wait_content() { 1118 file=${2:-$RSYSLOG_OUT_LOG} 1119 timeoutend=$(( $(date +%s) + TB_TEST_TIMEOUT )) 1120 count=0 1121 1122 while true ; do 1123 if [ -f "$file" ]; then 1124 count=$(wc -l < "$file") 1125 if grep -q "$1" < "$file"; then 1126 printf 'expected content found, continue test (%d lines)\n' "$count" 1127 break 1128 fi 1129 fi 1130 if [ $(date +%s) -ge $timeoutend ]; then 1131 printf 'wait_content failed, no success before timeout (%d lines)\n' "$count" 1132 printf 'searched content was:\n%s\n' "$1" 1133 error_exit 1 1134 else 1135 printf 'wait_content still waiting... (%d lines)\n' "$count" 1136 tail "$file" 1137 $TESTTOOL_DIR/msleep 500 1138 fi 1139 done 1140 unset count 1141} 1142 1143 1144assert_content_missing() { 1145 grep -qF -- "$1" < ${RSYSLOG_OUT_LOG} 1146 if [ "$?" -eq "0" ]; then 1147 echo content-missing assertion failed, some line matched pattern "'$1'" 1148 error_exit 1 1149 fi 1150} 1151 1152 1153custom_assert_content_missing() { 1154 grep -qF -- "$1" < $2 1155 if [ "$?" -eq "0" ]; then 1156 echo content-missing assertion failed, some line in "'$2'" matched pattern "'$1'" 1157 cat -n "$2" 1158 error_exit 1 1159 fi 1160} 1161 1162 1163# shut rsyslogd down when main queue is empty. $1 is the instance. 1164issue_HUP() { 1165 if [ "$1" == "--sleep" ]; then 1166 sleeptime="$2" 1167 shift 2 1168 else 1169 sleeptime=1000 1170 fi 1171 kill -HUP $(cat $RSYSLOG_PIDBASE$1.pid) 1172 printf 'HUP issued to pid %d\n' $(cat $RSYSLOG_PIDBASE$1.pid) 1173 $TESTTOOL_DIR/msleep $sleeptime 1174} 1175 1176 1177# actually, we wait for rsyslog.pid to be deleted. $1 is the instance 1178wait_shutdown_vg() { 1179 wait $(cat $RSYSLOG_PIDBASE$1.pid) 1180 export RSYSLOGD_EXIT=$? 1181 echo rsyslogd run exited with $RSYSLOGD_EXIT 1182 1183 if [ "$(ls vgcore.* 2>/dev/null)" != "" ]; then 1184 printf 'ABORT! core file exists:\n' 1185 ls -l vgcore.* 1186 error_exit 1 1187 fi 1188 if [ "$USE_VALGRIND" == "YES" ] || [ "$USE_VALGRIND" == "YES-NOLEAK" ]; then 1189 check_exit_vg 1190 fi 1191} 1192 1193check_file_exists() { 1194 if [ ! -f "$1" ]; then 1195 printf 'FAIL: file "%s" must exist, but does not\n' "$1" 1196 error_exit 1 1197 fi 1198} 1199 1200check_file_not_exists() { 1201 if [ -f "$1" ]; then 1202 printf 'FILE %s CONTENT:\n' "$1" 1203 cat -n -- "$1" 1204 printf 'FAIL: file "%s" must NOT exist, but does\n' "$1" 1205 error_exit 1 1206 fi 1207} 1208 1209# check exit code for valgrind error 1210check_exit_vg(){ 1211 if [ "$RSYSLOGD_EXIT" -eq "10" ]; then 1212 printf 'valgrind run FAILED with exceptions - terminating\n' 1213 error_exit 1 1214 fi 1215} 1216 1217 1218# do cleanup during exit processing 1219do_cleanup() { 1220 if [ "$(type -t test_error_exit_handler)" == "function" ]; then 1221 test_error_exit_handler 1222 fi 1223 1224 if [ -f $RSYSLOG_PIDBASE.pid ]; then 1225 printf 'rsyslog pid file still exists, trying to shutdown...\n' 1226 shutdown_immediate "" 1227 fi 1228 if [ -f ${RSYSLOG_PIDBASE}1.pid ]; then 1229 printf 'rsyslog pid file still exists, trying to shutdown...\n' 1230 shutdown_immediate 1 1231 fi 1232 if [ -f ${RSYSLOG_PIDBASE}2.pid ]; then 1233 printf 'rsyslog pid file still exists, trying to shutdown...\n' 1234 shutdown_immediate 2 1235 fi 1236} 1237 1238 1239# this is called if we had an error and need to abort. Here, we 1240# try to gather as much information as possible. That's most important 1241# for systems like Travis-CI where we cannot debug on the machine itself. 1242# our $1 is the to-be-used exit code. if $2 is "stacktrace", call gdb. 1243# 1244# NOTE: if a function test_error_exit_handler is defined, error_exit will 1245# call it immediately before termination. This may be used to cleanup 1246# some things or emit additional diagnostic information. 1247error_exit() { 1248 if [ $1 -eq $TB_ERR_TIMEOUT ]; then 1249 printf '%s Test %s exceeded max runtime of %d seconds\n' "$(tb_timestamp)" "$0" $TB_TEST_MAX_RUNTIME 1250 fi 1251 if [ "$(ls core* 2>/dev/null)" != "" ]; then 1252 echo trying to obtain crash location info 1253 echo note: this may not be the correct file, check it 1254 CORE=$(ls core*) 1255 echo "bt" >> gdb.in 1256 echo "q" >> gdb.in 1257 gdb ../tools/rsyslogd $CORE -batch -x gdb.in 1258 CORE= 1259 rm gdb.in 1260 fi 1261 if [[ "$2" == 'stacktrace' || ( ! -e IN_AUTO_DEBUG && "$USE_AUTO_DEBUG" == 'on' ) ]]; then 1262 if [ "$(ls core* 2>/dev/null)" != "" ]; then 1263 CORE=$(ls core*) 1264 printf 'trying to analyze core "%s" for main rsyslogd binary\n' "$CORE" 1265 printf 'note: this may not be the correct file, check it\n' 1266 $SUDO gdb ../tools/rsyslogd "$CORE" -batch <<- EOF 1267 bt 1268 echo === THREAD INFO === 1269 info thread 1270 echo === thread apply all bt full === 1271 thread apply all bt full 1272 q 1273 EOF 1274 $SUDO gdb ./tcpflood "$CORE" -batch <<- EOF 1275 bt 1276 echo === THREAD INFO === 1277 info thread 1278 echo === thread apply all bt full === 1279 thread apply all bt full 1280 q 1281 EOF 1282 fi 1283 fi 1284 if [[ ! -e IN_AUTO_DEBUG && "$USE_AUTO_DEBUG" == 'on' ]]; then 1285 touch IN_AUTO_DEBUG 1286 # OK, we have the testname and can re-run under valgrind 1287 echo re-running under valgrind control 1288 current_test="./$(basename $0)" # this path is probably wrong -- theinric 1289 $current_test 1290 # wait a little bit so that valgrind can finish 1291 $TESTTOOL_DIR/msleep 4000 1292 # next let's try us to get a debug log 1293 RSYSLOG_DEBUG_SAVE=$RSYSLOG_DEBUG 1294 export RSYSLOG_DEBUG="debug nologfuncflow noprintmutexaction" 1295 $current_test 1296 $TESTTOOL_DIR/msleep 4000 1297 RSYSLOG_DEBUG=$RSYSLOG_DEBUG_SAVE 1298 rm IN_AUTO_DEBUG 1299 fi 1300 # output listening ports as a temporary debug measure (2018-09-08 rgerhards), now disables, but not yet removed (2018-10-22) 1301 #if [ $(uname) == "Linux" ]; then 1302 # netstat -tlp 1303 #else 1304 # netstat 1305 #fi 1306 1307 # Extended debug output for dependencies started by testbench 1308 if [ "$EXTRA_EXITCHECK" == 'dumpkafkalogs' ] && [ "$TEST_OUTPUT" == "VERBOSE" ]; then 1309 # Dump Zookeeper log 1310 dump_zookeeper_serverlog 1311 # Dump Kafka log 1312 dump_kafka_serverlog 1313 fi 1314 1315 # Extended Exit handling for kafka / zookeeper instances 1316 kafka_exit_handling "false" 1317 1318 error_stats $1 # Report error to rsyslog testbench stats 1319 do_cleanup 1320 1321 exitval=$1 1322 if [ "$TEST_STATUS" == "unreliable" ] && [ "$1" -ne 100 ]; then 1323 # TODO: log github issue 1324 printf 'Test flagged as unreliable, exiting with SKIP. Original exit code was %d\n' "$1" 1325 printf 'GitHub ISSUE: %s\n' "$TEST_GITHUB_ISSUE" 1326 exitval=77 1327 else 1328 if [ "$1" -eq 177 ]; then 1329 exitval=77 1330 fi 1331 fi 1332 printf '%s FAIL: Test %s (took %s seconds)\n' "$(tb_timestamp)" "$0" "$(( $(date +%s) - TB_STARTTEST ))" 1333 if [ $exitval -ne 77 ]; then 1334 echo $0 > testbench_test_failed_rsyslog 1335 fi 1336 exit $exitval 1337} 1338 1339 1340# skip a test; do cleanup when we detect it is necessary 1341skip_test(){ 1342 do_cleanup 1343 exit 77 1344} 1345 1346 1347# Helper function to call rsyslog project test error script 1348# $1 is the exit code 1349error_stats() { 1350 if [ "$RSYSLOG_STATSURL" == "" ]; then 1351 printf 'not reporting failure as RSYSLOG_STATSURL is not set\n' 1352 else 1353 echo reporting failure to $RSYSLOG_STATSURL 1354 testname=$($PYTHON $srcdir/urlencode.py "$RSYSLOG_TESTNAME") 1355 testenv=$($PYTHON $srcdir/urlencode.py "${VCS_SLUG:-$PWD}") 1356 testmachine=$($PYTHON $srcdir/urlencode.py "$HOSTNAME") 1357 logurl=$($PYTHON $srcdir/urlencode.py "${CI_BUILD_URL:-}") 1358 wget -nv -O/dev/null $RSYSLOG_STATSURL\?Testname=$testname\&Testenv=$testenv\&Testmachine=$testmachine\&exitcode=${1:-1}\&logurl=$logurl\&rndstr=jnxv8i34u78fg23 1359 fi 1360} 1361 1362# do the usual sequence check to see if everything was properly received. 1363# $4... are just to have the ability to pass in more options... 1364# add -v to chkseq if you need more verbose output 1365# argument --check-only can be used to simply do a check without abort in fail case 1366# env var SEQ_CHECK_FILE permits to override file name to check 1367# env var SEQ_CHECK_OPTIONS provide the ability to add extra options for check program 1368seq_check() { 1369 if [ "$SEQ_CHECK_FILE" == "" ]; then 1370 SEQ_CHECK_FILE="$RSYSLOG_OUT_LOG" 1371 fi 1372 if [ "$1" == "--check-only" ]; then 1373 check_only="YES" 1374 shift 1375 else 1376 check_only="NO" 1377 fi 1378 if [ "$1" == "" ]; then 1379 if [ "$NUMMESSAGES" == "" ]; then 1380 printf 'FAIL: seq_check called without parameters but NUMMESSAGES is unset!\n' 1381 error_exit 100 1382 fi 1383 # use default parameters 1384 startnum=0 1385 endnum=$(( NUMMESSAGES - 1 )) 1386 else 1387 startnum=$1 1388 endnum=$2 1389 fi 1390 if [ ! -f "$SEQ_CHECK_FILE" ]; then 1391 if [ "$check_only" == "YES" ]; then 1392 return 1 1393 fi 1394 printf 'FAIL: %s does not exist in seq_check!\n' "$SEQ_CHECK_FILE" 1395 error_exit 1 1396 fi 1397 if [ "${SEQ_CHECK_FILE##*.}" == "gz" ]; then 1398 gunzip -c "${SEQ_CHECK_FILE}" | $RS_SORTCMD $RS_SORT_NUMERIC_OPT | ./chkseq -s$startnum -e$endnum $3 $4 $5 $6 $7 $SEQ_CHECK_OPTIONS 1399 else 1400 $RS_SORTCMD $RS_SORT_NUMERIC_OPT < "${SEQ_CHECK_FILE}" | ./chkseq -s$startnum -e$endnum $3 $4 $5 $6 $7 $SEQ_CHECK_OPTIONS 1401 fi 1402 ret=$? 1403 if [ "$check_only" == "YES" ]; then 1404 return $ret 1405 fi 1406 if [ $ret -ne 0 ]; then 1407 if [ "${SEQ_CHECK_FILE##*.}" == "gz" ]; then 1408 gunzip -c "${SEQ_CHECK_FILE}" | $RS_SORTCMD $RS_SORT_NUMERIC_OPT \ 1409 | ./chkseq -s$startnum -e$endnum $3 $4 $5 $6 $7 $SEQ_CHECK_OPTIONS \ 1410 > $RSYSLOG_DYNNAME.error.log 1411 else 1412 $RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${SEQ_CHECK_FILE} \ 1413 > $RSYSLOG_DYNNAME.error.log 1414 fi 1415 echo "sequence error detected in $SEQ_CHECK_FILE" 1416 echo "number of lines in file: $(wc -l $SEQ_CHECK_FILE)" 1417 echo "sorted data has been placed in error.log, first 10 lines are:" 1418 cat -n "$RSYSLOG_DYNNAME.error.log" | head -10 1419 echo "---last 10 lines are:" 1420 cat -n "$RSYSLOG_DYNNAME.error.log" | tail -10 1421 echo "UNSORTED data, first 10 lines are:" 1422 cat -n "$RSYSLOG_DYNNAME.error.log" | head -10 1423 echo "---last 10 lines are:" 1424 cat -n "$RSYSLOG_DYNNAME.error.log" | tail -10 1425 # for interactive testing, create a static filename. We know this may get 1426 # mangled during a parallel test run 1427 mv -f $RSYSLOG_DYNNAME.error.log error.log 1428 error_exit 1 1429 fi 1430 return 0 1431} 1432 1433 1434# do the usual sequence check to see if everything was properly received. This is 1435# a duplicateof seq-check, but we could not change its calling conventions without 1436# breaking a lot of exitings test cases, so we preferred to duplicate the code here. 1437# $4... are just to have the ability to pass in more options... 1438# add -v to chkseq if you need more verbose output 1439seq_check2() { 1440 $RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG2_OUT_LOG} | ./chkseq -s$1 -e$2 $3 $4 $5 $6 $7 1441 if [ "$?" -ne "0" ]; then 1442 echo "sequence error detected" 1443 error_exit 1 1444 fi 1445} 1446 1447 1448# do the usual sequence check, but for gzip files 1449# $4... are just to have the ability to pass in more options... 1450gzip_seq_check() { 1451 if [ "$1" == "" ]; then 1452 if [ "$NUMMESSAGES" == "" ]; then 1453 printf 'FAIL: gzip_seq_check called without parameters but NUMMESSAGES is unset!\n' 1454 error_exit 100 1455 fi 1456 # use default parameters 1457 startnum=0 1458 endnum=$(( NUMMESSAGES - 1 )) 1459 else 1460 startnum=$1 1461 endnum=$2 1462 fi 1463 ls -l ${RSYSLOG_OUT_LOG} 1464 gunzip < ${RSYSLOG_OUT_LOG} | $RS_SORTCMD $RS_SORT_NUMERIC_OPT | ./chkseq -v -s$startnum -e$endnum $3 $4 $5 $6 $7 1465 if [ "$?" -ne "0" ]; then 1466 echo "sequence error detected" 1467 error_exit 1 1468 fi 1469} 1470 1471 1472# do a tcpflood run and check if it worked params are passed to tcpflood 1473tcpflood() { 1474 if [ "$1" == "--check-only" ]; then 1475 check_only="yes" 1476 shift 1477 else 1478 check_only="no" 1479 fi 1480 eval ./tcpflood -p$TCPFLOOD_PORT "$@" $TCPFLOOD_EXTRA_OPTS 1481 res=$? 1482 if [ "$check_only" == "yes" ]; then 1483 if [ "$res" -ne "0" ]; then 1484 echo "error during tcpflood on port ${TCPFLOOD_PORT}! But test continues..." 1485 fi 1486 return 0 1487 else 1488 if [ "$res" -ne "0" ]; then 1489 echo "error during tcpflood on port ${TCPFLOOD_PORT}! see ${RSYSLOG_OUT_LOG}.save for what was written" 1490 cp ${RSYSLOG_OUT_LOG} ${RSYSLOG_OUT_LOG}.save 1491 error_exit 1 stacktrace 1492 fi 1493 fi 1494} 1495 1496 1497# cleanup 1498# detect any left-over hanging instance 1499exit_test() { 1500 nhanging=0 1501 #for pid in $(ps -eo pid,args|grep '/tools/[r]syslogd ' |sed -e 's/\( *\)\([0-9]*\).*/\2/'); 1502 #do 1503 #echo "ERROR: left-over instance $pid, killing it" 1504 # ps -fp $pid 1505 # pwd 1506 # printf "we do NOT kill the instance as this does not work with multiple\n" 1507 # printf "builds per machine - this message is now informational to show prob exists!\n" 1508 # #kill -9 $pid 1509 # let "nhanging++" 1510 #done 1511 if test $nhanging -ne 0 1512 then 1513 echo "ABORT! hanging instances left at exit" 1514 #error_exit 1 1515 #exit 77 # for now, just skip - TODO: reconsider when supporting -j 1516 fi 1517 # now real cleanup 1518 rm -f rsyslog.action.*.include 1519 rm -f work rsyslog.out.* xlate*.lkp_tbl 1520 rm -rf test-logdir stat-file1 1521 rm -f rsyslog.conf.tlscert stat-file1 rsyslog.empty imfile-state:* 1522 rm -f ${TESTCONF_NM}.conf 1523 rm -f tmp.qi nocert 1524 rm -fr $RSYSLOG_DYNNAME* # delete all of our dynamic files 1525 unset TCPFLOOD_EXTRA_OPTS 1526 1527 # Extended Exit handling for kafka / zookeeper instances 1528 kafka_exit_handling "true" 1529 1530 printf '%s Test %s SUCCESSFUL (took %s seconds)\n' "$(tb_timestamp)" "$0" "$(( $(date +%s) - TB_STARTTEST ))" 1531 echo ------------------------------------------------------------------------------- 1532 exit 0 1533} 1534 1535# finds a free port that we can bind a listener to 1536# Obviously, any solution is race as another process could start 1537# just after us and grab the same port. However, in practice it seems 1538# to work pretty well. In any case, we should probably call this as 1539# late as possible before the usage of the port. 1540get_free_port() { 1541$PYTHON -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()' 1542} 1543 1544 1545# return the inode number of file $1; file MUST exist 1546get_inode() { 1547 if [ ! -f "$1" ]; then 1548 printf 'FAIL: file "%s" does not exist in get_inode\n' "$1" 1549 error_exit 100 1550 fi 1551 $PYTHON -c 'import os; import stat; print(os.lstat("'$1'")[stat.ST_INO])' 1552} 1553 1554 1555# check that logger supports -d option, if not skip test 1556# right now this is a bit dirty, we check distros which do not support it 1557check_logger_has_option_d() { 1558 skip_platform "FreeBSD" "We need logger -d option, which we do not have on FreeBSD" 1559 skip_platform "SunOS" "We need logger -d option, which we do not have on (all flavors of) Solaris" 1560 1561 # check also the case for busybox 1562 logger --help 2>&1 | head -n1 | grep -q BusyBox 1563 if [ $? -eq 0 ]; then 1564 echo "We need logger -d option, which we do not have have on Busybox" 1565 exit 77 1566 fi 1567} 1568 1569 1570require_relpEngineSetTLSLibByName() { 1571 ./have_relpEngineSetTLSLibByName 1572 if [ $? -eq 1 ]; then 1573 echo "relpEngineSetTLSLibByName API not available. Test stopped" 1574 exit 77 1575 fi; 1576} 1577 1578require_relpEngineVersion() { 1579 if [ "$1" == "" ]; then 1580 echo "require_relpEngineVersion missing required parameter (minimum version required)" 1581 exit 1 1582 else 1583 ./check_relpEngineVersion $1 1584 if [ $? -eq 1 ]; then 1585 echo "relpEngineVersion too OLD. Test stopped" 1586 exit 77 1587 fi; 1588 fi 1589} 1590 1591 1592# check if command $1 is available - will exit 77 when not OK 1593check_command_available() { 1594 have_cmd=0 1595 if [ "$1" == "timeout" ]; then 1596 if timeout --version &>/dev/null ; then 1597 have_cmd=1 1598 fi 1599 else 1600 command -v $1 1601 if [ $? -eq 0 ]; then 1602 have_cmd=1 1603 fi 1604 fi 1605 if [ $have_cmd -eq 0 ] ; then 1606 printf 'Testbench requires unavailable command: %s\n' "$1" 1607 exit 77 # do NOT error_exit here! 1608 fi 1609} 1610 1611 1612# sort the output file just like we normally do it, but do not call 1613# seqchk. This is needed for some operations where we need the sort 1614# result for some preprocessing. Note that a later seqchk will sort 1615# again, but that's not an issue. 1616presort() { 1617 rm -f $RSYSLOG_DYNNAME.presort 1618 $RS_SORTCMD $RS_SORT_NUMERIC_OPT < ${RSYSLOG_OUT_LOG} > $RSYSLOG_DYNNAME.presort 1619} 1620 1621 1622#START: ext kafka config 1623#dep_cache_dir=$(readlink -f .dep_cache) 1624export RS_ZK_DOWNLOAD=apache-zookeeper-3.6.3-bin.tar.gz 1625dep_cache_dir=$(pwd)/.dep_cache 1626dep_zk_url=https://downloads.apache.org/zookeeper/zookeeper-3.6.3/$RS_ZK_DOWNLOAD 1627dep_zk_cached_file=$dep_cache_dir/$RS_ZK_DOWNLOAD 1628 1629export RS_KAFKA_DOWNLOAD=kafka_2.13-2.8.0.tgz 1630dep_kafka_url="https://www.rsyslog.com/files/download/rsyslog/$RS_KAFKA_DOWNLOAD" 1631dep_kafka_cached_file=$dep_cache_dir/$RS_KAFKA_DOWNLOAD 1632 1633if [ -z "$ES_DOWNLOAD" ]; then 1634 export ES_DOWNLOAD=elasticsearch-7.14.1-linux-x86_64.tar.gz #elasticsearch-5.6.9.tar.gz 1635fi 1636if [ -z "$ES_PORT" ]; then 1637 export ES_PORT=19200 1638fi 1639dep_es_cached_file="$dep_cache_dir/$ES_DOWNLOAD" 1640 1641# kafka (including Zookeeper) 1642dep_kafka_dir_xform_pattern='s#^[^/]\+#kafka#g' 1643dep_zk_dir_xform_pattern='s#^[^/]\+#zk#g' 1644dep_es_dir_xform_pattern='s#^[^/]\+#es#g' 1645#dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka.log) 1646 1647# TODO Make dynamic work dir for multiple instances 1648#dep_work_dir=$(readlink -f .dep_wrk) 1649dep_work_dir=$(pwd)/.dep_wrk 1650#dep_kafka_work_dir=$dep_work_dir/kafka 1651#dep_zk_work_dir=$dep_work_dir/zk 1652 1653#END: ext kafka config 1654 1655kafka_exit_handling() { 1656 1657 # Extended Exit handling for kafka / zookeeper instances 1658 if [[ "$EXTRA_EXIT" == 'kafka' ]]; then 1659 1660 echo "stop kafka instance" 1661 stop_kafka '.dep_wrk' $1 1662 1663 echo "stop zookeeper instance" 1664 stop_zookeeper '.dep_wrk' $1 1665 fi 1666 1667 # Extended Exit handling for kafka / zookeeper instances 1668 if [[ "$EXTRA_EXIT" == 'kafkamulti' ]]; then 1669 echo "stop kafka instances" 1670 stop_kafka '.dep_wrk1' $1 1671 stop_kafka '.dep_wrk2' $1 1672 stop_kafka '.dep_wrk3' $1 1673 1674 echo "stop zookeeper instances" 1675 stop_zookeeper '.dep_wrk1' $1 1676 stop_zookeeper '.dep_wrk2' $1 1677 stop_zookeeper '.dep_wrk3' $1 1678 fi 1679} 1680 1681download_kafka() { 1682 if [ ! -d $dep_cache_dir ]; then 1683 echo "Creating dependency cache dir $dep_cache_dir" 1684 mkdir $dep_cache_dir 1685 fi 1686 if [ ! -f $dep_zk_cached_file ]; then 1687 if [ -f /local_dep_cache/$RS_ZK_DOWNLOAD ]; then 1688 printf 'Zookeeper: satisfying dependency %s from system cache.\n' "$RS_ZK_DOWNLOAD" 1689 cp /local_dep_cache/$RS_ZK_DOWNLOAD $dep_zk_cached_file 1690 else 1691 echo "Downloading zookeeper" 1692 echo wget -q $dep_zk_url -O $dep_zk_cached_file 1693 wget -q $dep_zk_url -O $dep_zk_cached_file 1694 if [ $? -ne 0 ] 1695 then 1696 echo error during wget, retry: 1697 wget $dep_zk_url -O $dep_zk_cached_file 1698 if [ $? -ne 0 ] 1699 then 1700 error_exit 1 1701 fi 1702 fi 1703 fi 1704 fi 1705 if [ ! -f $dep_kafka_cached_file ]; then 1706 if [ -f /local_dep_cache/$RS_KAFKA_DOWNLOAD ]; then 1707 printf 'Kafka: satisfying dependency %s from system cache.\n' "$RS_KAFKA_DOWNLOAD" 1708 cp /local_dep_cache/$RS_KAFKA_DOWNLOAD $dep_kafka_cached_file 1709 else 1710 echo "Downloading kafka" 1711 wget -q $dep_kafka_url -O $dep_kafka_cached_file 1712 if [ $? -ne 0 ] 1713 then 1714 echo error during wget, retry: 1715 wget $dep_kafka_url -O $dep_kafka_cached_file 1716 if [ $? -ne 0 ] 1717 then 1718 rm $dep_kafka_cached_file # a 0-size file may be left over 1719 error_exit 1 1720 fi 1721 fi 1722 fi 1723 fi 1724} 1725 1726stop_kafka() { 1727 if [ "$KEEP_KAFKA_RUNNING" == "YES" ]; then 1728 return 1729 fi 1730 i=0 1731 if [ "x$1" == "x" ]; then 1732 dep_work_dir=$(readlink -f .dep_wrk) 1733 dep_work_kafka_config="kafka-server.properties" 1734 else 1735 dep_work_dir=$(readlink -f $srcdir/$1) 1736 if [[ ".dep_wrk" != "$1" ]]; then 1737 dep_work_kafka_config="kafka-server$1.properties" 1738 else 1739 dep_work_kafka_config="kafka-server.properties" 1740 fi 1741 fi 1742 if [ ! -d $dep_work_dir/kafka ]; then 1743 echo "Kafka work-dir $dep_work_dir/kafka does not exist, no action needed" 1744 else 1745 # shellcheck disable=SC2009 - we do not grep on the process name! 1746 kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}') 1747 1748 echo "Stopping Kafka instance $1 ($dep_work_kafka_config/$kafkapid)" 1749 kill $kafkapid 1750 1751 # Check if kafka instance went down! 1752 while true; do 1753 # shellcheck disable=SC2009 - we do not grep on the process name! 1754 kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}') 1755 if [[ "" != "$kafkapid" ]]; then 1756 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 1757 if test $i -gt $TB_TIMEOUT_STARTSTOP; then 1758 echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) still running - Performing hard shutdown (-9)" 1759 kill -9 $kafkapid 1760 break 1761 fi 1762 (( i++ )) 1763 else 1764 # Break the loop 1765 break 1766 fi 1767 done 1768 1769 if [[ "$2" == 'true' ]]; then 1770 # Process shutdown, do cleanup now 1771 cleanup_kafka $1 1772 fi 1773 fi 1774} 1775 1776cleanup_kafka() { 1777 if [ "x$1" == "x" ]; then 1778 dep_work_dir=$(readlink -f .dep_wrk) 1779 else 1780 dep_work_dir=$(readlink -f $srcdir/$1) 1781 fi 1782 if [ ! -d $dep_work_dir/kafka ]; then 1783 echo "Kafka work-dir $dep_work_dir/kafka does not exist, no action needed" 1784 else 1785 echo "Cleanup Kafka instance $1" 1786 rm -rf $dep_work_dir/kafka 1787 fi 1788} 1789 1790stop_zookeeper() { 1791 if [ "$KEEP_KAFKA_RUNNING" == "YES" ]; then 1792 return 1793 fi 1794 i=0 1795 if [ "x$1" == "x" ]; then 1796 dep_work_dir=$(readlink -f .dep_wrk) 1797 dep_work_tk_config="zoo.cfg" 1798 else 1799 dep_work_dir=$(readlink -f $srcdir/$1) 1800 if [[ ".dep_wrk" != "$1" ]]; then 1801 dep_work_tk_config="zoo$1.cfg" 1802 else 1803 dep_work_tk_config="zoo.cfg" 1804 fi 1805 fi 1806 1807 if [ ! -d $dep_work_dir/zk ]; then 1808 echo "Zookeeper work-dir $dep_work_dir/zk does not exist, no action needed" 1809 else 1810 # Get Zookeeper pid instance 1811 zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}') 1812 echo "Stopping Zookeeper instance $1 ($dep_work_tk_config/$zkpid)" 1813 kill $zkpid 1814 1815 # Check if Zookeeper instance went down! 1816 zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}') 1817 if [[ "" != "$zkpid" ]]; then 1818 while true; do 1819 zkpid=$(ps aux | grep -i $dep_work_tk_config | grep java | grep -v grep | awk '{print $2}') 1820 if [[ "" != "$zkpid" ]]; then 1821 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 1822 if test $i -gt $TB_TIMEOUT_STARTSTOP; then 1823 echo "Zookeeper instance $dep_work_tk_config (PID $zkpid) still running - Performing hard shutdown (-9)" 1824 kill -9 $zkpid 1825 break 1826 fi 1827 (( i++ )) 1828 else 1829 break 1830 fi 1831 done 1832 fi 1833 1834 if [[ "$2" == 'true' ]]; then 1835 # Process shutdown, do cleanup now 1836 cleanup_zookeeper $1 1837 fi 1838 rm "$ZOOPIDFILE" 1839 fi 1840} 1841 1842cleanup_zookeeper() { 1843 if [ "x$1" == "x" ]; then 1844 dep_work_dir=$(readlink -f .dep_wrk) 1845 else 1846 dep_work_dir=$(readlink -f $srcdir/$1) 1847 fi 1848 rm -rf $dep_work_dir/zk 1849} 1850 1851start_zookeeper() { 1852 if [ "$KEEP_KAFKA_RUNNING" == "YES" ] && [ -f "$ZOOPIDFILE" ]; then 1853 if kill -0 "$(cat "$ZOOPIDFILE")"; then 1854 printf 'zookeeper already running, no need to start\n' 1855 return 1856 else 1857 printf 'INFO: zookeeper pidfile %s exists, but zookeeper not running\n' "$ZOOPIDFILE" 1858 printf 'deleting pid file\n' 1859 rm -f "$ZOOPIDFILE" 1860 fi 1861 fi 1862 if [ "x$1" == "x" ]; then 1863 dep_work_dir=$(readlink -f .dep_wrk) 1864 dep_work_tk_config="zoo.cfg" 1865 else 1866 dep_work_dir=$(readlink -f $srcdir/$1) 1867 dep_work_tk_config="zoo$1.cfg" 1868 fi 1869 1870 if [ ! -f $dep_zk_cached_file ]; then 1871 echo "Dependency-cache does not have zookeeper package, did you download dependencies?" 1872 error_exit 77 1873 fi 1874 if [ ! -d $dep_work_dir ]; then 1875 echo "Creating dependency working directory" 1876 mkdir -p $dep_work_dir 1877 fi 1878 if [ -d $dep_work_dir/zk ]; then 1879 (cd $dep_work_dir/zk && ./bin/zkServer.sh stop) 1880 $TESTTOOL_DIR/msleep 2000 1881 fi 1882 rm -rf $dep_work_dir/zk 1883 (cd $dep_work_dir && tar -zxvf $dep_zk_cached_file --xform $dep_zk_dir_xform_pattern --show-transformed-names) > /dev/null 1884 cp -f $srcdir/testsuites/$dep_work_tk_config $dep_work_dir/zk/conf/zoo.cfg 1885 echo "Starting Zookeeper instance $1" 1886 (cd $dep_work_dir/zk && ./bin/zkServer.sh start) 1887 wait_startup_pid "$ZOOPIDFILE" 1888} 1889 1890start_kafka() { 1891 printf '%s starting kafka\n' "$(tb_timestamp)" 1892 1893 # Force IPv4 usage of Kafka! 1894 export KAFKA_HEAP_OPTS="-Xms256m -Xmx256m" # we need to take care for smaller CI systems! 1895 export KAFKA_OPTS="-Djava.net.preferIPv4Stack=True" 1896 if [ "x$1" == "x" ]; then 1897 dep_work_dir=$(readlink -f .dep_wrk) 1898 dep_work_kafka_config="kafka-server.properties" 1899 else 1900 dep_work_dir=$(readlink -f $1) 1901 dep_work_kafka_config="kafka-server$1.properties" 1902 fi 1903 1904 # shellcheck disable=SC2009 - we do not grep on the process name! 1905 kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}') 1906 if [ "$KEEP_KAFKA_RUNNING" == "YES" ] && [ "$kafkapid" != "" ]; then 1907 printf 'kafka already running, no need to start\n' 1908 return 1909 fi 1910 1911 if [ ! -f $dep_kafka_cached_file ]; then 1912 echo "Dependency-cache does not have kafka package, did you download dependencies?" 1913 error_exit 77 1914 fi 1915 if [ ! -d $dep_work_dir ]; then 1916 echo "Creating dependency working directory" 1917 mkdir -p $dep_work_dir 1918 fi 1919 rm -rf $dep_work_dir/kafka 1920 ( cd $dep_work_dir && 1921 tar -zxvf $dep_kafka_cached_file --xform $dep_kafka_dir_xform_pattern --show-transformed-names) > /dev/null 1922 cp -f $srcdir/testsuites/$dep_work_kafka_config $dep_work_dir/kafka/config/ 1923 #if [ "$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}')" != "" ]; then 1924 echo "Starting Kafka instance $dep_work_kafka_config" 1925 (cd $dep_work_dir/kafka && ./bin/kafka-server-start.sh -daemon ./config/$dep_work_kafka_config) 1926 $TESTTOOL_DIR/msleep 4000 1927 1928 # Check if kafka instance came up! 1929 # shellcheck disable=SC2009 - we do not grep on the process name! 1930 kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}') 1931 if [[ "" != "$kafkapid" ]]; 1932 then 1933 echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) started ... " 1934 else 1935 echo "Starting Kafka instance $dep_work_kafka_config, SECOND ATTEMPT!" 1936 (cd $dep_work_dir/kafka && ./bin/kafka-server-start.sh -daemon ./config/$dep_work_kafka_config) 1937 $TESTTOOL_DIR/msleep 4000 1938 1939 # shellcheck disable=SC2009 - we do not grep on the process name! 1940 kafkapid=$(ps aux | grep -i $dep_work_kafka_config | grep java | grep -v grep | awk '{print $2}') 1941 if [[ "" != "$kafkapid" ]]; 1942 then 1943 echo "Kafka instance $dep_work_kafka_config (PID $kafkapid) started ... " 1944 else 1945 echo "Failed to start Kafka instance for $dep_work_kafka_config" 1946 echo "displaying all kafka logs now:" 1947 for logfile in $dep_work_dir/logs/*; do 1948 echo "FILE: $logfile" 1949 cat $logfile 1950 done 1951 error_exit 77 1952 fi 1953 fi 1954} 1955 1956create_kafka_topic() { 1957 if [ "x$2" == "x" ]; then 1958 dep_work_dir=$(readlink -f .dep_wrk) 1959 else 1960 dep_work_dir=$(readlink -f $2) 1961 fi 1962 if [ "x$3" == "x" ]; then 1963 dep_work_port='2181' 1964 else 1965 dep_work_port=$3 1966 fi 1967 if [ ! -d $dep_work_dir/kafka ]; then 1968 echo "Kafka work-dir $dep_work_dir/kafka does not exist, did you start kafka?" 1969 exit 1 1970 fi 1971 if [ "x$1" == "x" ]; then 1972 echo "Topic-name not provided." 1973 exit 1 1974 fi 1975 1976 # we need to make sure replication has is working. So let's loop until no more 1977 # errors (or timeout) - see also: https://github.com/rsyslog/rsyslog/issues/3045 1978 timeout_ready=100 # roughly 10 sec 1979 is_retry=0 1980 i=0 1981 while true; do 1982 text=$(cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --create --topic $1 --replication-factor 1 --partitions 2 ) 1983 grep "Error.* larger than available brokers: 0" <<<"$text" 1984 res=$? 1985 if [ $res -ne 0 ]; then 1986 echo looks like brokers are available - continue... 1987 break 1988 fi 1989 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 1990 (( i++ )) 1991 if test $i -gt $timeout_ready; then 1992 echo "ENV ERROR: kafka brokers did not come up:" 1993 cat -n <<< $text 1994 if [ $is_retry == 1 ]; then 1995 echo "SKIPing test as the env is not ready for it" 1996 exit 177 1997 fi 1998 echo "RETRYING kafka startup, doing shutdown and startup" 1999 stop_zookeeper ""; stop_kafka "" 2000 start_zookeeper ""; start_kafka "" 2001 echo "READY for RETRY" 2002 is_retry=1 2003 i=0 2004 fi 2005 done 2006 2007 # we *assume* now all goes well 2008 (cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --alter --topic $1 --delete-config retention.ms) 2009 (cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --zookeeper localhost:$dep_work_port/kafka --alter --topic $1 --delete-config retention.bytes) 2010} 2011 2012delete_kafka_topic() { 2013 if [ "x$2" == "x" ]; then 2014 dep_work_dir=$(readlink -f .dep_wrk) 2015 else 2016 dep_work_dir=$(readlink -f $srcdir/$2) 2017 fi 2018 if [ "x$3" == "x" ]; then 2019 dep_work_port='2181' 2020 else 2021 dep_work_port=$3 2022 fi 2023 2024 echo "deleting kafka-topic $1" 2025 (cd $dep_work_dir/kafka && ./bin/kafka-topics.sh --delete --zookeeper localhost:$dep_work_port/kafka --topic $1) 2026} 2027 2028dump_kafka_topic() { 2029 if [ "x$2" == "x" ]; then 2030 dep_work_dir=$(readlink -f .dep_wrk) 2031 dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka.log) 2032 else 2033 dep_work_dir=$(readlink -f $srcdir/$2) 2034 dep_kafka_log_dump=$(readlink -f rsyslog.out.kafka$2.log) 2035 fi 2036 if [ "x$3" == "x" ]; then 2037 dep_work_port='2181' 2038 else 2039 dep_work_port=$3 2040 fi 2041 2042 echo "dumping kafka-topic $1" 2043 if [ ! -d $dep_work_dir/kafka ]; then 2044 echo "Kafka work-dir does not exist, did you start kafka?" 2045 exit 1 2046 fi 2047 if [ "x$1" == "x" ]; then 2048 echo "Topic-name not provided." 2049 exit 1 2050 fi 2051 2052 (cd $dep_work_dir/kafka && ./bin/kafka-console-consumer.sh --timeout-ms 2000 --from-beginning --zookeeper localhost:$dep_work_port/kafka --topic $1 > $dep_kafka_log_dump) 2053} 2054 2055dump_kafka_serverlog() { 2056 if [ "x$1" == "x" ]; then 2057 dep_work_dir=$(readlink -f .dep_wrk) 2058 else 2059 dep_work_dir=$(readlink -f $srcdir/$1) 2060 fi 2061 if [ ! -d $dep_work_dir/kafka ]; then 2062 echo "Kafka work-dir $dep_work_dir/kafka does not exist, no kafka debuglog" 2063 else 2064 echo "Dumping server.log from Kafka instance $1" 2065 echo "=========================================" 2066 cat $dep_work_dir/kafka/logs/server.log 2067 echo "=========================================" 2068 printf 'non-info is:\n' 2069 grep --invert-match '^\[.* INFO ' $dep_work_dir/kafka/logs/server.log | grep '^\[' 2070 fi 2071} 2072 2073dump_zookeeper_serverlog() { 2074 if [ "x$1" == "x" ]; then 2075 dep_work_dir=$(readlink -f .dep_wrk) 2076 else 2077 dep_work_dir=$(readlink -f $srcdir/$1) 2078 fi 2079 echo "Dumping zookeeper.out from Zookeeper instance $1" 2080 echo "=========================================" 2081 cat $dep_work_dir/zk/zookeeper.out 2082 echo "=========================================" 2083 printf 'non-info is:\n' 2084 grep --invert-match '^\[.* INFO ' $dep_work_dir/zk/zookeeper.out | grep '^\[' 2085} 2086 2087 2088# download elasticsearch files, if necessary 2089download_elasticsearch() { 2090 if [ ! -d $dep_cache_dir ]; then 2091 echo "Creating dependency cache dir $dep_cache_dir" 2092 mkdir $dep_cache_dir 2093 fi 2094 if [ ! -f $dep_es_cached_file ]; then 2095 if [ -f /local_dep_cache/$ES_DOWNLOAD ]; then 2096 printf 'ElasticSearch: satisfying dependency %s from system cache.\n' "$ES_DOWNLOAD" 2097 cp /local_dep_cache/$ES_DOWNLOAD $dep_es_cached_file 2098 else 2099 dep_es_url="https://www.rsyslog.com/files/download/rsyslog/$ES_DOWNLOAD" 2100 printf 'ElasticSearch: satisfying dependency %s from %s\n' "$ES_DOWNLOAD" "$dep_es_url" 2101 wget -q $dep_es_url -O $dep_es_cached_file 2102 fi 2103 fi 2104} 2105 2106 2107# prepare elasticsearch execution environment 2108# this also stops any previous elasticsearch instance, if found 2109prepare_elasticsearch() { 2110 stop_elasticsearch # stop if it is still running 2111 # Heap Size (limit to 128MB for testbench! default is way to HIGH) 2112 export ES_JAVA_OPTS="-Xms128m -Xmx128m" 2113 2114 dep_work_dir=$(readlink -f .dep_wrk) 2115 dep_work_es_config="es.yml" 2116 dep_work_es_pidfile="es.pid" 2117 2118 if [ ! -f $dep_es_cached_file ]; then 2119 echo "Dependency-cache does not have elasticsearch package, did " 2120 echo "you download dependencies?" 2121 error_exit 100 2122 fi 2123 if [ ! -d $dep_work_dir ]; then 2124 echo "Creating dependency working directory" 2125 mkdir -p $dep_work_dir 2126 fi 2127 if [ -d $dep_work_dir/es ]; then 2128 if [ -e $dep_work_es_pidfile ]; then 2129 es_pid=$(cat $dep_work_es_pidfile) 2130 kill -SIGTERM $es_pid 2131 wait_pid_termination $es_pid 2132 fi 2133 fi 2134 rm -rf $dep_work_dir/es 2135 echo TEST USES ELASTICSEARCH BINARY $dep_es_cached_file 2136 (cd $dep_work_dir && tar -zxf $dep_es_cached_file --xform $dep_es_dir_xform_pattern --show-transformed-names) > /dev/null 2137 if [ -n "${ES_PORT:-}" ] ; then 2138 rm -f $dep_work_dir/es/config/elasticsearch.yml 2139 sed "s/^http.port:.*\$/http.port: ${ES_PORT}/" $srcdir/testsuites/$dep_work_es_config > $dep_work_dir/es/config/elasticsearch.yml 2140 else 2141 cp -f $srcdir/testsuites/$dep_work_es_config $dep_work_dir/es/config/elasticsearch.yml 2142 fi 2143 2144 if [ ! -d $dep_work_dir/es/data ]; then 2145 echo "Creating elastic search directories" 2146 mkdir -p $dep_work_dir/es/data 2147 mkdir -p $dep_work_dir/es/logs 2148 mkdir -p $dep_work_dir/es/tmp 2149 fi 2150 echo ElasticSearch prepared for use in test. 2151} 2152 2153 2154# ensure that a basic, suitable instance of elasticsearch is running. This is part 2155# of an effort to avoid restarting elasticsearch more often than necessary. 2156ensure_elasticsearch_ready() { 2157 if printf '%s:%s:%s\n' "$ES_DOWNLOAD" "$ES_PORT" "$(cat es.pid)" \ 2158 | cmp -b - elasticsearch.running 2159 then 2160 printf 'Elasticsearch already running, NOT restarting it\n' 2161 else 2162 cat elasticsearch.running 2163 cleanup_elasticsearch 2164 dep_es_cached_file="$dep_cache_dir/$ES_DOWNLOAD" 2165 download_elasticsearch 2166 prepare_elasticsearch 2167 if [ "$1" != "--no-start" ]; then 2168 start_elasticsearch 2169 printf '%s:%s:%s\n' "$ES_DOWNLOAD" "$ES_PORT" "$(cat es.pid)" > elasticsearch.running 2170 fi 2171 fi 2172 if [ "$1" != "--no-start" ]; then 2173 printf 'running elasticsearch instance: %s\n' "$(cat elasticsearch.running)" 2174 init_elasticsearch 2175 fi 2176} 2177 2178 2179# $2, if set, is the number of additional ES instances 2180start_elasticsearch() { 2181 # Heap Size (limit to 128MB for testbench! defaults is way to HIGH) 2182 export ES_JAVA_OPTS="-Xms128m -Xmx128m" 2183 2184 dep_work_dir=$(readlink -f .dep_wrk) 2185 dep_work_es_config="es.yml" 2186 dep_work_es_pidfile="$(pwd)/es.pid" 2187 echo "Starting ElasticSearch" 2188 2189 # THIS IS THE ACTUAL START of ES 2190 $dep_work_dir/es/bin/elasticsearch -p $dep_work_es_pidfile -d 2191 $TESTTOOL_DIR/msleep 2000 2192 wait_startup_pid $dep_work_es_pidfile 2193 printf 'elasticsearch pid is %s\n' "$(cat $dep_work_es_pidfile)" 2194 2195 # Wait for startup with hardcoded timeout 2196 timeoutend=60 2197 timeseconds=0 2198 # Loop until elasticsearch port is reachable or until 2199 # timeout is reached! 2200 until [ "$(curl --silent --show-error --connect-timeout 1 http://localhost:${ES_PORT:-19200} | grep 'rsyslog-testbench')" != "" ]; do 2201 echo "--- waiting for ES startup: $timeseconds seconds" 2202 $TESTTOOL_DIR/msleep 1000 2203 (( timeseconds=timeseconds + 1 )) 2204 2205 if [ "$timeseconds" -gt "$timeoutend" ]; then 2206 echo "--- TIMEOUT ( $timeseconds ) reached!!!" 2207 error_exit 1 2208 fi 2209 done 2210 $TESTTOOL_DIR/msleep 2000 2211 echo ES startup succeeded 2212} 2213 2214# read data from ES to a local file so that we can process 2215# $1 - number of records (ES does not return all records unless you tell it explicitly). 2216# $2 - ES port 2217es_getdata() { 2218 curl --silent -XPUT --show-error -H 'Content-Type: application/json' "http://localhost:${2:-$ES_PORT}/rsyslog_testbench/_settings" -d '{ "index" : { "max_result_window" : '${1:-$NUMMESSAGES}' } }' 2219 # refresh to ensure we get the latest data 2220 curl --silent localhost:${2:-$ES_PORT}/rsyslog_testbench/_refresh 2221 curl --silent localhost:${2:-$ES_PORT}/rsyslog_testbench/_search?size=${1:-$NUMMESSAGES} > $RSYSLOG_DYNNAME.work 2222 $PYTHON $srcdir/es_response_get_msgnum.py > ${RSYSLOG_OUT_LOG} 2223} 2224 2225# a standard method to support shutdown & queue empty check for a wide range 2226# of elasticsearch tests. This works if we assume that ES has delivered messages 2227# to the default location. 2228es_shutdown_empty_check() { 2229 es_getdata $NUMMESSAGES $ES_PORT 2230 lines=$(wc -l < "$RSYSLOG_OUT_LOG") 2231 if [ "$lines" -eq $NUMMESSAGES ]; then 2232 printf '%s es_shutdown_empty_check: success, have %d lines\n' "$(tb_timestamp)" $lines 2233 return 0 2234 fi 2235 printf '%s es_shutdown_empty_check: have %d lines, expecting %d\n' "$(tb_timestamp)" $lines $NUMMESSAGES 2236 return 1 2237} 2238 2239 2240stop_elasticsearch() { 2241 dep_work_dir=$(readlink -f $srcdir) 2242 dep_work_es_pidfile="es.pid" 2243 rm elasticsearch.running 2244 if [ -e $dep_work_es_pidfile ]; then 2245 es_pid=$(cat $dep_work_es_pidfile) 2246 printf 'stopping ES with pid %d\n' $es_pid 2247 kill -SIGTERM $es_pid 2248 wait_pid_termination $es_pid 2249 fi 2250} 2251 2252# cleanup es leftovers when it is being stopped 2253cleanup_elasticsearch() { 2254 dep_work_dir=$(readlink -f .dep_wrk) 2255 dep_work_es_pidfile="es.pid" 2256 stop_elasticsearch 2257 rm -f $dep_work_es_pidfile 2258 rm -rf $dep_work_dir/es 2259} 2260 2261# initialize local Elasticsearch *testbench* instance for the next 2262# test. NOTE: do NOT put anything useful on that instance! 2263init_elasticsearch() { 2264 curl --silent -XDELETE localhost:${ES_PORT:-9200}/rsyslog_testbench 2265} 2266 2267omhttp_start_server() { 2268 # Args: 1=port 2=server args 2269 # Args 2 and up are passed along as is to omhttp_server.py 2270 omhttp_server_py=$srcdir/omhttp_server.py 2271 if [ ! -f $omhttp_server_py ]; then 2272 echo "Cannot find ${omhttp_server_py} for omhttp test" 2273 error_exit 1 2274 fi 2275 2276 if [ "x$1" == "x" ]; then 2277 omhttp_server_port="8080" 2278 else 2279 omhttp_server_port="$1" 2280 fi 2281 2282 # Create work directory for parallel tests 2283 omhttp_work_dir=$RSYSLOG_DYNNAME/omhttp 2284 2285 omhttp_server_pidfile="${omhttp_work_dir}/omhttp_server.pid" 2286 omhttp_server_logfile="${omhttp_work_dir}/omhttp_server.log" 2287 mkdir -p ${omhttp_work_dir} 2288 2289 server_args="-p $omhttp_server_port ${*:2}" 2290 2291 timeout 30m $PYTHON ${omhttp_server_py} ${server_args} >> ${omhttp_server_logfile} 2>&1 & 2292 if [ ! $? -eq 0 ]; then 2293 echo "Failed to start omhttp test server." 2294 rm -rf $omhttp_work_dir 2295 error_exit 1 2296 fi 2297 2298 omhttp_server_pid=$! 2299 echo ${omhttp_server_pid} > ${omhttp_server_pidfile} 2300 echo "Started omhttp test server with args ${server_args} with pid ${omhttp_server_pid}" 2301} 2302 2303omhttp_stop_server() { 2304 # Args: None 2305 omhttp_work_dir=$RSYSLOG_DYNNAME/omhttp 2306 if [ ! -d $omhttp_work_dir ]; then 2307 echo "omhttp server $omhttp_work_dir does not exist, no action needed" 2308 else 2309 echo "Stopping omhttp server" 2310 kill -9 $(cat ${omhttp_work_dir}/omhttp_server.pid) > /dev/null 2>&1 2311 rm -rf $omhttp_work_dir 2312 fi 2313} 2314 2315omhttp_get_data() { 2316 # Args: 1=port 2=endpoint 3=batchformat(optional) 2317 if [ "x$1" == "x" ]; then 2318 omhttp_server_port=8080 2319 else 2320 omhttp_server_port=$1 2321 fi 2322 2323 if [ "x$2" == "x" ]; then 2324 omhttp_path="" 2325 else 2326 omhttp_path=$2 2327 fi 2328 2329 # The test server returns a json encoded array of strings containing whatever omhttp sent to it in each request 2330 python_init="import json, sys; dat = json.load(sys.stdin)" 2331 python_print="print('\n'.join(out))" 2332 if [ "x$3" == "x" ]; then 2333 # dat = ['{"msgnum":"1"}, '{"msgnum":"2"}', '{"msgnum":"3"}', '{"msgnum":"4"}'] 2334 python_parse="$python_init; out = [json.loads(l)['msgnum'] for l in dat]; $python_print" 2335 else 2336 if [ "x$3" == "xjsonarray" ]; then 2337 # dat = ['[{"msgnum":"1"},{"msgnum":"2"}]', '[{"msgnum":"3"},{"msgnum":"4"}]'] 2338 python_parse="$python_init; out = [l['msgnum'] for a in dat for l in json.loads(a)]; $python_print" 2339 elif [ "x$3" == "xnewline" ]; then 2340 # dat = ['{"msgnum":"1"}\n{"msgnum":"2"}', '{"msgnum":"3"}\n{"msgnum":"4"}'] 2341 python_parse="$python_init; out = [json.loads(l)['msgnum'] for a in dat for l in a.split('\n')]; $python_print" 2342 elif [ "x$3" == "xkafkarest" ]; then 2343 # dat = ['{"records":[{"value":{"msgnum":"1"}},{"value":{"msgnum":"2"}}]}', 2344 # '{"records":[{"value":{"msgnum":"3"}},{"value":{"msgnum":"4"}}]}'] 2345 python_parse="$python_init; out = [l['value']['msgnum'] for a in dat for l in json.loads(a)['records']]; $python_print" 2346 elif [ "x$3" == "xlokirest" ]; then 2347 # dat = ['{"streams":[{"msgnum":"1"},{"msgnum":"2"}]}', 2348 # '{"streams":[{"msgnum":"3"},{"msgnum":"4"}]}'] 2349 python_parse="$python_init; out = [l['msgnum'] for a in dat for l in json.loads(a)['streams']]; $python_print" 2350 else 2351 # use newline parsing as default 2352 python_parse="$python_init; out = [json.loads(l)['msgnum'] for a in dat for l in a.split('\n')]; $python_print" 2353 fi 2354 2355 fi 2356 2357 omhttp_url="localhost:${omhttp_server_port}/${omhttp_path}" 2358 curl -s ${omhttp_url} \ 2359 | $PYTHON -c "${python_parse}" | sort -n \ 2360 > ${RSYSLOG_OUT_LOG} 2361} 2362 2363 2364# prepare MySQL for next test 2365# each test receives its own database so that we also can run in parallel 2366mysql_prep_for_test() { 2367 mysql -u rsyslog --password=testbench -e "CREATE DATABASE $RSYSLOG_DYNNAME; " 2368 mysql -u rsyslog --password=testbench --database $RSYSLOG_DYNNAME \ 2369 -e "CREATE TABLE SystemEvents (ID int unsigned not null auto_increment primary key, CustomerID bigint,ReceivedAt datetime NULL,DeviceReportedTime datetime NULL,Facility smallint NULL,Priority smallint NULL,FromHost varchar(60) NULL,Message text,NTSeverity int NULL,Importance int NULL,EventSource varchar(60),EventUser varchar(60) NULL,EventCategory int NULL,EventID int NULL,EventBinaryData text NULL,MaxAvailable int NULL,CurrUsage int NULL,MinUsage int NULL,MaxUsage int NULL,InfoUnitID int NULL,SysLogTag varchar(60),EventLogType varchar(60),GenericFileName VarChar(60),SystemID int NULL); CREATE TABLE SystemEventsProperties (ID int unsigned not null auto_increment primary key,SystemEventID int NULL,ParamName varchar(255) NULL,ParamValue text NULL);" 2370 mysql --user=rsyslog --password=testbench --database $RSYSLOG_DYNNAME \ 2371 -e "truncate table SystemEvents;" 2372 # TEST ONLY: 2373 #mysql -s --user=rsyslog --password=testbench --database $RSYSLOG_DYNNAME \ 2374 #-e "select substring(Message,9,8) from SystemEvents;" 2375 # END TEST 2376 printf 'mysql ready for test, database: %s\n' $RSYSLOG_DYNNAME 2377} 2378 2379# get data from mysql DB so that we can do seq_check on it. 2380mysql_get_data() { 2381 # note "-s" is required to suppress the select "field header" 2382 mysql -s --user=rsyslog --password=testbench --database $RSYSLOG_DYNNAME \ 2383 -e "select substring(Message,9,8) from SystemEvents;" \ 2384 > $RSYSLOG_OUT_LOG 2> "$RSYSLOG_DYNNAME.mysqlerr" 2385 grep -iv "Using a password on the command line interface can be insecure." < "$RSYSLOG_DYNNAME.mysqlerr" 2386} 2387 2388# cleanup any temp data from mysql test 2389# if we do not do this, we may run out of disk space 2390# especially in container environment. 2391mysql_cleanup_test() { 2392 mysql --user=rsyslog --password=testbench -e "drop database $RSYSLOG_DYNNAME;" \ 2393 2>&1 | grep -iv "Using a password on the command line interface can be insecure." 2394} 2395 2396 2397# $1 - replacement string 2398# $2 - start search string 2399# $3 - file name 2400# $4 - expected value 2401first_column_sum_check() { 2402 sum=$(grep "$2" < "$3" | sed -e "$1" | awk '{s+=$1} END {print s}') 2403 if [ "x${sum}" != "x$4" ]; then 2404 printf '\n============================================================\n' 2405 echo FAIL: sum of first column with edit-expr "'$1'" run over lines from file "'$3'" matched by "'$2'" equals "'$sum'" which is NOT equal to EXPECTED value of "'$4'" 2406 echo "file contents:" 2407 cat $3 2408 error_exit 1 2409 fi 2410} 2411 2412# 2413# Helper functions to start/stop python snmp trap receiver 2414# 2415snmp_start_trapreceiver() { 2416 # Args: 1=port 2=outputfilename 2417 # Args 2 and up are passed along as is to snmptrapreceiver.py 2418 snmptrapreceiver=$srcdir/snmptrapreceiver.py 2419 if [ ! -f ${snmptrapreceiver} ]; then 2420 echo "Cannot find ${snmptrapreceiver} for omsnmp test" 2421 error_exit 1 2422 fi 2423 2424 if [ "x$1" == "x" ]; then 2425 snmp_server_port="10162" 2426 else 2427 snmp_server_port="$1" 2428 fi 2429 2430 if [ "x$2" == "x" ]; then 2431 output_file="${RSYSLOG_DYNNAME}.snmp.out" 2432 else 2433 output_file="$2" 2434 fi 2435 2436 # Create work directory for parallel tests 2437 snmp_work_dir=${RSYSLOG_DYNNAME}/snmptrapreceiver 2438 2439 snmp_server_pidfile="${snmp_work_dir}/snmp_server.pid" 2440 snmp_server_logfile="${snmp_work_dir}/snmp_server.log" 2441 mkdir -p ${snmp_work_dir} 2442 2443 server_args="${snmp_server_port} 127.0.0.1 ${output_file}" 2444 2445 $PYTHON ${snmptrapreceiver} ${server_args} ${snmp_server_logfile} >> ${snmp_server_logfile} 2>&1 & 2446 if [ ! $? -eq 0 ]; then 2447 echo "Failed to start snmptrapreceiver." 2448 rm -rf ${snmp_work_dir} 2449 error_exit 1 2450 fi 2451 2452 snmp_server_pid=$! 2453 echo ${snmp_server_pid} > ${snmp_server_pidfile} 2454 2455 while test ! -s "${snmp_server_logfile}"; do 2456 $TESTTOOL_DIR/msleep 100 # wait 100 milliseconds 2457 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 2458 printf '%s ABORT! Timeout waiting on startup (pid file %s)\n' "$(tb_timestamp)" "$1" 2459 ls -l "$1" 2460 ps -fp $(cat "$1") 2461 snmp_stop_trapreceiver 2462 error_exit 1 2463# else 2464# echo "waiting...${snmp_server_logfile}..." 2465 fi 2466 done 2467 2468 echo "Started snmptrapreceiver with args ${server_args} with pid ${snmp_server_pid}" 2469} 2470 2471snmp_stop_trapreceiver() { 2472 # Args: None 2473 snmp_work_dir=${RSYSLOG_DYNNAME}/snmptrapreceiver 2474 if [ ! -d ${snmp_work_dir} ]; then 2475 echo "snmptrapreceiver server ${snmp_work_dir} does not exist, no action needed" 2476 else 2477 echo "Stopping snmptrapreceiver server" 2478 kill -9 $(cat ${snmp_work_dir}/snmp_server.pid) > /dev/null 2>&1 2479 # Done at testexit already!: rm -rf ${snmp_work_dir} 2480 fi 2481} 2482 2483wait_for_stats_flush() { 2484 echo "will wait for stats push" 2485 emitmsg=0 2486 while [[ ! -f $1 ]]; do 2487 if [ $((++emitmsg % 10)) == 0 ]; then 2488 echo waiting for stats file "'$1'" to be created 2489 fi 2490 $TESTTOOL_DIR/msleep 100 2491 done 2492 prev_count=$(grep -c 'BEGIN$' <$1) 2493 new_count=$prev_count 2494 start_loop="$(date +%s)" 2495 emit_waiting=0 2496 while [[ "x$prev_count" == "x$new_count" ]]; do 2497 # busy spin, because it allows as close timing-coordination 2498 # in actual test run as possible 2499 if [ $(date +%s) -gt $(( TB_STARTTEST + TB_TEST_MAX_RUNTIME )) ]; then 2500 printf '%s ABORT! Timeout waiting on stats push\n' "$(tb_timestamp)" "$1" 2501 error_exit 1 2502 else 2503 # waiting for 1000 is heuristically "sufficiently but not too 2504 # frequent" enough 2505 if [ $((++emit_waiting)) == 1000 ]; then 2506 printf 'still waiting for stats push...\n' 2507 emit_waiting=0 2508 fi 2509 fi 2510 new_count=$(grep -c 'BEGIN$' <"$1") 2511 done 2512 echo "stats push registered" 2513} 2514 2515 2516case $1 in 2517 'init') $srcdir/killrsyslog.sh # kill rsyslogd if it runs for some reason 2518 source set-envvars 2519 # for (solaris) load debugging, uncomment next 2 lines: 2520 #export LD_DEBUG=all 2521 #ldd ../tools/rsyslogd 2522 2523 # environment debug 2524 #find / -name "librelp.so*" 2525 #ps -ef |grep syslog 2526 #netstat -a | grep LISTEN 2527 2528 # cleanup of hanging instances from previous runs 2529 # practice has shown this is pretty useful! 2530 #for pid in $(ps -eo pid,args|grep '/tools/[r]syslogd ' |sed -e 's/\( *\)\([0-9]*\).*/\2/'); 2531 #do 2532 #echo "ERROR: left-over previous instance $pid, killing it" 2533 #ps -fp $pid 2534 #pwd 2535 #kill -9 $pid 2536 #done 2537 # end cleanup 2538 2539 # some default names (later to be set in other parts, once we support fully 2540 # parallel tests) 2541 export RSYSLOG_DFLT_LOG_INTERNAL=1 # testbench needs internal messages logged internally! 2542 if [ -f testbench_test_failed_rsyslog ] && [ "$ABORT_ALL_ON_TEST_FAIL" == "YES" ]; then 2543 echo NOT RUNNING TEST as previous test $(cat testbench_test_failed_rsyslog) failed. 2544 exit 77 2545 fi 2546 if [ "$RSYSLOG_DYNNAME" != "" ]; then 2547 echo "FAIL: \$RSYSLOG_DYNNAME already set in init" 2548 echo "hint: was init accidentally called twice?" 2549 exit 2 2550 fi 2551 export RSYSLOG_DYNNAME="rstb_$(./test_id $(basename $0))$(tr -dc 'a-zA-Z0-9' < /dev/urandom | head --bytes 4)" 2552 export RSYSLOG_OUT_LOG="${RSYSLOG_DYNNAME}.out.log" 2553 export RSYSLOG2_OUT_LOG="${RSYSLOG_DYNNAME}_2.out.log" 2554 export RSYSLOG_PIDBASE="${RSYSLOG_DYNNAME}:" # also used by instance 2! 2555 #export IMDIAG_PORT=13500 DELETE ME 2556 #export IMDIAG_PORT2=13501 DELETE ME 2557 #export TCPFLOOD_PORT=13514 DELETE ME 2558 2559 # Extra Variables for Test statistic reporting 2560 export RSYSLOG_TESTNAME=$(basename $0) 2561 2562 # we create one file with the test name, so that we know what was 2563 # left over if "make distcheck" complains 2564 touch $RSYSLOG_DYNNAME-$(basename $0).test_id 2565 2566 if [ -z $RS_SORTCMD ]; then 2567 RS_SORTCMD="sort" 2568 fi 2569 if [ -z $RS_SORT_NUMERIC_OPT ]; then 2570 if [ "$(uname)" == "AIX" ]; then 2571 RS_SORT_NUMERIC_OPT=-n 2572 else 2573 RS_SORT_NUMERIC_OPT=-g 2574 fi 2575 fi 2576 if [ -z $RS_CMPCMD ]; then 2577 RS_CMPCMD="cmp" 2578 fi 2579 if [ -z $RS_HEADCMD ]; then 2580 RS_HEADCMD="head" 2581 fi 2582 # we assume TZ is set, else most test will fail. So let's ensure 2583 # this really is the case 2584 if [ -z $TZ ]; then 2585 echo "testbench: TZ env var not set, setting it to UTC" 2586 export TZ=UTC 2587 fi 2588 ulimit -c unlimited &> /dev/null # at least try to get core dumps 2589 export TB_STARTTEST=$(date +%s) 2590 printf '%s\n' '------------------------------------------------------------' 2591 printf '%s Test: %s\n' "$(tb_timestamp)" "$0" 2592 printf '%s\n' '------------------------------------------------------------' 2593 rm -f xlate*.lkp_tbl 2594 rm -f log log* # RSyslog debug output 2595 rm -f work 2596 rm -rf test-logdir stat-file1 2597 rm -f rsyslog.empty imfile-state:* omkafka-failed.data 2598 rm -f tmp.qi nocert 2599 rm -f core.* vgcore.* core* 2600 # Note: rsyslog.action.*.include must NOT be deleted, as it 2601 # is used to setup some parameters BEFORE calling init. This 2602 # happens in chained test scripts. Delete on exit is fine, 2603 # though. 2604 # note: TCPFLOOD_EXTRA_OPTS MUST NOT be unset in init, because 2605 # some tests need to set it BEFORE calling init to accommodate 2606 # their generic test drivers. 2607 if [ "$TCPFLOOD_EXTRA_OPTS" != '' ] ; then 2608 echo TCPFLOOD_EXTRA_OPTS set: $TCPFLOOD_EXTRA_OPTS 2609 fi 2610 if [ "$USE_AUTO_DEBUG" != 'on' ] ; then 2611 rm -f IN_AUTO_DEBUG 2612 fi 2613 if [ -e IN_AUTO_DEBUG ]; then 2614 export valgrind="valgrind --malloc-fill=ff --free-fill=fe --suppressions=$srcdir/known_issues.supp ${EXTRA_VALGRIND_SUPPRESSIONS:-} --log-fd=1" 2615 fi 2616 ;; 2617 2618 'check-ipv6-available') # check if IPv6 - will exit 77 when not OK 2619 if ip address > /dev/null ; then 2620 cmd="ip address" 2621 else 2622 cmd="ifconfig -a" 2623 fi 2624 echo command used for ipv6 detection: $cmd 2625 $cmd | grep ::1 > /dev/null 2626 if [ $? -ne 0 ] ; then 2627 printf 'this test requires an active IPv6 stack, which we do not have here\n' 2628 error_exit 77 2629 fi 2630 ;; 2631 'kill-immediate') # kill rsyslog unconditionally 2632 kill -9 $(cat $RSYSLOG_PIDBASE.pid) 2633 # note: we do not wait for the actual termination! 2634 ;; 2635 'ensure-no-process-exists') 2636 ps -ef | grep -v grep | grep -qF "$2" 2637 if [ "x$?" == "x0" ]; then 2638 echo "assertion failed: process with name-fragment matching '$2' found" 2639 error_exit 1 2640 fi 2641 ;; 2642 'grep-check') # grep for "$EXPECTED" present in rsyslog.log - env var must be set 2643 # before this method is called 2644 grep "$EXPECTED" ${RSYSLOG_OUT_LOG} > /dev/null 2645 if [ $? -eq 1 ]; then 2646 echo "GREP FAIL: ${RSYSLOG_OUT_LOG} content:" 2647 cat ${RSYSLOG_OUT_LOG} 2648 echo "GREP FAIL: expected text not found:" 2649 echo "$EXPECTED" 2650 error_exit 1 2651 fi; 2652 ;; 2653 'block-stats-flush') 2654 echo blocking stats flush 2655 echo "blockStatsReporting" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 2656 ;; 2657 'await-stats-flush-after-block') 2658 echo unblocking stats flush and waiting for it 2659 echo "awaitStatsReport" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 2660 ;; 2661 'allow-single-stats-flush-after-block-and-wait-for-it') 2662 echo blocking stats flush 2663 echo "awaitStatsReport block_again" | $TESTTOOL_DIR/diagtalker -p$IMDIAG_PORT || error_exit $? 2664 ;; 2665 'wait-for-dyn-stats-reset') 2666 echo "will wait for dyn-stats-reset" 2667 while [[ ! -f $2 ]]; do 2668 echo waiting for stats file "'$2'" to be created 2669 $TESTTOOL_DIR/msleep 100 2670 done 2671 prev_purged=$(grep -F 'origin=dynstats' < $2 | grep -F "${3}.purge_triggered=" | sed -e 's/.\+.purge_triggered=//g' | awk '{s+=$1} END {print s}') 2672 new_purged=$prev_purged 2673 while [[ "x$prev_purged" == "x$new_purged" ]]; do 2674 new_purged=$(grep -F 'origin=dynstats' < "$2" | grep -F "${3}.purge_triggered=" | sed -e 's/.\+\.purge_triggered=//g' | awk '{s+=$1} END {print s}') # busy spin, because it allows as close timing-coordination in actual test run as possible 2675 $TESTTOOL_DIR/msleep 10 2676 done 2677 echo "dyn-stats reset for bucket ${3} registered" 2678 ;; 2679 'assert-first-column-sum-greater-than') 2680 sum=$(grep $3 <$4| sed -e $2 | awk '{s+=$1} END {print s}') 2681 if [ ! $sum -gt $5 ]; then 2682 echo sum of first column with edit-expr "'$2'" run over lines from file "'$4'" matched by "'$3'" equals "'$sum'" which is smaller than expected lower-limit of "'$5'" 2683 echo "file contents:" 2684 cat $4 2685 error_exit 1 2686 fi 2687 ;; 2688 'content-pattern-check') 2689 grep -q "$2" < ${RSYSLOG_OUT_LOG} 2690 if [ "$?" -ne "0" ]; then 2691 echo content-check failed, not every line matched pattern "'$2'" 2692 echo "file contents:" 2693 cat -n $4 2694 error_exit 1 2695 fi 2696 ;; 2697 'require-journalctl') # check if journalctl exists on the system 2698 if ! hash journalctl 2>/dev/null ; then 2699 echo "journalctl command missing, skipping test" 2700 exit 77 2701 fi 2702 ;; 2703 'check-inotify') # Check for inotify/fen support 2704 if [ -n "$(find /usr/include -name 'inotify.h' -print -quit)" ]; then 2705 echo [inotify mode] 2706 elif [ -n "$(find /usr/include/sys/ -name 'port.h' -print -quit)" ]; then 2707 grep -qF "PORT_SOURCE_FILE" < /usr/include/sys/port.h 2708 if [ "$?" -ne "0" ]; then 2709 echo [port.h found but FEN API not implemented , skipping...] 2710 exit 77 # FEN API not available, skip this test 2711 fi 2712 echo [fen mode] 2713 else 2714 echo [inotify/fen not supported, skipping...] 2715 exit 77 # no inotify available, skip this test 2716 fi 2717 ;; 2718 'check-inotify-only') # Check for ONLY inotify support 2719 if [ -n "$(find /usr/include -name 'inotify.h' -print -quit)" ]; then 2720 echo [inotify mode] 2721 else 2722 echo [inotify not supported, skipping...] 2723 exit 77 # no inotify available, skip this test 2724 fi 2725 ;; 2726 *) echo "TESTBENCH error: invalid argument" $1 2727 exit 100 2728esac 2729