1#!/usr/bin/env bash
2# Copyright 2014 Cloudera, Inc.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# Script which wraps running a test and redirects its output to a
17# test log directory.
18#
19# Arguments:
20#    $1 - Base path for logs/artifacts.
21#    $2 - type of test (e.g. test or benchmark)
22#    $3 - path to executable
23#    $ARGN - arguments for executable
24#
25
26OUTPUT_ROOT=$1
27shift
28ROOT=$(cd $(dirname $BASH_SOURCE)/..; pwd)
29
30TEST_LOGDIR=$OUTPUT_ROOT/build/$1-logs
31mkdir -p $TEST_LOGDIR
32
33RUN_TYPE=$1
34shift
35TEST_DEBUGDIR=$OUTPUT_ROOT/build/$RUN_TYPE-debug
36mkdir -p $TEST_DEBUGDIR
37
38TEST_DIRNAME=$(cd $(dirname $1); pwd)
39TEST_FILENAME=$(basename $1)
40shift
41TEST_EXECUTABLE="$TEST_DIRNAME/$TEST_FILENAME"
42TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and extension (if any).
43
44# We run each test in its own subdir to avoid core file related races.
45TEST_WORKDIR=$OUTPUT_ROOT/build/test-work/$TEST_NAME
46mkdir -p $TEST_WORKDIR
47pushd $TEST_WORKDIR >/dev/null || exit 1
48rm -f *
49
50set -o pipefail
51
52LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt
53XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml
54
55TEST_EXECUTION_ATTEMPTS=1
56
57# Remove both the uncompressed output, so the developer doesn't accidentally get confused
58# and read output from a prior test run.
59rm -f $LOGFILE $LOGFILE.gz
60
61pipe_cmd=cat
62
63function setup_sanitizers() {
64  # Sets environment variables for different sanitizers (it configures how) the run_tests. Function works.
65
66  # Configure TSAN (ignored if this isn't a TSAN build).
67  #
68  TSAN_OPTIONS="$TSAN_OPTIONS suppressions=$ROOT/build-support/tsan-suppressions.txt"
69  TSAN_OPTIONS="$TSAN_OPTIONS history_size=7"
70  # Some tests deliberately fail allocating memory
71  TSAN_OPTIONS="$TSAN_OPTIONS allocator_may_return_null=1"
72  export TSAN_OPTIONS
73
74  UBSAN_OPTIONS="$UBSAN_OPTIONS print_stacktrace=1"
75  UBSAN_OPTIONS="$UBSAN_OPTIONS suppressions=$ROOT/build-support/ubsan-suppressions.txt"
76  export UBSAN_OPTIONS
77
78  # Enable leak detection even under LLVM 3.4, where it was disabled by default.
79  # This flag only takes effect when running an ASAN build.
80  # ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1"
81  # export ASAN_OPTIONS
82
83  # Set up suppressions for LeakSanitizer
84  LSAN_OPTIONS="$LSAN_OPTIONS suppressions=$ROOT/build-support/lsan-suppressions.txt"
85  export LSAN_OPTIONS
86}
87
88function run_test() {
89  # Run gtest style tests with sanitizers if they are setup appropriately.
90
91  # gtest won't overwrite old junit test files, resulting in a build failure
92  # even when retries are successful.
93  rm -f $XMLFILE
94
95  $TEST_EXECUTABLE "$@" > $LOGFILE.raw 2>&1
96  STATUS=$?
97  cat $LOGFILE.raw \
98    | ${PYTHON:-python} $ROOT/build-support/asan_symbolize.py \
99    | ${CXXFILT:-c++filt} \
100    | $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE \
101    | $pipe_cmd 2>&1 | tee $LOGFILE
102  rm -f $LOGFILE.raw
103
104  # TSAN doesn't always exit with a non-zero exit code due to a bug:
105  # mutex errors don't get reported through the normal error reporting infrastructure.
106  # So we make sure to detect this and exit 1.
107  #
108  # Additionally, certain types of failures won't show up in the standard JUnit
109  # XML output from gtest. We assume that gtest knows better than us and our
110  # regexes in most cases, but for certain errors we delete the resulting xml
111  # file and let our own post-processing step regenerate it.
112  export GREP=$(which egrep)
113  if zgrep --silent "ThreadSanitizer|Leak check.*detected leaks" $LOGFILE ; then
114    echo ThreadSanitizer or leak check failures in $LOGFILE
115    STATUS=1
116    rm -f $XMLFILE
117  fi
118}
119
120function print_coredumps() {
121  # The script expects core files relative to the build directory with unique
122  # names per test executable because of the parallel running. So the corefile
123  # patterns must be set with prefix `core.{test-executable}*`:
124  #
125  # In case of macOS:
126  #   sudo sysctl -w kern.corefile=core.%N.%P
127  # On Linux:
128  #   sudo sysctl -w kernel.core_pattern=core.%e.%p
129  #
130  # and the ulimit must be increased:
131  #   ulimit -c unlimited
132
133  # filename is truncated to the first 15 characters in case of linux, so limit
134  # the pattern for the first 15 characters
135  FILENAME=$(basename "${TEST_EXECUTABLE}")
136  FILENAME=$(echo ${FILENAME} | cut -c-15)
137  PATTERN="^core\.${FILENAME}"
138
139  COREFILES=$(ls | grep $PATTERN)
140  if [ -n "$COREFILES" ]; then
141    echo "Found core dump, printing backtrace:"
142
143    for COREFILE in $COREFILES; do
144      # Print backtrace
145      if [ "$(uname)" == "Darwin" ]; then
146        lldb -c "${COREFILE}" --batch --one-line "thread backtrace all -e true"
147      else
148        gdb -c "${COREFILE}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch
149      fi
150      # Remove the coredump, regenerate it via running the test case directly
151      rm "${COREFILE}"
152    done
153  fi
154}
155
156function post_process_tests() {
157  # If we have a LeakSanitizer report, and XML reporting is configured, add a new test
158  # case result to the XML file for the leak report. Otherwise Jenkins won't show
159  # us which tests had LSAN errors.
160  if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then
161      echo Test had memory leaks. Editing XML
162      perl -p -i -e '
163      if (m#</testsuite>#) {
164        print "<testcase name=\"LeakSanitizer\" status=\"run\" classname=\"LSAN\">\n";
165        print "  <failure message=\"LeakSanitizer failed\" type=\"\">\n";
166        print "    See txt log file for details\n";
167        print "  </failure>\n";
168        print "</testcase>\n";
169      }' $XMLFILE
170  fi
171}
172
173function run_other() {
174  # Generic run function for test like executables that aren't actually gtest
175  $TEST_EXECUTABLE "$@" 2>&1 | $pipe_cmd > $LOGFILE
176  STATUS=$?
177}
178
179if [ $RUN_TYPE = "test" ]; then
180  setup_sanitizers
181fi
182
183# Run the actual test.
184for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
185  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
186    # If the test fails, the test output may or may not be left behind,
187    # depending on whether the test cleaned up or exited immediately. Either
188    # way we need to clean it up. We do this by comparing the data directory
189    # contents before and after the test runs, and deleting anything new.
190    #
191    # The comm program requires that its two inputs be sorted.
192    TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
193  fi
194
195  if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
196    # Now delete any new test output.
197    TEST_TMPDIR_AFTER=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
198    DIFF=$(comm -13 <(echo "$TEST_TMPDIR_BEFORE") \
199                    <(echo "$TEST_TMPDIR_AFTER"))
200    for DIR in $DIFF; do
201      # Multiple tests may be running concurrently. To avoid deleting the
202      # wrong directories, constrain to only directories beginning with the
203      # test name.
204      #
205      # This may delete old test directories belonging to this test, but
206      # that's not typically a concern when rerunning flaky tests.
207      if [[ $DIR =~ ^$TEST_TMPDIR/$TEST_NAME ]]; then
208        echo Deleting leftover flaky test directory "$DIR"
209        rm -Rf "$DIR"
210      fi
211    done
212  fi
213  echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
214    "(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
215  if [ $RUN_TYPE = "test" ]; then
216    run_test $*
217  else
218    run_other $*
219  fi
220  if [ "$STATUS" -eq "0" ]; then
221    break
222  elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then
223    echo Test failed attempt number $ATTEMPT_NUMBER
224    echo Will retry...
225  fi
226done
227
228if [ $RUN_TYPE = "test" ]; then
229  post_process_tests
230fi
231
232print_coredumps
233
234popd
235rm -Rf $TEST_WORKDIR
236
237exit $STATUS
238